LCOV - fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023

LCOV - code coverage report

Current view:	top level - fs/btrfs - disk-io.c (source / functions)		Hit	Total	Coverage
Test:	fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023	Lines:	1985	2566	77.4 %
Date:	2023-07-31 20:08:34	Functions:	106	112	94.6 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/fs.h>
       7             : #include <linux/blkdev.h>
       8             : #include <linux/radix-tree.h>
       9             : #include <linux/writeback.h>
      10             : #include <linux/workqueue.h>
      11             : #include <linux/kthread.h>
      12             : #include <linux/slab.h>
      13             : #include <linux/migrate.h>
      14             : #include <linux/ratelimit.h>
      15             : #include <linux/uuid.h>
      16             : #include <linux/semaphore.h>
      17             : #include <linux/error-injection.h>
      18             : #include <linux/crc32c.h>
      19             : #include <linux/sched/mm.h>
      20             : #include <asm/unaligned.h>
      21             : #include <crypto/hash.h>
      22             : #include "ctree.h"
      23             : #include "disk-io.h"
      24             : #include "transaction.h"
      25             : #include "btrfs_inode.h"
      26             : #include "bio.h"
      27             : #include "print-tree.h"
      28             : #include "locking.h"
      29             : #include "tree-log.h"
      30             : #include "free-space-cache.h"
      31             : #include "free-space-tree.h"
      32             : #include "check-integrity.h"
      33             : #include "rcu-string.h"
      34             : #include "dev-replace.h"
      35             : #include "raid56.h"
      36             : #include "sysfs.h"
      37             : #include "qgroup.h"
      38             : #include "compression.h"
      39             : #include "tree-checker.h"
      40             : #include "ref-verify.h"
      41             : #include "block-group.h"
      42             : #include "discard.h"
      43             : #include "space-info.h"
      44             : #include "zoned.h"
      45             : #include "subpage.h"
      46             : #include "fs.h"
      47             : #include "accessors.h"
      48             : #include "extent-tree.h"
      49             : #include "root-tree.h"
      50             : #include "defrag.h"
      51             : #include "uuid-tree.h"
      52             : #include "relocation.h"
      53             : #include "scrub.h"
      54             : #include "super.h"
      55             : 
      56             : #define BTRFS_SUPER_FLAG_SUPP   (BTRFS_HEADER_FLAG_WRITTEN |\
      57             :                                  BTRFS_HEADER_FLAG_RELOC |\
      58             :                                  BTRFS_SUPER_FLAG_ERROR |\
      59             :                                  BTRFS_SUPER_FLAG_SEEDING |\
      60             :                                  BTRFS_SUPER_FLAG_METADUMP |\
      61             :                                  BTRFS_SUPER_FLAG_METADUMP_V2)
      62             : 
      63             : static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
      64             : static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
      65             : 
      66        3473 : static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
      67             : {
      68        3473 :         if (fs_info->csum_shash)
      69        3244 :                 crypto_free_shash(fs_info->csum_shash);
      70        3473 : }
      71             : 
      72             : /*
      73             :  * Compute the csum of a btree block and store the result to provided buffer.
      74             :  */
      75     9401148 : static void csum_tree_block(struct extent_buffer *buf, u8 *result)
      76             : {
      77     9401148 :         struct btrfs_fs_info *fs_info = buf->fs_info;
      78     9401148 :         const int num_pages = num_extent_pages(buf);
      79     9401148 :         const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
      80     9401148 :         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
      81     9401148 :         char *kaddr;
      82     9401148 :         int i;
      83             : 
      84     9401148 :         shash->tfm = fs_info->csum_shash;
      85     9401148 :         crypto_shash_init(shash);
      86     9401147 :         kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
      87     9401147 :         crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
      88     9401147 :                             first_page_part - BTRFS_CSUM_SIZE);
      89             : 
      90    45978973 :         for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
      91    27176714 :                 kaddr = page_address(buf->pages[i]);
      92    27176714 :                 crypto_shash_update(shash, kaddr, PAGE_SIZE);
      93             :         }
      94     9401112 :         memset(result, 0, BTRFS_CSUM_SIZE);
      95     9401112 :         crypto_shash_final(shash, result);
      96     9401127 : }
      97             : 
      98             : /*
      99             :  * we can't consider a given block up to date unless the transid of the
     100             :  * block matches the transid in the parent node's pointer.  This is how we
     101             :  * detect blocks that either didn't get written at all or got written
     102             :  * in the wrong place.
     103             :  */
     104   557321407 : int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
     105             : {
     106  1114642814 :         if (!extent_buffer_uptodate(eb))
     107             :                 return 0;
     108             : 
     109   557311647 :         if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
     110             :                 return 1;
     111             : 
     112           0 :         if (atomic)
     113             :                 return -EAGAIN;
     114             : 
     115           0 :         if (!extent_buffer_uptodate(eb) ||
     116             :             btrfs_header_generation(eb) != parent_transid) {
     117           0 :                 btrfs_err_rl(eb->fs_info,
     118             : "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
     119             :                         eb->start, eb->read_mirror,
     120             :                         parent_transid, btrfs_header_generation(eb));
     121           0 :                 clear_extent_buffer_uptodate(eb);
     122           0 :                 return 0;
     123             :         }
     124             :         return 1;
     125             : }
     126             : 
     127             : static bool btrfs_supported_super_csum(u16 csum_type)
     128             : {
     129      447051 :         switch (csum_type) {
     130             :         case BTRFS_CSUM_TYPE_CRC32:
     131             :         case BTRFS_CSUM_TYPE_XXHASH:
     132             :         case BTRFS_CSUM_TYPE_SHA256:
     133             :         case BTRFS_CSUM_TYPE_BLAKE2:
     134             :                 return true;
     135             :         default:
     136           0 :                 return false;
     137             :         }
     138             : }
     139             : 
     140             : /*
     141             :  * Return 0 if the superblock checksum type matches the checksum value of that
     142             :  * algorithm. Pass the raw disk superblock data.
     143             :  */
     144        3293 : int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
     145             :                            const struct btrfs_super_block *disk_sb)
     146             : {
     147        3293 :         char result[BTRFS_CSUM_SIZE];
     148        3293 :         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
     149             : 
     150        3293 :         shash->tfm = fs_info->csum_shash;
     151             : 
     152             :         /*
     153             :          * The super_block structure does not span the whole
     154             :          * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
     155             :          * filled with zeros and is included in the checksum.
     156             :          */
     157        3293 :         crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
     158             :                             BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
     159             : 
     160        6586 :         if (memcmp(disk_sb->csum, result, fs_info->csum_size))
     161           0 :                 return 1;
     162             : 
     163             :         return 0;
     164             : }
     165             : 
     166           0 : static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
     167             :                                       int mirror_num)
     168             : {
     169           0 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     170           0 :         int i, num_pages = num_extent_pages(eb);
     171           0 :         int ret = 0;
     172             : 
     173           0 :         if (sb_rdonly(fs_info->sb))
     174             :                 return -EROFS;
     175             : 
     176           0 :         for (i = 0; i < num_pages; i++) {
     177           0 :                 struct page *p = eb->pages[i];
     178           0 :                 u64 start = max_t(u64, eb->start, page_offset(p));
     179           0 :                 u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE);
     180           0 :                 u32 len = end - start;
     181             : 
     182           0 :                 ret = btrfs_repair_io_failure(fs_info, 0, start, len,
     183             :                                 start, p, offset_in_page(start), mirror_num);
     184           0 :                 if (ret)
     185             :                         break;
     186             :         }
     187             : 
     188             :         return ret;
     189             : }
     190             : 
     191             : /*
     192             :  * helper to read a given tree block, doing retries as required when
     193             :  * the checksums don't match and we have alternate mirrors to try.
     194             :  *
     195             :  * @check:              expected tree parentness check, see the comments of the
     196             :  *                      structure for details.
     197             :  */
     198    23714416 : int btrfs_read_extent_buffer(struct extent_buffer *eb,
     199             :                              struct btrfs_tree_parent_check *check)
     200             : {
     201    23714416 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     202    23714416 :         int failed = 0;
     203    23714416 :         int ret;
     204    23714416 :         int num_copies = 0;
     205    23714416 :         int mirror_num = 0;
     206    23714416 :         int failed_mirror = 0;
     207             : 
     208    23714418 :         ASSERT(check);
     209             : 
     210    23714418 :         while (1) {
     211    23714418 :                 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
     212    23715010 :                 ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
     213    23714170 :                 if (!ret)
     214             :                         break;
     215             : 
     216           4 :                 num_copies = btrfs_num_copies(fs_info,
     217           4 :                                               eb->start, eb->len);
     218           4 :                 if (num_copies == 1)
     219             :                         break;
     220             : 
     221           3 :                 if (!failed_mirror) {
     222           3 :                         failed = 1;
     223           3 :                         failed_mirror = eb->read_mirror;
     224             :                 }
     225             : 
     226           3 :                 mirror_num++;
     227           3 :                 if (mirror_num == failed_mirror)
     228           0 :                         mirror_num++;
     229             : 
     230           3 :                 if (mirror_num > num_copies)
     231             :                         break;
     232             :         }
     233             : 
     234    23714168 :         if (failed && !ret && failed_mirror)
     235           0 :                 btrfs_repair_eb_io_failure(eb, failed_mirror);
     236             : 
     237    23714168 :         return ret;
     238             : }
     239             : 
     240             : /*
     241             :  * Checksum a dirty tree block before IO.
     242             :  */
     243     9306636 : blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
     244             : {
     245     9306636 :         struct extent_buffer *eb = bbio->private;
     246     9306636 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     247     9306636 :         u64 found_start = btrfs_header_bytenr(eb);
     248     9306636 :         u8 result[BTRFS_CSUM_SIZE];
     249     9306636 :         int ret;
     250             : 
     251             :         /* Btree blocks are always contiguous on disk. */
     252     9306636 :         if (WARN_ON_ONCE(bbio->file_offset != eb->start))
     253             :                 return BLK_STS_IOERR;
     254     9306636 :         if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
     255             :                 return BLK_STS_IOERR;
     256             : 
     257    18613272 :         if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
     258           0 :                 WARN_ON_ONCE(found_start != 0);
     259             :                 return BLK_STS_OK;
     260             :         }
     261             : 
     262     9306636 :         if (WARN_ON_ONCE(found_start != eb->start))
     263             :                 return BLK_STS_IOERR;
     264     9306636 :         if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0], eb->start,
     265             :                                               eb->len)))
     266             :                 return BLK_STS_IOERR;
     267             : 
     268     9306607 :         ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
     269             :                                     offsetof(struct btrfs_header, fsid),
     270             :                                     BTRFS_FSID_SIZE) == 0);
     271     9306590 :         csum_tree_block(eb, result);
     272             : 
     273     9306606 :         if (btrfs_header_level(eb))
     274      880239 :                 ret = btrfs_check_node(eb);
     275             :         else
     276     8426367 :                 ret = btrfs_check_leaf(eb);
     277             : 
     278     9306700 :         if (ret < 0)
     279           0 :                 goto error;
     280             : 
     281             :         /*
     282             :          * Also check the generation, the eb reached here must be newer than
     283             :          * last committed. Or something seriously wrong happened.
     284             :          */
     285     9306700 :         if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
     286           0 :                 ret = -EUCLEAN;
     287           0 :                 btrfs_err(fs_info,
     288             :                         "block=%llu bad generation, have %llu expect > %llu",
     289             :                           eb->start, btrfs_header_generation(eb),
     290             :                           fs_info->last_trans_committed);
     291           0 :                 goto error;
     292             :         }
     293     9306700 :         write_extent_buffer(eb, result, 0, fs_info->csum_size);
     294     9306700 :         return BLK_STS_OK;
     295             : 
     296           0 : error:
     297           0 :         btrfs_print_tree(eb, 0);
     298           0 :         btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
     299             :                   eb->start);
     300             :         /*
     301             :          * Be noisy if this is an extent buffer from a log tree. We don't abort
     302             :          * a transaction in case there's a bad log tree extent buffer, we just
     303             :          * fallback to a transaction commit. Still we want to know when there is
     304             :          * a bad log tree extent buffer, as that may signal a bug somewhere.
     305             :          */
     306           0 :         WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
     307             :                 btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
     308           0 :         return errno_to_blk_status(ret);
     309             : }
     310             : 
     311       94520 : static bool check_tree_block_fsid(struct extent_buffer *eb)
     312             : {
     313       94520 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     314       94520 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
     315       94520 :         u8 fsid[BTRFS_FSID_SIZE];
     316       94520 :         u8 *metadata_uuid;
     317             : 
     318       94520 :         read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
     319             :                            BTRFS_FSID_SIZE);
     320             :         /*
     321             :          * Checking the incompat flag is only valid for the current fs. For
     322             :          * seed devices it's forbidden to have their uuid changed so reading
     323             :          * ->fsid in this case is fine
     324             :          */
     325       94520 :         if (btrfs_fs_incompat(fs_info, METADATA_UUID))
     326           0 :                 metadata_uuid = fs_devices->metadata_uuid;
     327             :         else
     328       94520 :                 metadata_uuid = fs_devices->fsid;
     329             : 
     330      189040 :         if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
     331             :                 return false;
     332             : 
     333           0 :         list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
     334           0 :                 if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE))
     335             :                         return false;
     336             : 
     337             :         return true;
     338             : }
     339             : 
     340             : /* Do basic extent buffer checks at read time */
     341       94520 : int btrfs_validate_extent_buffer(struct extent_buffer *eb,
     342             :                                  struct btrfs_tree_parent_check *check)
     343             : {
     344       94520 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     345       94520 :         u64 found_start;
     346       94520 :         const u32 csum_size = fs_info->csum_size;
     347       94520 :         u8 found_level;
     348       94520 :         u8 result[BTRFS_CSUM_SIZE];
     349       94520 :         const u8 *header_csum;
     350       94520 :         int ret = 0;
     351             : 
     352       94520 :         ASSERT(check);
     353             : 
     354       94520 :         found_start = btrfs_header_bytenr(eb);
     355       94520 :         if (found_start != eb->start) {
     356           0 :                 btrfs_err_rl(fs_info,
     357             :                         "bad tree block start, mirror %u want %llu have %llu",
     358             :                              eb->read_mirror, eb->start, found_start);
     359           0 :                 ret = -EIO;
     360           0 :                 goto out;
     361             :         }
     362       94520 :         if (check_tree_block_fsid(eb)) {
     363           0 :                 btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
     364             :                              eb->start, eb->read_mirror);
     365           0 :                 ret = -EIO;
     366           0 :                 goto out;
     367             :         }
     368       94520 :         found_level = btrfs_header_level(eb);
     369       94520 :         if (found_level >= BTRFS_MAX_LEVEL) {
     370           0 :                 btrfs_err(fs_info,
     371             :                         "bad tree block level, mirror %u level %d on logical %llu",
     372             :                         eb->read_mirror, btrfs_header_level(eb), eb->start);
     373           0 :                 ret = -EIO;
     374           0 :                 goto out;
     375             :         }
     376             : 
     377       94520 :         csum_tree_block(eb, result);
     378       94520 :         header_csum = page_address(eb->pages[0]) +
     379             :                 get_eb_offset_in_page(eb, offsetof(struct btrfs_header, csum));
     380             : 
     381      189040 :         if (memcmp(result, header_csum, csum_size) != 0) {
     382           1 :                 btrfs_warn_rl(fs_info,
     383             : "checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
     384             :                               eb->start, eb->read_mirror,
     385             :                               CSUM_FMT_VALUE(csum_size, header_csum),
     386             :                               CSUM_FMT_VALUE(csum_size, result),
     387             :                               btrfs_header_level(eb));
     388           1 :                 ret = -EUCLEAN;
     389           1 :                 goto out;
     390             :         }
     391             : 
     392       94519 :         if (found_level != check->level) {
     393           0 :                 btrfs_err(fs_info,
     394             :                 "level verify failed on logical %llu mirror %u wanted %u found %u",
     395             :                           eb->start, eb->read_mirror, check->level, found_level);
     396           0 :                 ret = -EIO;
     397           0 :                 goto out;
     398             :         }
     399       94519 :         if (unlikely(check->transid &&
     400             :                      btrfs_header_generation(eb) != check->transid)) {
     401           0 :                 btrfs_err_rl(eb->fs_info,
     402             : "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
     403             :                                 eb->start, eb->read_mirror, check->transid,
     404             :                                 btrfs_header_generation(eb));
     405           0 :                 ret = -EIO;
     406           0 :                 goto out;
     407             :         }
     408       94519 :         if (check->has_first_key) {
     409       46823 :                 struct btrfs_key *expect_key = &check->first_key;
     410       46823 :                 struct btrfs_key found_key;
     411             : 
     412       46823 :                 if (found_level)
     413         213 :                         btrfs_node_key_to_cpu(eb, &found_key, 0);
     414             :                 else
     415       46610 :                         btrfs_item_key_to_cpu(eb, &found_key, 0);
     416       46823 :                 if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) {
     417           0 :                         btrfs_err(fs_info,
     418             : "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
     419             :                                   eb->start, check->transid,
     420             :                                   expect_key->objectid,
     421             :                                   expect_key->type, expect_key->offset,
     422             :                                   found_key.objectid, found_key.type,
     423             :                                   found_key.offset);
     424           0 :                         ret = -EUCLEAN;
     425           0 :                         goto out;
     426             :                 }
     427             :         }
     428       94519 :         if (check->owner_root) {
     429       85201 :                 ret = btrfs_check_eb_owner(eb, check->owner_root);
     430       85201 :                 if (ret < 0)
     431           0 :                         goto out;
     432             :         }
     433             : 
     434             :         /*
     435             :          * If this is a leaf block and it is corrupt, set the corrupt bit so
     436             :          * that we don't try and read the other copies of this block, just
     437             :          * return -EIO.
     438             :          */
     439       94519 :         if (found_level == 0 && btrfs_check_leaf(eb)) {
     440           0 :                 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
     441           0 :                 ret = -EIO;
     442             :         }
     443             : 
     444       94519 :         if (found_level > 0 && btrfs_check_node(eb))
     445             :                 ret = -EIO;
     446             : 
     447       94519 :         if (ret)
     448           0 :                 btrfs_err(fs_info,
     449             :                 "read time tree block corruption detected on logical %llu mirror %u",
     450             :                           eb->start, eb->read_mirror);
     451       94519 : out:
     452       94520 :         return ret;
     453             : }
     454             : 
     455             : #ifdef CONFIG_MIGRATION
     456    20033632 : static int btree_migrate_folio(struct address_space *mapping,
     457             :                 struct folio *dst, struct folio *src, enum migrate_mode mode)
     458             : {
     459             :         /*
     460             :          * we can't safely write a btree page from here,
     461             :          * we haven't done the locking hook
     462             :          */
     463    20033632 :         if (folio_test_dirty(src))
     464             :                 return -EAGAIN;
     465             :         /*
     466             :          * Buffers may be managed in a filesystem specific way.
     467             :          * We must have no buffers or drop them.
     468             :          */
     469     3656422 :         if (folio_get_private(src) &&
     470      765077 :             !filemap_release_folio(src, GFP_KERNEL))
     471             :                 return -EAGAIN;
     472     2829792 :         return migrate_folio(mapping, dst, src, mode);
     473             : }
     474             : #else
     475             : #define btree_migrate_folio NULL
     476             : #endif
     477             : 
     478     2362732 : static int btree_writepages(struct address_space *mapping,
     479             :                             struct writeback_control *wbc)
     480             : {
     481     2362732 :         struct btrfs_fs_info *fs_info;
     482     2362732 :         int ret;
     483             : 
     484     2362732 :         if (wbc->sync_mode == WB_SYNC_NONE) {
     485             : 
     486       42528 :                 if (wbc->for_kupdate)
     487             :                         return 0;
     488             : 
     489       42140 :                 fs_info = BTRFS_I(mapping->host)->root->fs_info;
     490             :                 /* this is a bit racy, but that's ok */
     491       42140 :                 ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
     492             :                                              BTRFS_DIRTY_METADATA_THRESH,
     493             :                                              fs_info->dirty_metadata_batch);
     494       42138 :                 if (ret < 0)
     495             :                         return 0;
     496             :         }
     497     2320276 :         return btree_write_cache_pages(mapping, wbc);
     498             : }
     499             : 
     500     3225582 : static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
     501             : {
     502     3225582 :         if (folio_test_writeback(folio) || folio_test_dirty(folio))
     503             :                 return false;
     504             : 
     505     3225582 :         return try_release_extent_buffer(&folio->page);
     506             : }
     507             : 
     508       34689 : static void btree_invalidate_folio(struct folio *folio, size_t offset,
     509             :                                  size_t length)
     510             : {
     511       34689 :         struct extent_io_tree *tree;
     512       34689 :         tree = &BTRFS_I(folio->mapping->host)->io_tree;
     513       34689 :         extent_invalidate_folio(tree, folio, offset);
     514       34689 :         btree_release_folio(folio, GFP_NOFS);
     515       34689 :         if (folio_get_private(folio)) {
     516           0 :                 btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
     517             :                            "folio private not zero on folio %llu",
     518             :                            (unsigned long long)folio_pos(folio));
     519           0 :                 folio_detach_private(folio);
     520             :         }
     521       34689 : }
     522             : 
     523             : #ifdef DEBUG
     524             : static bool btree_dirty_folio(struct address_space *mapping,
     525             :                 struct folio *folio)
     526             : {
     527             :         struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
     528             :         struct btrfs_subpage *subpage;
     529             :         struct extent_buffer *eb;
     530             :         int cur_bit = 0;
     531             :         u64 page_start = folio_pos(folio);
     532             : 
     533             :         if (fs_info->sectorsize == PAGE_SIZE) {
     534             :                 eb = folio_get_private(folio);
     535             :                 BUG_ON(!eb);
     536             :                 BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
     537             :                 BUG_ON(!atomic_read(&eb->refs));
     538             :                 btrfs_assert_tree_write_locked(eb);
     539             :                 return filemap_dirty_folio(mapping, folio);
     540             :         }
     541             :         subpage = folio_get_private(folio);
     542             : 
     543             :         ASSERT(subpage->dirty_bitmap);
     544             :         while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
     545             :                 unsigned long flags;
     546             :                 u64 cur;
     547             :                 u16 tmp = (1 << cur_bit);
     548             : 
     549             :                 spin_lock_irqsave(&subpage->lock, flags);
     550             :                 if (!(tmp & subpage->dirty_bitmap)) {
     551             :                         spin_unlock_irqrestore(&subpage->lock, flags);
     552             :                         cur_bit++;
     553             :                         continue;
     554             :                 }
     555             :                 spin_unlock_irqrestore(&subpage->lock, flags);
     556             :                 cur = page_start + cur_bit * fs_info->sectorsize;
     557             : 
     558             :                 eb = find_extent_buffer(fs_info, cur);
     559             :                 ASSERT(eb);
     560             :                 ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
     561             :                 ASSERT(atomic_read(&eb->refs));
     562             :                 btrfs_assert_tree_write_locked(eb);
     563             :                 free_extent_buffer(eb);
     564             : 
     565             :                 cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
     566             :         }
     567             :         return filemap_dirty_folio(mapping, folio);
     568             : }
     569             : #else
     570             : #define btree_dirty_folio filemap_dirty_folio
     571             : #endif
     572             : 
     573             : static const struct address_space_operations btree_aops = {
     574             :         .writepages     = btree_writepages,
     575             :         .release_folio  = btree_release_folio,
     576             :         .invalidate_folio = btree_invalidate_folio,
     577             :         .migrate_folio  = btree_migrate_folio,
     578             :         .dirty_folio    = btree_dirty_folio,
     579             : };
     580             : 
     581    28245080 : struct extent_buffer *btrfs_find_create_tree_block(
     582             :                                                 struct btrfs_fs_info *fs_info,
     583             :                                                 u64 bytenr, u64 owner_root,
     584             :                                                 int level)
     585             : {
     586    51922520 :         if (btrfs_is_testing(fs_info))
     587             :                 return alloc_test_extent_buffer(fs_info, bytenr);
     588    28245080 :         return alloc_extent_buffer(fs_info, bytenr, owner_root, level);
     589             : }
     590             : 
     591             : /*
     592             :  * Read tree block at logical address @bytenr and do various basic but critical
     593             :  * verification.
     594             :  *
     595             :  * @check:              expected tree parentness check, see comments of the
     596             :  *                      structure for details.
     597             :  */
     598    23677440 : struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
     599             :                                       struct btrfs_tree_parent_check *check)
     600             : {
     601    23677440 :         struct extent_buffer *buf = NULL;
     602    23677440 :         int ret;
     603             : 
     604    23677440 :         ASSERT(check);
     605             : 
     606    23677440 :         buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root,
     607    23677440 :                                            check->level);
     608    23677605 :         if (IS_ERR(buf))
     609             :                 return buf;
     610             : 
     611    23677605 :         ret = btrfs_read_extent_buffer(buf, check);
     612    23677158 :         if (ret) {
     613           1 :                 free_extent_buffer_stale(buf);
     614           1 :                 return ERR_PTR(ret);
     615             :         }
     616    23677157 :         if (btrfs_check_eb_owner(buf, check->owner_root)) {
     617           0 :                 free_extent_buffer_stale(buf);
     618           0 :                 return ERR_PTR(-EUCLEAN);
     619             :         }
     620             :         return buf;
     621             : 
     622             : }
     623             : 
     624       66801 : static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
     625             :                          u64 objectid)
     626             : {
     627       66801 :         bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
     628             : 
     629       66801 :         memset(&root->root_key, 0, sizeof(root->root_key));
     630       66801 :         memset(&root->root_item, 0, sizeof(root->root_item));
     631       66801 :         memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
     632       66801 :         root->fs_info = fs_info;
     633       66801 :         root->root_key.objectid = objectid;
     634       66801 :         root->node = NULL;
     635       66801 :         root->commit_root = NULL;
     636       66801 :         root->state = 0;
     637       66801 :         RB_CLEAR_NODE(&root->rb_node);
     638             : 
     639       66801 :         root->last_trans = 0;
     640       66801 :         root->free_objectid = 0;
     641       66801 :         root->nr_delalloc_inodes = 0;
     642       66801 :         root->nr_ordered_extents = 0;
     643       66801 :         root->inode_tree = RB_ROOT;
     644       66801 :         INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
     645             : 
     646       66801 :         btrfs_init_root_block_rsv(root);
     647             : 
     648       66801 :         INIT_LIST_HEAD(&root->dirty_list);
     649       66801 :         INIT_LIST_HEAD(&root->root_list);
     650       66801 :         INIT_LIST_HEAD(&root->delalloc_inodes);
     651       66801 :         INIT_LIST_HEAD(&root->delalloc_root);
     652       66801 :         INIT_LIST_HEAD(&root->ordered_extents);
     653       66801 :         INIT_LIST_HEAD(&root->ordered_root);
     654       66801 :         INIT_LIST_HEAD(&root->reloc_dirty_list);
     655       66801 :         INIT_LIST_HEAD(&root->logged_list[0]);
     656       66801 :         INIT_LIST_HEAD(&root->logged_list[1]);
     657       66801 :         spin_lock_init(&root->inode_lock);
     658       66801 :         spin_lock_init(&root->delalloc_lock);
     659       66801 :         spin_lock_init(&root->ordered_extent_lock);
     660       66801 :         spin_lock_init(&root->accounting_lock);
     661       66801 :         spin_lock_init(&root->log_extents_lock[0]);
     662       66801 :         spin_lock_init(&root->log_extents_lock[1]);
     663       66801 :         spin_lock_init(&root->qgroup_meta_rsv_lock);
     664       66801 :         mutex_init(&root->objectid_mutex);
     665       66801 :         mutex_init(&root->log_mutex);
     666       66801 :         mutex_init(&root->ordered_extent_mutex);
     667       66801 :         mutex_init(&root->delalloc_mutex);
     668       66801 :         init_waitqueue_head(&root->qgroup_flush_wait);
     669       66801 :         init_waitqueue_head(&root->log_writer_wait);
     670       66801 :         init_waitqueue_head(&root->log_commit_wait[0]);
     671       66801 :         init_waitqueue_head(&root->log_commit_wait[1]);
     672       66801 :         INIT_LIST_HEAD(&root->log_ctxs[0]);
     673       66801 :         INIT_LIST_HEAD(&root->log_ctxs[1]);
     674       66801 :         atomic_set(&root->log_commit[0], 0);
     675       66801 :         atomic_set(&root->log_commit[1], 0);
     676       66801 :         atomic_set(&root->log_writers, 0);
     677       66801 :         atomic_set(&root->log_batch, 0);
     678       66801 :         refcount_set(&root->refs, 1);
     679       66801 :         atomic_set(&root->snapshot_force_cow, 0);
     680       66801 :         atomic_set(&root->nr_swapfiles, 0);
     681       66801 :         root->log_transid = 0;
     682       66801 :         root->log_transid_committed = -1;
     683       66801 :         root->last_log_commit = 0;
     684       66801 :         root->anon_dev = 0;
     685       66801 :         if (!dummy) {
     686       66801 :                 extent_io_tree_init(fs_info, &root->dirty_log_pages,
     687             :                                     IO_TREE_ROOT_DIRTY_LOG_PAGES);
     688       66801 :                 extent_io_tree_init(fs_info, &root->log_csum_range,
     689             :                                     IO_TREE_LOG_CSUM_RANGE);
     690             :         }
     691             : 
     692       66801 :         spin_lock_init(&root->root_item_lock);
     693       66801 :         btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
     694             : #ifdef CONFIG_BTRFS_DEBUG
     695             :         INIT_LIST_HEAD(&root->leak_list);
     696             :         spin_lock(&fs_info->fs_roots_radix_lock);
     697             :         list_add_tail(&root->leak_list, &fs_info->allocated_roots);
     698             :         spin_unlock(&fs_info->fs_roots_radix_lock);
     699             : #endif
     700       66801 : }
     701             : 
     702       66800 : static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
     703             :                                            u64 objectid, gfp_t flags)
     704             : {
     705       66800 :         struct btrfs_root *root = kzalloc(sizeof(*root), flags);
     706       66801 :         if (root)
     707       66801 :                 __setup_root(root, fs_info, objectid);
     708       66801 :         return root;
     709             : }
     710             : 
     711             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
     712             : /* Should only be used by the testing infrastructure */
     713             : struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
     714             : {
     715             :         struct btrfs_root *root;
     716             : 
     717             :         if (!fs_info)
     718             :                 return ERR_PTR(-EINVAL);
     719             : 
     720             :         root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, GFP_KERNEL);
     721             :         if (!root)
     722             :                 return ERR_PTR(-ENOMEM);
     723             : 
     724             :         /* We don't use the stripesize in selftest, set it as sectorsize */
     725             :         root->alloc_bytenr = 0;
     726             : 
     727             :         return root;
     728             : }
     729             : #endif
     730             : 
     731             : static int global_root_cmp(struct rb_node *a_node, const struct rb_node *b_node)
     732             : {
     733        9643 :         const struct btrfs_root *a = rb_entry(a_node, struct btrfs_root, rb_node);
     734        9643 :         const struct btrfs_root *b = rb_entry(b_node, struct btrfs_root, rb_node);
     735             : 
     736        9643 :         return btrfs_comp_cpu_keys(&a->root_key, &b->root_key);
     737             : }
     738             : 
     739             : static int global_root_key_cmp(const void *k, const struct rb_node *node)
     740             : {
     741   356280096 :         const struct btrfs_key *key = k;
     742   356280096 :         const struct btrfs_root *root = rb_entry(node, struct btrfs_root, rb_node);
     743             : 
     744   356280096 :         return btrfs_comp_cpu_keys(key, &root->root_key);
     745             : }
     746             : 
     747        9647 : int btrfs_global_root_insert(struct btrfs_root *root)
     748             : {
     749        9647 :         struct btrfs_fs_info *fs_info = root->fs_info;
     750        9647 :         struct rb_node *tmp;
     751        9647 :         int ret = 0;
     752             : 
     753        9647 :         write_lock(&fs_info->global_root_lock);
     754        9647 :         tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp);
     755        9647 :         write_unlock(&fs_info->global_root_lock);
     756             : 
     757        9647 :         if (tmp) {
     758           0 :                 ret = -EEXIST;
     759           0 :                 btrfs_warn(fs_info, "global root %llu %llu already exists",
     760             :                                 root->root_key.objectid, root->root_key.offset);
     761             :         }
     762        9647 :         return ret;
     763             : }
     764             : 
     765           5 : void btrfs_global_root_delete(struct btrfs_root *root)
     766             : {
     767           5 :         struct btrfs_fs_info *fs_info = root->fs_info;
     768             : 
     769           5 :         write_lock(&fs_info->global_root_lock);
     770           5 :         rb_erase(&root->rb_node, &fs_info->global_root_tree);
     771           5 :         write_unlock(&fs_info->global_root_lock);
     772           5 : }
     773             : 
     774   183924664 : struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
     775             :                                      struct btrfs_key *key)
     776             : {
     777   183924664 :         struct rb_node *node;
     778   183924664 :         struct btrfs_root *root = NULL;
     779             : 
     780   183924664 :         read_lock(&fs_info->global_root_lock);
     781   183919401 :         node = rb_find(key, &fs_info->global_root_tree, global_root_key_cmp);
     782   183919401 :         if (node)
     783   183919401 :                 root = container_of(node, struct btrfs_root, rb_node);
     784   183919401 :         read_unlock(&fs_info->global_root_lock);
     785             : 
     786   183931715 :         return root;
     787             : }
     788             : 
     789   141125451 : static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
     790             : {
     791   141125451 :         struct btrfs_block_group *block_group;
     792   141125451 :         u64 ret;
     793             : 
     794   141125451 :         if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
     795             :                 return 0;
     796             : 
     797           0 :         if (bytenr)
     798           0 :                 block_group = btrfs_lookup_block_group(fs_info, bytenr);
     799             :         else
     800           0 :                 block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
     801           0 :         ASSERT(block_group);
     802           0 :         if (!block_group)
     803             :                 return 0;
     804           0 :         ret = block_group->global_root_id;
     805           0 :         btrfs_put_block_group(block_group);
     806             : 
     807           0 :         return ret;
     808             : }
     809             : 
     810    11546250 : struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
     811             : {
     812    23091003 :         struct btrfs_key key = {
     813             :                 .objectid = BTRFS_CSUM_TREE_OBJECTID,
     814             :                 .type = BTRFS_ROOT_ITEM_KEY,
     815    11546250 :                 .offset = btrfs_global_root_id(fs_info, bytenr),
     816             :         };
     817             : 
     818    11544753 :         return btrfs_global_root(fs_info, &key);
     819             : }
     820             : 
     821   129583332 : struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
     822             : {
     823   259163527 :         struct btrfs_key key = {
     824             :                 .objectid = BTRFS_EXTENT_TREE_OBJECTID,
     825             :                 .type = BTRFS_ROOT_ITEM_KEY,
     826   129583332 :                 .offset = btrfs_global_root_id(fs_info, bytenr),
     827             :         };
     828             : 
     829   129580195 :         return btrfs_global_root(fs_info, &key);
     830             : }
     831             : 
     832      582817 : struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
     833             : {
     834      582817 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
     835           0 :                 return fs_info->block_group_root;
     836      582817 :         return btrfs_extent_root(fs_info, 0);
     837             : }
     838             : 
     839         169 : struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
     840             :                                      u64 objectid)
     841             : {
     842         169 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     843         169 :         struct extent_buffer *leaf;
     844         169 :         struct btrfs_root *tree_root = fs_info->tree_root;
     845         169 :         struct btrfs_root *root;
     846         169 :         struct btrfs_key key;
     847         169 :         unsigned int nofs_flag;
     848         169 :         int ret = 0;
     849             : 
     850             :         /*
     851             :          * We're holding a transaction handle, so use a NOFS memory allocation
     852             :          * context to avoid deadlock if reclaim happens.
     853             :          */
     854         169 :         nofs_flag = memalloc_nofs_save();
     855         169 :         root = btrfs_alloc_root(fs_info, objectid, GFP_KERNEL);
     856         169 :         memalloc_nofs_restore(nofs_flag);
     857         169 :         if (!root)
     858             :                 return ERR_PTR(-ENOMEM);
     859             : 
     860         169 :         root->root_key.objectid = objectid;
     861         169 :         root->root_key.type = BTRFS_ROOT_ITEM_KEY;
     862         169 :         root->root_key.offset = 0;
     863             : 
     864         169 :         leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0,
     865             :                                       BTRFS_NESTING_NORMAL);
     866         169 :         if (IS_ERR(leaf)) {
     867           0 :                 ret = PTR_ERR(leaf);
     868           0 :                 leaf = NULL;
     869           0 :                 goto fail;
     870             :         }
     871             : 
     872         169 :         root->node = leaf;
     873         169 :         btrfs_mark_buffer_dirty(leaf);
     874             : 
     875         169 :         root->commit_root = btrfs_root_node(root);
     876         169 :         set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
     877             : 
     878         169 :         btrfs_set_root_flags(&root->root_item, 0);
     879         169 :         btrfs_set_root_limit(&root->root_item, 0);
     880         169 :         btrfs_set_root_bytenr(&root->root_item, leaf->start);
     881         169 :         btrfs_set_root_generation(&root->root_item, trans->transid);
     882         169 :         btrfs_set_root_level(&root->root_item, 0);
     883         169 :         btrfs_set_root_refs(&root->root_item, 1);
     884         169 :         btrfs_set_root_used(&root->root_item, leaf->len);
     885         169 :         btrfs_set_root_last_snapshot(&root->root_item, 0);
     886         169 :         btrfs_set_root_dirid(&root->root_item, 0);
     887         169 :         if (is_fstree(objectid))
     888           0 :                 generate_random_guid(root->root_item.uuid);
     889             :         else
     890         169 :                 export_guid(root->root_item.uuid, &guid_null);
     891         169 :         btrfs_set_root_drop_level(&root->root_item, 0);
     892             : 
     893         169 :         btrfs_tree_unlock(leaf);
     894             : 
     895         169 :         key.objectid = objectid;
     896         169 :         key.type = BTRFS_ROOT_ITEM_KEY;
     897         169 :         key.offset = 0;
     898         169 :         ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
     899         169 :         if (ret)
     900           0 :                 goto fail;
     901             : 
     902             :         return root;
     903             : 
     904           0 : fail:
     905           0 :         btrfs_put_root(root);
     906             : 
     907           0 :         return ERR_PTR(ret);
     908             : }
     909             : 
     910        8520 : static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
     911             :                                          struct btrfs_fs_info *fs_info)
     912             : {
     913        8520 :         struct btrfs_root *root;
     914             : 
     915        8520 :         root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_NOFS);
     916        8521 :         if (!root)
     917             :                 return ERR_PTR(-ENOMEM);
     918             : 
     919        8521 :         root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
     920        8521 :         root->root_key.type = BTRFS_ROOT_ITEM_KEY;
     921        8521 :         root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
     922             : 
     923        8521 :         return root;
     924             : }
     925             : 
     926        8521 : int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
     927             :                               struct btrfs_root *root)
     928             : {
     929        8521 :         struct extent_buffer *leaf;
     930             : 
     931             :         /*
     932             :          * DON'T set SHAREABLE bit for log trees.
     933             :          *
     934             :          * Log trees are not exposed to user space thus can't be snapshotted,
     935             :          * and they go away before a real commit is actually done.
     936             :          *
     937             :          * They do store pointers to file data extents, and those reference
     938             :          * counts still get updated (along with back refs to the log tree).
     939             :          */
     940             : 
     941        8521 :         leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
     942             :                         NULL, 0, 0, 0, BTRFS_NESTING_NORMAL);
     943        8520 :         if (IS_ERR(leaf))
     944           0 :                 return PTR_ERR(leaf);
     945             : 
     946        8520 :         root->node = leaf;
     947             : 
     948        8520 :         btrfs_mark_buffer_dirty(root->node);
     949        8521 :         btrfs_tree_unlock(root->node);
     950             : 
     951        8521 :         return 0;
     952             : }
     953             : 
     954        4235 : int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
     955             :                              struct btrfs_fs_info *fs_info)
     956             : {
     957        4235 :         struct btrfs_root *log_root;
     958             : 
     959        4235 :         log_root = alloc_log_tree(trans, fs_info);
     960        4235 :         if (IS_ERR(log_root))
     961           0 :                 return PTR_ERR(log_root);
     962             : 
     963        4235 :         if (!btrfs_is_zoned(fs_info)) {
     964        4235 :                 int ret = btrfs_alloc_log_tree_node(trans, log_root);
     965             : 
     966        4235 :                 if (ret) {
     967           0 :                         btrfs_put_root(log_root);
     968           0 :                         return ret;
     969             :                 }
     970             :         }
     971             : 
     972        4235 :         WARN_ON(fs_info->log_root_tree);
     973        4235 :         fs_info->log_root_tree = log_root;
     974        4235 :         return 0;
     975             : }
     976             : 
     977        4285 : int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
     978             :                        struct btrfs_root *root)
     979             : {
     980        4285 :         struct btrfs_fs_info *fs_info = root->fs_info;
     981        4285 :         struct btrfs_root *log_root;
     982        4285 :         struct btrfs_inode_item *inode_item;
     983        4285 :         int ret;
     984             : 
     985        4285 :         log_root = alloc_log_tree(trans, fs_info);
     986        4286 :         if (IS_ERR(log_root))
     987           0 :                 return PTR_ERR(log_root);
     988             : 
     989        4286 :         ret = btrfs_alloc_log_tree_node(trans, log_root);
     990        4286 :         if (ret) {
     991           0 :                 btrfs_put_root(log_root);
     992           0 :                 return ret;
     993             :         }
     994             : 
     995        4286 :         log_root->last_trans = trans->transid;
     996        4286 :         log_root->root_key.offset = root->root_key.objectid;
     997             : 
     998        4286 :         inode_item = &log_root->root_item.inode;
     999        4286 :         btrfs_set_stack_inode_generation(inode_item, 1);
    1000        4286 :         btrfs_set_stack_inode_size(inode_item, 3);
    1001        4286 :         btrfs_set_stack_inode_nlink(inode_item, 1);
    1002        4286 :         btrfs_set_stack_inode_nbytes(inode_item,
    1003        4286 :                                      fs_info->nodesize);
    1004        4286 :         btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
    1005             : 
    1006        4286 :         btrfs_set_root_node(&log_root->root_item, log_root->node);
    1007             : 
    1008        4286 :         WARN_ON(root->log_root);
    1009        4286 :         root->log_root = log_root;
    1010        4286 :         root->log_transid = 0;
    1011        4286 :         root->log_transid_committed = -1;
    1012        4286 :         root->last_log_commit = 0;
    1013        4286 :         return 0;
    1014             : }
    1015             : 
    1016       51338 : static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
    1017             :                                               struct btrfs_path *path,
    1018             :                                               struct btrfs_key *key)
    1019             : {
    1020       51338 :         struct btrfs_root *root;
    1021       51338 :         struct btrfs_tree_parent_check check = { 0 };
    1022       51338 :         struct btrfs_fs_info *fs_info = tree_root->fs_info;
    1023       51338 :         u64 generation;
    1024       51338 :         int ret;
    1025       51338 :         int level;
    1026             : 
    1027       51338 :         root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
    1028       51338 :         if (!root)
    1029             :                 return ERR_PTR(-ENOMEM);
    1030             : 
    1031       51338 :         ret = btrfs_find_root(tree_root, key, path,
    1032             :                               &root->root_item, &root->root_key);
    1033       51338 :         if (ret) {
    1034        3198 :                 if (ret > 0)
    1035        3198 :                         ret = -ENOENT;
    1036        3198 :                 goto fail;
    1037             :         }
    1038             : 
    1039       48140 :         generation = btrfs_root_generation(&root->root_item);
    1040       48140 :         level = btrfs_root_level(&root->root_item);
    1041       48140 :         check.level = level;
    1042       48140 :         check.transid = generation;
    1043       48140 :         check.owner_root = key->objectid;
    1044       48140 :         root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item),
    1045             :                                      &check);
    1046       48140 :         if (IS_ERR(root->node)) {
    1047           0 :                 ret = PTR_ERR(root->node);
    1048           0 :                 root->node = NULL;
    1049           0 :                 goto fail;
    1050             :         }
    1051       48140 :         if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
    1052           0 :                 ret = -EIO;
    1053           0 :                 goto fail;
    1054             :         }
    1055             : 
    1056             :         /*
    1057             :          * For real fs, and not log/reloc trees, root owner must
    1058             :          * match its root node owner
    1059             :          */
    1060       48140 :         if (!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state) &&
    1061       48140 :             root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
    1062       28436 :             root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
    1063       28436 :             root->root_key.objectid != btrfs_header_owner(root->node)) {
    1064           0 :                 btrfs_crit(fs_info,
    1065             : "root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
    1066             :                            root->root_key.objectid, root->node->start,
    1067             :                            btrfs_header_owner(root->node),
    1068             :                            root->root_key.objectid);
    1069           0 :                 ret = -EUCLEAN;
    1070           0 :                 goto fail;
    1071             :         }
    1072       48140 :         root->commit_root = btrfs_root_node(root);
    1073       48140 :         return root;
    1074        3198 : fail:
    1075        3198 :         btrfs_put_root(root);
    1076        3198 :         return ERR_PTR(ret);
    1077             : }
    1078             : 
    1079       41647 : struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
    1080             :                                         struct btrfs_key *key)
    1081             : {
    1082       41647 :         struct btrfs_root *root;
    1083       41647 :         struct btrfs_path *path;
    1084             : 
    1085       41647 :         path = btrfs_alloc_path();
    1086       41647 :         if (!path)
    1087             :                 return ERR_PTR(-ENOMEM);
    1088       41647 :         root = read_tree_root_path(tree_root, path, key);
    1089       41647 :         btrfs_free_path(path);
    1090             : 
    1091       41647 :         return root;
    1092             : }
    1093             : 
    1094             : /*
    1095             :  * Initialize subvolume root in-memory structure
    1096             :  *
    1097             :  * @anon_dev:   anonymous device to attach to the root; if zero, allocate a new one
    1098             :  */
    1099       12292 : static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
    1100             : {
    1101       12292 :         int ret;
    1102             : 
    1103       12292 :         btrfs_drew_lock_init(&root->snapshot_lock);
    1104             : 
    1105       12292 :         if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
    1106             :             !btrfs_is_data_reloc_root(root)) {
    1107        9075 :                 set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
    1108        9075 :                 btrfs_check_and_init_root_item(&root->root_item);
    1109             :         }
    1110             : 
    1111             :         /*
    1112             :          * Don't assign anonymous block device to roots that are not exposed to
    1113             :          * userspace, the id pool is limited to 1M
    1114             :          */
    1115       18150 :         if (is_fstree(root->root_key.objectid) &&
    1116             :             btrfs_root_refs(&root->root_item) > 0) {
    1117        8875 :                 if (!anon_dev) {
    1118        7597 :                         ret = get_anon_bdev(&root->anon_dev);
    1119        7597 :                         if (ret)
    1120           0 :                                 goto fail;
    1121             :                 } else {
    1122        1278 :                         root->anon_dev = anon_dev;
    1123             :                 }
    1124             :         }
    1125             : 
    1126       12292 :         mutex_lock(&root->objectid_mutex);
    1127       12292 :         ret = btrfs_init_root_free_objectid(root);
    1128       12292 :         if (ret) {
    1129           0 :                 mutex_unlock(&root->objectid_mutex);
    1130           0 :                 goto fail;
    1131             :         }
    1132             : 
    1133       12292 :         ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
    1134             : 
    1135       12292 :         mutex_unlock(&root->objectid_mutex);
    1136             : 
    1137       12292 :         return 0;
    1138             : fail:
    1139             :         /* The caller is responsible for releasing the root with btrfs_put_root() */
    1140             :         return ret;
    1141             : }
    1142             : 
    1143    11308973 : static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
    1144             :                                                u64 root_id)
    1145             : {
    1146    11308973 :         struct btrfs_root *root;
    1147             : 
    1148    11308973 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1149    11309324 :         root = radix_tree_lookup(&fs_info->fs_roots_radix,
    1150             :                                  (unsigned long)root_id);
    1151    11309324 :         root = btrfs_grab_root(root);
    1152    11309324 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1153    11309324 :         return root;
    1154             : }
    1155             : 
    1156    11386752 : static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
    1157             :                                                 u64 objectid)
    1158             : {
    1159    11386752 :         struct btrfs_key key = {
    1160             :                 .objectid = objectid,
    1161             :                 .type = BTRFS_ROOT_ITEM_KEY,
    1162             :                 .offset = 0,
    1163             :         };
    1164             : 
    1165    11386752 :         switch (objectid) {
    1166        7502 :         case BTRFS_ROOT_TREE_OBJECTID:
    1167        7502 :                 return btrfs_grab_root(fs_info->tree_root);
    1168       36802 :         case BTRFS_EXTENT_TREE_OBJECTID:
    1169       36802 :                 return btrfs_grab_root(btrfs_global_root(fs_info, &key));
    1170         210 :         case BTRFS_CHUNK_TREE_OBJECTID:
    1171         210 :                 return btrfs_grab_root(fs_info->chunk_root);
    1172         211 :         case BTRFS_DEV_TREE_OBJECTID:
    1173         211 :                 return btrfs_grab_root(fs_info->dev_root);
    1174       30625 :         case BTRFS_CSUM_TREE_OBJECTID:
    1175       30625 :                 return btrfs_grab_root(btrfs_global_root(fs_info, &key));
    1176         153 :         case BTRFS_QUOTA_TREE_OBJECTID:
    1177         153 :                 return btrfs_grab_root(fs_info->quota_root);
    1178        2066 :         case BTRFS_UUID_TREE_OBJECTID:
    1179        2066 :                 return btrfs_grab_root(fs_info->uuid_root);
    1180           0 :         case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
    1181           0 :                 return btrfs_grab_root(fs_info->block_group_root);
    1182         182 :         case BTRFS_FREE_SPACE_TREE_OBJECTID:
    1183         182 :                 return btrfs_grab_root(btrfs_global_root(fs_info, &key));
    1184             :         default:
    1185             :                 return NULL;
    1186             :         }
    1187             : }
    1188             : 
    1189       12292 : int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
    1190             :                          struct btrfs_root *root)
    1191             : {
    1192       12292 :         int ret;
    1193             : 
    1194       12292 :         ret = radix_tree_preload(GFP_NOFS);
    1195       12292 :         if (ret)
    1196             :                 return ret;
    1197             : 
    1198       12292 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1199       12292 :         ret = radix_tree_insert(&fs_info->fs_roots_radix,
    1200       12292 :                                 (unsigned long)root->root_key.objectid,
    1201             :                                 root);
    1202       12292 :         if (ret == 0) {
    1203       12292 :                 btrfs_grab_root(root);
    1204       12292 :                 set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
    1205             :         }
    1206       12292 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1207       12292 :         radix_tree_preload_end();
    1208             : 
    1209       12292 :         return ret;
    1210             : }
    1211             : 
    1212           0 : void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
    1213             : {
    1214             : #ifdef CONFIG_BTRFS_DEBUG
    1215             :         struct btrfs_root *root;
    1216             : 
    1217             :         while (!list_empty(&fs_info->allocated_roots)) {
    1218             :                 char buf[BTRFS_ROOT_NAME_BUF_LEN];
    1219             : 
    1220             :                 root = list_first_entry(&fs_info->allocated_roots,
    1221             :                                         struct btrfs_root, leak_list);
    1222             :                 btrfs_err(fs_info, "leaked root %s refcount %d",
    1223             :                           btrfs_root_name(&root->root_key, buf),
    1224             :                           refcount_read(&root->refs));
    1225             :                 while (refcount_read(&root->refs) > 1)
    1226             :                         btrfs_put_root(root);
    1227             :                 btrfs_put_root(root);
    1228             :         }
    1229             : #endif
    1230           0 : }
    1231             : 
    1232        3473 : static void free_global_roots(struct btrfs_fs_info *fs_info)
    1233             : {
    1234        3473 :         struct btrfs_root *root;
    1235        3473 :         struct rb_node *node;
    1236             : 
    1237       13115 :         while ((node = rb_first_postorder(&fs_info->global_root_tree)) != NULL) {
    1238        9642 :                 root = rb_entry(node, struct btrfs_root, rb_node);
    1239        9642 :                 rb_erase(&root->rb_node, &fs_info->global_root_tree);
    1240        9642 :                 btrfs_put_root(root);
    1241             :         }
    1242        3473 : }
    1243             : 
    1244        3473 : void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
    1245             : {
    1246        3473 :         percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
    1247        3473 :         percpu_counter_destroy(&fs_info->delalloc_bytes);
    1248        3473 :         percpu_counter_destroy(&fs_info->ordered_bytes);
    1249        3473 :         percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
    1250        3473 :         btrfs_free_csum_hash(fs_info);
    1251        3473 :         btrfs_free_stripe_hash_table(fs_info);
    1252        3473 :         btrfs_free_ref_cache(fs_info);
    1253        3473 :         kfree(fs_info->balance_ctl);
    1254        3473 :         kfree(fs_info->delayed_root);
    1255        3473 :         free_global_roots(fs_info);
    1256        3473 :         btrfs_put_root(fs_info->tree_root);
    1257        3473 :         btrfs_put_root(fs_info->chunk_root);
    1258        3473 :         btrfs_put_root(fs_info->dev_root);
    1259        3473 :         btrfs_put_root(fs_info->quota_root);
    1260        3473 :         btrfs_put_root(fs_info->uuid_root);
    1261        3473 :         btrfs_put_root(fs_info->fs_root);
    1262        3473 :         btrfs_put_root(fs_info->data_reloc_root);
    1263        3473 :         btrfs_put_root(fs_info->block_group_root);
    1264        3473 :         btrfs_check_leaked_roots(fs_info);
    1265        3473 :         btrfs_extent_buffer_leak_debug_check(fs_info);
    1266        3473 :         kfree(fs_info->super_copy);
    1267        3473 :         kfree(fs_info->super_for_commit);
    1268        3473 :         kfree(fs_info->subpage_info);
    1269        3473 :         kvfree(fs_info);
    1270        3473 : }
    1271             : 
    1272             : 
    1273             : /*
    1274             :  * Get an in-memory reference of a root structure.
    1275             :  *
    1276             :  * For essential trees like root/extent tree, we grab it from fs_info directly.
    1277             :  * For subvolume trees, we check the cached filesystem roots first. If not
    1278             :  * found, then read it from disk and add it to cached fs roots.
    1279             :  *
    1280             :  * Caller should release the root by calling btrfs_put_root() after the usage.
    1281             :  *
    1282             :  * NOTE: Reloc and log trees can't be read by this function as they share the
    1283             :  *       same root objectid.
    1284             :  *
    1285             :  * @objectid:   root id
    1286             :  * @anon_dev:   preallocated anonymous block device number for new roots,
    1287             :  *              pass 0 for new allocation.
    1288             :  * @check_ref:  whether to check root item references; if true, return -ENOENT
    1289             :  *              for orphan roots
    1290             :  */
    1291     5241596 : static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
    1292             :                                              u64 objectid, dev_t anon_dev,
    1293             :                                              bool check_ref)
    1294             : {
    1295     5241596 :         struct btrfs_root *root;
    1296     5241596 :         struct btrfs_path *path;
    1297     5241596 :         struct btrfs_key key;
    1298     5241596 :         int ret;
    1299             : 
    1300     5241596 :         root = btrfs_get_global_root(fs_info, objectid);
    1301     5241595 :         if (root)
    1302             :                 return root;
    1303     5171998 : again:
    1304     5171998 :         root = btrfs_lookup_fs_root(fs_info, objectid);
    1305     5172000 :         if (root) {
    1306             :                 /* Shouldn't get preallocated anon_dev for cached roots */
    1307     5159708 :                 ASSERT(!anon_dev);
    1308     5159708 :                 if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
    1309           0 :                         btrfs_put_root(root);
    1310           0 :                         return ERR_PTR(-ENOENT);
    1311             :                 }
    1312             :                 return root;
    1313             :         }
    1314             : 
    1315       12292 :         key.objectid = objectid;
    1316       12292 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1317       12292 :         key.offset = (u64)-1;
    1318       12292 :         root = btrfs_read_tree_root(fs_info->tree_root, &key);
    1319       12292 :         if (IS_ERR(root))
    1320           0 :                 return root;
    1321             : 
    1322       12292 :         if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
    1323           0 :                 ret = -ENOENT;
    1324           0 :                 goto fail;
    1325             :         }
    1326             : 
    1327       12292 :         ret = btrfs_init_fs_root(root, anon_dev);
    1328       12292 :         if (ret)
    1329           0 :                 goto fail;
    1330             : 
    1331       12292 :         path = btrfs_alloc_path();
    1332       12292 :         if (!path) {
    1333           0 :                 ret = -ENOMEM;
    1334           0 :                 goto fail;
    1335             :         }
    1336       12292 :         key.objectid = BTRFS_ORPHAN_OBJECTID;
    1337       12292 :         key.type = BTRFS_ORPHAN_ITEM_KEY;
    1338       12292 :         key.offset = objectid;
    1339             : 
    1340       12292 :         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
    1341       12292 :         btrfs_free_path(path);
    1342       12292 :         if (ret < 0)
    1343           0 :                 goto fail;
    1344       12292 :         if (ret == 0)
    1345         200 :                 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
    1346             : 
    1347       12292 :         ret = btrfs_insert_fs_root(fs_info, root);
    1348       12292 :         if (ret) {
    1349           0 :                 if (ret == -EEXIST) {
    1350           0 :                         btrfs_put_root(root);
    1351           0 :                         goto again;
    1352             :                 }
    1353           0 :                 goto fail;
    1354             :         }
    1355             :         return root;
    1356           0 : fail:
    1357             :         /*
    1358             :          * If our caller provided us an anonymous device, then it's the caller's
    1359             :          * responsibility to free it in case we fail. So we have to set our
    1360             :          * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
    1361             :          * and once again by our caller.
    1362             :          */
    1363           0 :         if (anon_dev)
    1364           0 :                 root->anon_dev = 0;
    1365           0 :         btrfs_put_root(root);
    1366           0 :         return ERR_PTR(ret);
    1367             : }
    1368             : 
    1369             : /*
    1370             :  * Get in-memory reference of a root structure
    1371             :  *
    1372             :  * @objectid:   tree objectid
    1373             :  * @check_ref:  if set, verify that the tree exists and the item has at least
    1374             :  *              one reference
    1375             :  */
    1376     5233884 : struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
    1377             :                                      u64 objectid, bool check_ref)
    1378             : {
    1379     5233884 :         return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
    1380             : }
    1381             : 
    1382             : /*
    1383             :  * Get in-memory reference of a root structure, created as new, optionally pass
    1384             :  * the anonymous block device id
    1385             :  *
    1386             :  * @objectid:   tree objectid
    1387             :  * @anon_dev:   if zero, allocate a new anonymous block device; otherwise use
    1388             :  *              the given value
    1389             :  */
    1390        1278 : struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
    1391             :                                          u64 objectid, dev_t anon_dev)
    1392             : {
    1393        1278 :         return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
    1394             : }
    1395             : 
    1396             : /*
    1397             :  * btrfs_get_fs_root_commit_root - return a root for the given objectid
    1398             :  * @fs_info:    the fs_info
    1399             :  * @objectid:   the objectid we need to lookup
    1400             :  *
    1401             :  * This is exclusively used for backref walking, and exists specifically because
    1402             :  * of how qgroups does lookups.  Qgroups will do a backref lookup at delayed ref
    1403             :  * creation time, which means we may have to read the tree_root in order to look
    1404             :  * up a fs root that is not in memory.  If the root is not in memory we will
    1405             :  * read the tree root commit root and look up the fs root from there.  This is a
    1406             :  * temporary root; it will not be inserted into the radix tree as it doesn't
    1407             :  * have the most up-to-date information, and it'll simply be discarded once the
    1408             :  * backref code is finished using the root.
    1409             :  */
    1410     6145163 : struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
    1411             :                                                  struct btrfs_path *path,
    1412             :                                                  u64 objectid)
    1413             : {
    1414     6145163 :         struct btrfs_root *root;
    1415     6145163 :         struct btrfs_key key;
    1416             : 
    1417     6145163 :         ASSERT(path->search_commit_root && path->skip_locking);
    1418             : 
    1419             :         /*
    1420             :          * This can return -ENOENT if we ask for a root that doesn't exist, but
    1421             :          * since this is called via the backref walking code we won't be looking
    1422             :          * up a root that doesn't exist, unless there's corruption.  So if root
    1423             :          * != NULL just return it.
    1424             :          */
    1425     6145163 :         root = btrfs_get_global_root(fs_info, objectid);
    1426     6145120 :         if (root)
    1427             :                 return root;
    1428             : 
    1429     6136969 :         root = btrfs_lookup_fs_root(fs_info, objectid);
    1430     6137323 :         if (root)
    1431             :                 return root;
    1432             : 
    1433          46 :         key.objectid = objectid;
    1434          46 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1435          46 :         key.offset = (u64)-1;
    1436          46 :         root = read_tree_root_path(fs_info->tree_root, path, &key);
    1437          46 :         btrfs_release_path(path);
    1438             : 
    1439          46 :         return root;
    1440             : }
    1441             : 
    1442        3217 : static int cleaner_kthread(void *arg)
    1443             : {
    1444        3217 :         struct btrfs_fs_info *fs_info = arg;
    1445       53707 :         int again;
    1446             : 
    1447       53707 :         while (1) {
    1448       53707 :                 again = 0;
    1449             : 
    1450       53707 :                 set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
    1451             : 
    1452             :                 /* Make the cleaner go to sleep early. */
    1453       53707 :                 if (btrfs_need_cleaner_sleep(fs_info))
    1454        3300 :                         goto sleep;
    1455             : 
    1456             :                 /*
    1457             :                  * Do not do anything if we might cause open_ctree() to block
    1458             :                  * before we have finished mounting the filesystem.
    1459             :                  */
    1460       50406 :                 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
    1461        6354 :                         goto sleep;
    1462             : 
    1463       44052 :                 if (!mutex_trylock(&fs_info->cleaner_mutex))
    1464          40 :                         goto sleep;
    1465             : 
    1466             :                 /*
    1467             :                  * Recheck, because the status of the fs may have changed
    1468             :                  * between the above check and the trylock.
    1469             :                  */
    1470       44013 :                 if (btrfs_need_cleaner_sleep(fs_info)) {
    1471           0 :                         mutex_unlock(&fs_info->cleaner_mutex);
    1472           0 :                         goto sleep;
    1473             :                 }
    1474             : 
    1475       44013 :                 if (test_and_clear_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags))
    1476          31 :                         btrfs_sysfs_feature_update(fs_info);
    1477             : 
    1478       44013 :                 btrfs_run_delayed_iputs(fs_info);
    1479             : 
    1480       44013 :                 again = btrfs_clean_one_deleted_snapshot(fs_info);
    1481       44012 :                 mutex_unlock(&fs_info->cleaner_mutex);
    1482             : 
    1483             :                 /*
    1484             :                  * The defragger has dealt with the R/O remount and umount,
    1485             :                  * needn't do anything special here.
    1486             :                  */
    1487       44013 :                 btrfs_run_defrag_inodes(fs_info);
    1488             : 
    1489             :                 /*
    1490             :                  * Acquires fs_info->reclaim_bgs_lock to avoid racing
    1491             :                  * with relocation (btrfs_relocate_chunk) and relocation
    1492             :                  * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
    1493             :                  * after acquiring fs_info->reclaim_bgs_lock. So we
    1494             :                  * can't hold, nor need to, fs_info->cleaner_mutex when deleting
    1495             :                  * unused block groups.
    1496             :                  */
    1497       44013 :                 btrfs_delete_unused_bgs(fs_info);
    1498             : 
    1499             :                 /*
    1500             :                  * Reclaim block groups in the reclaim_bgs list after we deleted
    1501             :                  * all unused block_groups. This possibly gives us some more free
    1502             :                  * space.
    1503             :                  */
    1504       44013 :                 btrfs_reclaim_bgs(fs_info);
    1505       53707 : sleep:
    1506       53707 :                 clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
    1507       53704 :                 if (kthread_should_park())
    1508        3217 :                         kthread_parkme();
    1509       53705 :                 if (kthread_should_stop())
    1510        3217 :                         return 0;
    1511       50487 :                 if (!again) {
    1512       50368 :                         set_current_state(TASK_INTERRUPTIBLE);
    1513       50371 :                         schedule();
    1514       50371 :                         __set_current_state(TASK_RUNNING);
    1515             :                 }
    1516             :         }
    1517             : }
    1518             : 
    1519        3217 : static int transaction_kthread(void *arg)
    1520             : {
    1521        3217 :         struct btrfs_root *root = arg;
    1522        3217 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1523        4175 :         struct btrfs_trans_handle *trans;
    1524        4175 :         struct btrfs_transaction *cur;
    1525        4175 :         u64 transid;
    1526        4175 :         time64_t delta;
    1527        4175 :         unsigned long delay;
    1528        4175 :         bool cannot_commit;
    1529             : 
    1530        4175 :         do {
    1531        4175 :                 cannot_commit = false;
    1532        4175 :                 delay = msecs_to_jiffies(fs_info->commit_interval * 1000);
    1533        4174 :                 mutex_lock(&fs_info->transaction_kthread_mutex);
    1534             : 
    1535        4176 :                 spin_lock(&fs_info->trans_lock);
    1536        4176 :                 cur = fs_info->running_transaction;
    1537        4176 :                 if (!cur) {
    1538        3900 :                         spin_unlock(&fs_info->trans_lock);
    1539        3900 :                         goto sleep;
    1540             :                 }
    1541             : 
    1542         276 :                 delta = ktime_get_seconds() - cur->start_time;
    1543         276 :                 if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
    1544         272 :                     cur->state < TRANS_STATE_COMMIT_START &&
    1545         270 :                     delta < fs_info->commit_interval) {
    1546         128 :                         spin_unlock(&fs_info->trans_lock);
    1547         128 :                         delay -= msecs_to_jiffies((delta - 1) * 1000);
    1548         128 :                         delay = min(delay,
    1549             :                                     msecs_to_jiffies(fs_info->commit_interval * 1000));
    1550         128 :                         goto sleep;
    1551             :                 }
    1552         148 :                 transid = cur->transid;
    1553         148 :                 spin_unlock(&fs_info->trans_lock);
    1554             : 
    1555             :                 /* If the file system is aborted, this will always fail. */
    1556         148 :                 trans = btrfs_attach_transaction(root);
    1557         148 :                 if (IS_ERR(trans)) {
    1558           2 :                         if (PTR_ERR(trans) != -ENOENT)
    1559           0 :                                 cannot_commit = true;
    1560           2 :                         goto sleep;
    1561             :                 }
    1562         146 :                 if (transid == trans->transid) {
    1563         146 :                         btrfs_commit_transaction(trans);
    1564             :                 } else {
    1565           0 :                         btrfs_end_transaction(trans);
    1566             :                 }
    1567        4176 : sleep:
    1568        4176 :                 wake_up_process(fs_info->cleaner_kthread);
    1569        4176 :                 mutex_unlock(&fs_info->transaction_kthread_mutex);
    1570             : 
    1571        4176 :                 if (BTRFS_FS_ERROR(fs_info))
    1572           3 :                         btrfs_cleanup_transaction(fs_info);
    1573        8352 :                 if (!kthread_should_stop() &&
    1574        4177 :                                 (!btrfs_transaction_blocked(fs_info) ||
    1575             :                                  cannot_commit))
    1576        4175 :                         schedule_timeout_interruptible(delay);
    1577        4176 :         } while (!kthread_should_stop());
    1578        3217 :         return 0;
    1579             : }
    1580             : 
    1581             : /*
    1582             :  * This will find the highest generation in the array of root backups.  The
    1583             :  * index of that backup is returned, or -EINVAL if we can't find
    1584             :  * anything.
    1585             :  *
    1586             :  * We check to make sure the array is valid by comparing the
    1587             :  * generation of the latest root in the array with the generation
    1588             :  * in the super block.  If they don't match we discard it.
    1589             :  */
    1590        3218 : static int find_newest_super_backup(struct btrfs_fs_info *info)
    1591             : {
    1592        3218 :         const u64 newest_gen = btrfs_super_generation(info->super_copy);
    1593        3218 :         u64 cur;
    1594        3218 :         struct btrfs_root_backup *root_backup;
    1595        3218 :         int i;
    1596             : 
    1597        8026 :         for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
    1598        8026 :                 root_backup = info->super_copy->super_roots + i;
    1599        8026 :                 cur = btrfs_backup_tree_root_gen(root_backup);
    1600        8026 :                 if (cur == newest_gen)
    1601        3218 :                         return i;
    1602             :         }
    1603             : 
    1604             :         return -EINVAL;
    1605             : }
    1606             : 
    1607             : /*
    1608             :  * Copy all the root pointers into the super backup array.
    1609             :  * This will bump the backup pointer by one when it is
    1610             :  * done.
    1611             :  */
    1612      206170 : static void backup_super_roots(struct btrfs_fs_info *info)
    1613             : {
    1614      206170 :         const int next_backup = info->backup_root_index;
    1615      206170 :         struct btrfs_root_backup *root_backup;
    1616             : 
    1617      206170 :         root_backup = info->super_for_commit->super_roots + next_backup;
    1618             : 
    1619             :         /*
    1620             :          * make sure all of our padding and empty slots get zero filled
    1621             :          * regardless of which ones we use today
    1622             :          */
    1623      206170 :         memset(root_backup, 0, sizeof(*root_backup));
    1624             : 
    1625      206170 :         info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
    1626             : 
    1627      206170 :         btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
    1628      206170 :         btrfs_set_backup_tree_root_gen(root_backup,
    1629      206170 :                                btrfs_header_generation(info->tree_root->node));
    1630             : 
    1631      206170 :         btrfs_set_backup_tree_root_level(root_backup,
    1632      206170 :                                btrfs_header_level(info->tree_root->node));
    1633             : 
    1634      206170 :         btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
    1635      206170 :         btrfs_set_backup_chunk_root_gen(root_backup,
    1636      206170 :                                btrfs_header_generation(info->chunk_root->node));
    1637      206170 :         btrfs_set_backup_chunk_root_level(root_backup,
    1638      206170 :                                btrfs_header_level(info->chunk_root->node));
    1639             : 
    1640      206170 :         if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
    1641      206170 :                 struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
    1642      206170 :                 struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
    1643             : 
    1644      206170 :                 btrfs_set_backup_extent_root(root_backup,
    1645      206170 :                                              extent_root->node->start);
    1646      206170 :                 btrfs_set_backup_extent_root_gen(root_backup,
    1647      206170 :                                 btrfs_header_generation(extent_root->node));
    1648      206170 :                 btrfs_set_backup_extent_root_level(root_backup,
    1649      206170 :                                         btrfs_header_level(extent_root->node));
    1650             : 
    1651      206170 :                 btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
    1652      206170 :                 btrfs_set_backup_csum_root_gen(root_backup,
    1653      206170 :                                                btrfs_header_generation(csum_root->node));
    1654      206170 :                 btrfs_set_backup_csum_root_level(root_backup,
    1655      206170 :                                                  btrfs_header_level(csum_root->node));
    1656             :         }
    1657             : 
    1658             :         /*
    1659             :          * we might commit during log recovery, which happens before we set
    1660             :          * the fs_root.  Make sure it is valid before we fill it in.
    1661             :          */
    1662      206170 :         if (info->fs_root && info->fs_root->node) {
    1663      205885 :                 btrfs_set_backup_fs_root(root_backup,
    1664             :                                          info->fs_root->node->start);
    1665      205885 :                 btrfs_set_backup_fs_root_gen(root_backup,
    1666      205885 :                                btrfs_header_generation(info->fs_root->node));
    1667      205885 :                 btrfs_set_backup_fs_root_level(root_backup,
    1668      205885 :                                btrfs_header_level(info->fs_root->node));
    1669             :         }
    1670             : 
    1671      206170 :         btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
    1672      206170 :         btrfs_set_backup_dev_root_gen(root_backup,
    1673      206170 :                                btrfs_header_generation(info->dev_root->node));
    1674      206170 :         btrfs_set_backup_dev_root_level(root_backup,
    1675      206170 :                                        btrfs_header_level(info->dev_root->node));
    1676             : 
    1677      206170 :         btrfs_set_backup_total_bytes(root_backup,
    1678      206170 :                              btrfs_super_total_bytes(info->super_copy));
    1679      206170 :         btrfs_set_backup_bytes_used(root_backup,
    1680      206170 :                              btrfs_super_bytes_used(info->super_copy));
    1681      206170 :         btrfs_set_backup_num_devices(root_backup,
    1682      206170 :                              btrfs_super_num_devices(info->super_copy));
    1683             : 
    1684             :         /*
    1685             :          * if we don't copy this out to the super_copy, it won't get remembered
    1686             :          * for the next commit
    1687             :          */
    1688      412340 :         memcpy(&info->super_copy->super_roots,
    1689             :                &info->super_for_commit->super_roots,
    1690             :                sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS);
    1691      206170 : }
    1692             : 
    1693             : /*
    1694             :  * read_backup_root - Reads a backup root based on the passed priority. Prio 0
    1695             :  * is the newest, prio 1/2/3 are 2nd newest/3rd newest/4th (oldest) backup roots
    1696             :  *
    1697             :  * fs_info - filesystem whose backup roots need to be read
    1698             :  * priority - priority of backup root required
    1699             :  *
    1700             :  * Returns backup root index on success and -EINVAL otherwise.
                        *
                        * -EINVAL is returned when priority is not below BTRFS_NUM_BACKUP_ROOTS
                        * or when find_newest_super_backup() reports a negative index.
                        *
                        * Side effect: for priority 0 the newest index is returned without
                        * touching the super block.  For any other valid priority the
                        * generation, root, root level, bytes_used, total_bytes and num_devices
                        * fields of fs_info->super_copy are overwritten from the selected
                        * backup slot.
    1701             :  */
    1702           0 : static int read_backup_root(struct btrfs_fs_info *fs_info, u8 priority)
    1703             : {
    1704           0 :         int backup_index = find_newest_super_backup(fs_info);
    1705           0 :         struct btrfs_super_block *super = fs_info->super_copy;
    1706           0 :         struct btrfs_root_backup *root_backup;
    1707             : 
    1708           0 :         if (priority < BTRFS_NUM_BACKUP_ROOTS && backup_index >= 0) {
    1709           0 :                 if (priority == 0)
    1710             :                         return backup_index;
    1711             : 
                                       /*
                                        * Step back 'priority' slots from the newest one.  Adding
                                        * BTRFS_NUM_BACKUP_ROOTS first keeps the value non-negative
                                        * before the modulo wraps it back into the slot array.
                                        */
    1712           0 :                 backup_index = backup_index + BTRFS_NUM_BACKUP_ROOTS - priority;
    1713           0 :                 backup_index %= BTRFS_NUM_BACKUP_ROOTS;
    1714             :         } else {
    1715             :                 return -EINVAL;
    1716             :         }
    1717             : 
    1718           0 :         root_backup = super->super_roots + backup_index;
    1719             : 
                               /* Copy the tree root description of the chosen backup into the super. */
    1720           0 :         btrfs_set_super_generation(super,
    1721             :                                    btrfs_backup_tree_root_gen(root_backup));
    1722           0 :         btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup));
    1723           0 :         btrfs_set_super_root_level(super,
    1724             :                                    btrfs_backup_tree_root_level(root_backup));
    1725           0 :         btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
    1726             : 
    1727             :         /*
    1728             :          * Fixme: the total bytes and num_devices need to match or we should
    1729             :          * need a fsck
    1730             :          */
    1731           0 :         btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
    1732           0 :         btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
    1733             : 
    1734           0 :         return backup_index;
    1735             : }
    1736             : 
    1737             : /*
                        * Helper to clean up workers: destroys every workqueue referenced from
                        * fs_info.  Queues torn down with the plain destroy_workqueue() are
                        * checked for NULL first; the ones handed to btrfs_destroy_workqueue()
                        * are passed unconditionally (presumably that helper accepts NULL --
                        * TODO confirm).  The metadata end-io queue is destroyed last, see the
                        * comment in the body.
                        */
    1738        3218 : static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
    1739             : {
    1740        3218 :         btrfs_destroy_workqueue(fs_info->fixup_workers);
    1741        3218 :         btrfs_destroy_workqueue(fs_info->delalloc_workers);
    1742        3218 :         btrfs_destroy_workqueue(fs_info->workers);
    1743        3218 :         if (fs_info->endio_workers)
    1744        3218 :                 destroy_workqueue(fs_info->endio_workers);
    1745        3218 :         if (fs_info->rmw_workers)
    1746        3218 :                 destroy_workqueue(fs_info->rmw_workers);
    1747        3218 :         if (fs_info->compressed_write_workers)
    1748        3218 :                 destroy_workqueue(fs_info->compressed_write_workers);
    1749        3218 :         btrfs_destroy_workqueue(fs_info->endio_write_workers);
    1750        3218 :         btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
    1751        3218 :         btrfs_destroy_workqueue(fs_info->delayed_workers);
    1752        3218 :         btrfs_destroy_workqueue(fs_info->caching_workers);
    1753        3218 :         btrfs_destroy_workqueue(fs_info->flush_workers);
    1754        3218 :         btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
    1755        3218 :         if (fs_info->discard_ctl.discard_workers)
    1756        3218 :                 destroy_workqueue(fs_info->discard_ctl.discard_workers);
    1757             :         /*
    1758             :          * Now that all other work queues are destroyed, we can safely destroy
    1759             :          * the queues used for metadata I/O, since tasks from those other work
    1760             :          * queues can do metadata I/O operations.
    1761             :          */
    1762        3218 :         if (fs_info->endio_meta_workers)
    1763        3218 :                 destroy_workqueue(fs_info->endio_meta_workers);
    1764        3218 : }
    1765             : 
                       /*
                        * Release the extent buffers referenced by root->node and
                        * root->commit_root via free_extent_buffer() and reset both pointers to
                        * NULL.  A NULL root is accepted and ignored.  The root structure
                        * itself is not freed here.
                        */
    1766      102187 : static void free_root_extent_buffers(struct btrfs_root *root)
    1767             : {
    1768      102187 :         if (root) {
    1769       95801 :                 free_extent_buffer(root->node);
    1770       95801 :                 free_extent_buffer(root->commit_root);
    1771       95801 :                 root->node = NULL;
    1772       95801 :                 root->commit_root = NULL;
    1773             :         }
    1774      102187 : }
    1775             : 
                       /*
                        * Walk fs_info->global_root_tree in post-order and release the extent
                        * buffers of every root found there.  Only free_root_extent_buffers() is
                        * called per entry: the roots stay linked in the tree and are not freed
                        * by this function.
                        */
    1776        3218 : static void free_global_root_pointers(struct btrfs_fs_info *fs_info)
    1777             : {
    1778        3218 :         struct btrfs_root *root, *tmp;
    1779             : 
    1780       16078 :         rbtree_postorder_for_each_entry_safe(root, tmp,
    1781             :                                              &fs_info->global_root_tree,
    1782             :                                              rb_node)
    1783        9642 :                 free_root_extent_buffers(root);
    1784        3218 : }
    1785             : 
    1786             : /*
                        * Helper to clean up tree roots: releases the node/commit_root extent
                        * buffers of the tree root, every global root, and the dev, quota, uuid,
                        * fs, data reloc and block group roots.  The chunk root is only handled
                        * when free_chunk_root is true.  Root pointers that are NULL are skipped
                        * by free_root_extent_buffers().
                        */
    1787        3218 : static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
    1788             : {
    1789        3218 :         free_root_extent_buffers(info->tree_root);
    1790             : 
    1791        3218 :         free_global_root_pointers(info);
    1792        3218 :         free_root_extent_buffers(info->dev_root);
    1793        3218 :         free_root_extent_buffers(info->quota_root);
    1794        3218 :         free_root_extent_buffers(info->uuid_root);
    1795        3218 :         free_root_extent_buffers(info->fs_root);
    1796        3218 :         free_root_extent_buffers(info->data_reloc_root);
    1797        3218 :         free_root_extent_buffers(info->block_group_root);
    1798        3218 :         if (free_chunk_root)
    1799        3218 :                 free_root_extent_buffers(info->chunk_root);
    1800        3218 : }
    1801             : 
                       /*
                        * Drop one reference on @root; a NULL root is ignored.
                        *
                        * When the reference count reaches zero the root is torn down: a warning
                        * is raised if its inode tree is not empty or if the
                        * BTRFS_ROOT_DEAD_RELOC_TREE bit is still set, a non-zero anon_dev is
                        * released, the node/commit_root extent buffers are freed and the
                        * structure itself is kfree()d.
                        */
    1802    17489104 : void btrfs_put_root(struct btrfs_root *root)
    1803             : {
    1804    17489104 :         if (!root)
    1805             :                 return;
    1806             : 
    1807    15404607 :         if (refcount_dec_and_test(&root->refs)) {
    1808       66801 :                 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
    1809       66801 :                 WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
    1810       66801 :                 if (root->anon_dev)
    1811        8707 :                         free_anon_bdev(root->anon_dev);
    1812       66801 :                 free_root_extent_buffers(root);
    1813             : #ifdef CONFIG_BTRFS_DEBUG
                                       /* Debug builds only: unlink the root from its leak_list. */
    1814             :                 spin_lock(&root->fs_info->fs_roots_radix_lock);
    1815             :                 list_del_init(&root->leak_list);
    1816             :                 spin_unlock(&root->fs_info->fs_roots_radix_lock);
    1817             : #endif
    1818       66801 :                 kfree(root);
    1819             :         }
    1820             : }
    1821             : 
                       /*
                        * Release all fs roots tracked by @fs_info.
                        *
                        * First the dead_roots list is drained: each entry is unlinked, dropped
                        * through btrfs_drop_and_free_fs_root() if it is flagged
                        * BTRFS_ROOT_IN_RADIX, and then has one reference put.  Afterwards the
                        * fs_roots_radix tree is emptied in batches of up to ARRAY_SIZE(gang)
                        * roots.
                        */
    1822        3217 : void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
    1823             : {
    1824        3217 :         int ret;
    1825        3217 :         struct btrfs_root *gang[8];
    1826        3217 :         int i;
    1827             : 
    1828        3466 :         while (!list_empty(&fs_info->dead_roots)) {
                                       /* gang[0] is reused here as a scratch pointer. */
    1829         249 :                 gang[0] = list_entry(fs_info->dead_roots.next,
    1830             :                                      struct btrfs_root, root_list);
    1831         249 :                 list_del(&gang[0]->root_list);
    1832             : 
    1833         498 :                 if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
    1834         249 :                         btrfs_drop_and_free_fs_root(fs_info, gang[0]);
    1835         249 :                 btrfs_put_root(gang[0]);
    1836             :         }
    1837             : 
                               /*
                                * The lookup always restarts at index 0, so this loop only ends
                                * once the radix tree is empty.  NOTE(review): this relies on
                                * btrfs_drop_and_free_fs_root() removing the root from the tree;
                                * confirm against its definition.
                                */
    1838        7051 :         while (1) {
    1839        7051 :                 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
    1840             :                                              (void **)gang, 0,
    1841             :                                              ARRAY_SIZE(gang));
    1842        7051 :                 if (!ret)
    1843             :                         break;
    1844       15758 :                 for (i = 0; i < ret; i++)
    1845       11924 :                         btrfs_drop_and_free_fs_root(fs_info, gang[i]);
    1846             :         }
    1847        3217 : }
    1848             : 
                       /*
                        * Initialise the scrub state embedded in @fs_info: the scrub mutex, the
                        * pause wait queue, and the running/pause-request/paused/cancel-request
                        * counters plus the scrub workers refcount, all of which start at zero.
                        */
    1849        3473 : static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
    1850             : {
    1851        3473 :         mutex_init(&fs_info->scrub_lock);
    1852        3473 :         atomic_set(&fs_info->scrubs_running, 0);
    1853        3473 :         atomic_set(&fs_info->scrub_pause_req, 0);
    1854        3473 :         atomic_set(&fs_info->scrubs_paused, 0);
    1855        3473 :         atomic_set(&fs_info->scrub_cancel_req, 0);
    1856        3473 :         init_waitqueue_head(&fs_info->scrub_pause_wait);
    1857        3473 :         refcount_set(&fs_info->scrub_workers_refcnt, 0);
    1858        3473 : }
    1859             : 
                       /*
                        * Initialise the balance state embedded in @fs_info: the balance
                        * spinlock and mutex, the balance wait queue, and zeroed pause, cancel
                        * and relocation-cancel request counters.  No balance control structure
                        * is attached yet (balance_ctl is NULL).
                        */
    1860        3473 : static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
    1861             : {
    1862        3473 :         spin_lock_init(&fs_info->balance_lock);
    1863        3473 :         mutex_init(&fs_info->balance_mutex);
    1864        3473 :         atomic_set(&fs_info->balance_pause_req, 0);
    1865        3473 :         atomic_set(&fs_info->balance_cancel_req, 0);
    1866        3473 :         fs_info->balance_ctl = NULL;
    1867        3473 :         init_waitqueue_head(&fs_info->balance_wait_q);
    1868        3473 :         atomic_set(&fs_info->reloc_cancel_req, 0);
    1869        3473 : }
    1870             : 
                       /*
                        * Allocate and set up the btree inode for the filesystem behind @sb and
                        * store it in fs_info->btree_inode.
                        *
                        * The inode gets the number BTRFS_BTREE_INODE_OBJECTID, a link count of
                        * one, btree_aops as address space operations and a GFP_NOFS mapping
                        * mask.  It takes a reference on fs_info->tree_root, is flagged
                        * BTRFS_INODE_DUMMY and is inserted into the inode hash.
                        *
                        * Returns 0 on success or -ENOMEM if new_inode() fails.
                        */
    1871        3244 : static int btrfs_init_btree_inode(struct super_block *sb)
    1872             : {
    1873        3244 :         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
    1874        3244 :         unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID,
    1875        3244 :                                               fs_info->tree_root);
    1876        3244 :         struct inode *inode;
    1877             : 
    1878        3244 :         inode = new_inode(sb);
    1879        3244 :         if (!inode)
    1880             :                 return -ENOMEM;
    1881             : 
    1882        3244 :         inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
    1883        3244 :         set_nlink(inode, 1);
    1884             :         /*
    1885             :          * we set the i_size on the btree inode to the max possible int.
    1886             :          * the real end of the address space is determined by all of
    1887             :          * the devices in the system
    1888             :          */
    1889        3244 :         inode->i_size = OFFSET_MAX;
    1890        3244 :         inode->i_mapping->a_ops = &btree_aops;
    1891        3244 :         mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
    1892             : 
                               /* Set up the btrfs-private part: rb node, io tree and extent map tree. */
    1893        3244 :         RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
    1894        3244 :         extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
    1895             :                             IO_TREE_BTREE_INODE_IO);
    1896        3244 :         extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
    1897             : 
    1898        3244 :         BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
    1899        3244 :         BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
    1900        3244 :         BTRFS_I(inode)->location.type = 0;
    1901        3244 :         BTRFS_I(inode)->location.offset = 0;
    1902        3244 :         set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
    1903        3244 :         __insert_inode_hash(inode, hash);
    1904        3244 :         fs_info->btree_inode = inode;
    1905             : 
    1906        3244 :         return 0;
    1907             : }
    1908             : 
                       /*
                        * Initialise the synchronisation primitives of fs_info->dev_replace: the
                        * lock_finishing_cancel_unmount mutex, the rwsem and the replace_wait
                        * wait queue.
                        */
    1909        3473 : static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
    1910             : {
    1911        3473 :         mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
    1912        3473 :         init_rwsem(&fs_info->dev_replace.rwsem);
    1913        3473 :         init_waitqueue_head(&fs_info->dev_replace.replace_wait);
    1914        3473 : }
    1915             : 
                       /*
                        * Initialise the qgroup state embedded in @fs_info: its locks, an empty
                        * qgroup rb-tree and dirty list, a sequence number starting at 1, no
                        * ulist, no rescan running, and a drop-subtree threshold of
                        * BTRFS_MAX_LEVEL.
                        */
    1916        3473 : static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
    1917             : {
    1918        3473 :         spin_lock_init(&fs_info->qgroup_lock);
    1919        3473 :         mutex_init(&fs_info->qgroup_ioctl_lock);
    1920        3473 :         fs_info->qgroup_tree = RB_ROOT;
    1921        3473 :         INIT_LIST_HEAD(&fs_info->dirty_qgroups);
    1922        3473 :         fs_info->qgroup_seq = 1;
    1923        3473 :         fs_info->qgroup_ulist = NULL;
    1924        3473 :         fs_info->qgroup_rescan_running = false;
    1925        3473 :         fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
    1926        3473 :         mutex_init(&fs_info->qgroup_rescan_lock);
    1927        3473 : }
    1928             : 
                       /*
                        * Allocate all workqueues used by @fs_info.
                        *
                        * fs_info->thread_pool_size is used as max_active for every queue that
                        * takes such a limit.  The "fixup" and "qgroup-rescan" queues are
                        * allocated as ordered queues without WQ_UNBOUND, and the discard queue
                        * is an ordered queue with WQ_FREEZABLE only.
                        *
                        * Returns 0 when every allocation succeeded and -ENOMEM if at least one
                        * returned NULL.  Queues that were allocated before a failure are not
                        * destroyed here (presumably the caller cleans up through
                        * btrfs_stop_all_workers() -- TODO confirm).
                        */
    1929        3218 : static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
    1930             : {
    1931        3218 :         u32 max_active = fs_info->thread_pool_size;
    1932        3218 :         unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
    1933        3218 :         unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
    1934             : 
    1935        6436 :         fs_info->workers =
    1936        3218 :                 btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
    1937             : 
    1938        6436 :         fs_info->delalloc_workers =
    1939        3218 :                 btrfs_alloc_workqueue(fs_info, "delalloc",
    1940             :                                       flags, max_active, 2);
    1941             : 
    1942        6436 :         fs_info->flush_workers =
    1943        3218 :                 btrfs_alloc_workqueue(fs_info, "flush_delalloc",
    1944             :                                       flags, max_active, 0);
    1945             : 
    1946        6436 :         fs_info->caching_workers =
    1947        3218 :                 btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
    1948             : 
    1949        6436 :         fs_info->fixup_workers =
    1950        3218 :                 btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags);
    1951             : 
    1952        6436 :         fs_info->endio_workers =
    1953        3218 :                 alloc_workqueue("btrfs-endio", flags, max_active);
    1954        6436 :         fs_info->endio_meta_workers =
    1955        3218 :                 alloc_workqueue("btrfs-endio-meta", flags, max_active);
    1956        3218 :         fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
    1957        6436 :         fs_info->endio_write_workers =
    1958        3218 :                 btrfs_alloc_workqueue(fs_info, "endio-write", flags,
    1959             :                                       max_active, 2);
    1960        6436 :         fs_info->compressed_write_workers =
    1961        3218 :                 alloc_workqueue("btrfs-compressed-write", flags, max_active);
    1962        6436 :         fs_info->endio_freespace_worker =
    1963        3218 :                 btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
    1964             :                                       max_active, 0);
    1965        6436 :         fs_info->delayed_workers =
    1966        3218 :                 btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
    1967             :                                       max_active, 0);
    1968        6436 :         fs_info->qgroup_rescan_workers =
    1969        3218 :                 btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
    1970             :                                               ordered_flags);
    1971        6436 :         fs_info->discard_ctl.discard_workers =
    1972        3218 :                 alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
    1973             : 
                               /*
                                * All allocations are attempted first and checked together
                                * afterwards; every queue assigned above appears in this test.
                                */
    1974        6436 :         if (!(fs_info->workers &&
    1975        3218 :               fs_info->delalloc_workers && fs_info->flush_workers &&
    1976        3218 :               fs_info->endio_workers && fs_info->endio_meta_workers &&
    1977        3218 :               fs_info->compressed_write_workers &&
    1978        3218 :               fs_info->endio_write_workers &&
    1979        3218 :               fs_info->endio_freespace_worker && fs_info->rmw_workers &&
    1980        3218 :               fs_info->caching_workers && fs_info->fixup_workers &&
    1981        3218 :               fs_info->delayed_workers && fs_info->qgroup_rescan_workers &&
    1982             :               fs_info->discard_ctl.discard_workers)) {
    1983           0 :                 return -ENOMEM;
    1984             :         }
    1985             : 
    1986             :         return 0;
    1987             : }
    1988             : 
                       /*
                        * Allocate the shash transform for checksum type @csum_type and store it
                        * in fs_info->csum_shash.
                        *
                        * The driver name is looked up with btrfs_super_csum_driver().  The
                        * BTRFS_FS_CSUM_IMPL_FAST flag is set for CRC32 when the name of the
                        * driver actually bound does not contain "generic", and unconditionally
                        * for XXHASH; other types leave the flag untouched.
                        *
                        * Returns 0 on success or the error encoded in the pointer returned by
                        * crypto_alloc_shash().
                        */
    1989        3244 : static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
    1990             : {
    1991        3244 :         struct crypto_shash *csum_shash;
    1992        3244 :         const char *csum_driver = btrfs_super_csum_driver(csum_type);
    1993             : 
    1994        3244 :         csum_shash = crypto_alloc_shash(csum_driver, 0, 0);
    1995             : 
    1996        3244 :         if (IS_ERR(csum_shash)) {
    1997           0 :                 btrfs_err(fs_info, "error allocating %s hash for checksum",
    1998             :                           csum_driver);
    1999           0 :                 return PTR_ERR(csum_shash);
    2000             :         }
    2001             : 
    2002        3244 :         fs_info->csum_shash = csum_shash;
    2003             : 
    2004             :         /*
    2005             :          * Check if the checksum implementation is a fast accelerated one.
    2006             :          * As-is this is a bit of a hack and should be replaced once the csum
    2007             :          * implementations provide that information themselves.
    2008             :          */
    2009        3244 :         switch (csum_type) {
    2010             :         case BTRFS_CSUM_TYPE_CRC32:
    2011        3244 :                 if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
    2012        3244 :                         set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
    2013             :                 break;
    2014           0 :         case BTRFS_CSUM_TYPE_XXHASH:
    2015           0 :                 set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
    2016             :                 break;
    2017             :         default:
    2018             :                 break;
    2019             :         }
    2020             : 
    2021        3244 :         btrfs_info(fs_info, "using %s (%s) checksum algorithm",
    2022             :                         btrfs_super_csum_name(csum_type),
    2023             :                         crypto_shash_driver_name(csum_shash));
    2024        3244 :         return 0;
    2025             : }
    2026             : 
                       /*
                        * Read the log tree root recorded in the super block and replay it.
                        *
                        * Returns 0 on success.  Errors: -EIO when @fs_devices has no writable
                        * device or the log root block is not uptodate after reading, -ENOMEM
                        * when the log root cannot be allocated, and otherwise the error
                        * reported by read_tree_block(), btrfs_recover_log_trees() or
                        * btrfs_commit_super().
                        */
    2027         285 : static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
    2028             :                             struct btrfs_fs_devices *fs_devices)
    2029             : {
    2030         285 :         int ret;
    2031         285 :         struct btrfs_tree_parent_check check = { 0 };
    2032         285 :         struct btrfs_root *log_tree_root;
    2033         285 :         struct btrfs_super_block *disk_super = fs_info->super_copy;
    2034         285 :         u64 bytenr = btrfs_super_log_root(disk_super);
    2035         285 :         int level = btrfs_super_log_root_level(disk_super);
    2036             : 
    2037         285 :         if (fs_devices->rw_devices == 0) {
    2038           0 :                 btrfs_warn(fs_info, "log replay required on RO media");
    2039           0 :                 return -EIO;
    2040             :         }
    2041             : 
    2042         285 :         log_tree_root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID,
    2043             :                                          GFP_KERNEL);
    2044         285 :         if (!log_tree_root)
    2045             :                 return -ENOMEM;
    2046             : 
                               /*
                                * Expected properties handed to read_tree_block(): the level from
                                * the super block, a transid one past fs_info->generation and the
                                * log tree as owner.
                                */
    2047         285 :         check.level = level;
    2048         285 :         check.transid = fs_info->generation + 1;
    2049         285 :         check.owner_root = BTRFS_TREE_LOG_OBJECTID;
    2050         285 :         log_tree_root->node = read_tree_block(fs_info, bytenr, &check);
    2051         285 :         if (IS_ERR(log_tree_root->node)) {
    2052           0 :                 btrfs_warn(fs_info, "failed to read log tree");
    2053           0 :                 ret = PTR_ERR(log_tree_root->node);
                                       /*
                                        * Clear the error pointer so that btrfs_put_root() does not
                                        * pass it on to free_extent_buffer().
                                        */
    2054           0 :                 log_tree_root->node = NULL;
    2055           0 :                 btrfs_put_root(log_tree_root);
    2056           0 :                 return ret;
    2057             :         }
    2058         570 :         if (!extent_buffer_uptodate(log_tree_root->node)) {
    2059           0 :                 btrfs_err(fs_info, "failed to read log tree");
    2060           0 :                 btrfs_put_root(log_tree_root);
    2061           0 :                 return -EIO;
    2062             :         }
    2063             : 
    2064             :         /* returns with log_tree_root freed on success */
    2065         285 :         ret = btrfs_recover_log_trees(log_tree_root);
    2066         285 :         if (ret) {
    2067           0 :                 btrfs_handle_fs_error(fs_info, ret,
    2068             :                                       "Failed to recover log tree");
    2069           0 :                 btrfs_put_root(log_tree_root);
    2070           0 :                 return ret;
    2071             :         }
    2072             : 
                               /* On a read-only super block the result is committed right away. */
    2073         285 :         if (sb_rdonly(fs_info->sb)) {
    2074           0 :                 ret = btrfs_commit_super(fs_info);
    2075           0 :                 if (ret)
    2076           0 :                         return ret;
    2077             :         }
    2078             : 
    2079             :         return 0;
    2080             : }
    2081             : 
                       /*
                        * Load every root item with object id @objectid from @tree_root and
                        * insert the resulting roots into the global root tree.
                        *
                        * The search starts at key (objectid, BTRFS_ROOT_ITEM_KEY, 0) and, after
                        * each root that was loaded, continues at the next key offset until an
                        * item with a different object id is found.  @path is a caller provided
                        * scratch path and is released before returning.  @name is only used in
                        * the error message.
                        *
                        * For the extent tree the highest key offset seen is recorded and
                        * fs_info->nr_global_roots is set to that value plus one.
                        *
                        * For the csum tree nothing is loaded when the IGNOREDATACSUMS option is
                        * set; BTRFS_FS_STATE_NO_CSUMS is set and 0 is returned.
                        *
                        * Returns 0 on success.  If no root was found or an error occurred, the
                        * failure is logged and the error (or -ENOENT when nothing was found)
                        * is returned, unless the IGNOREBADROOTS option is set, in which case 0
                        * is returned.
                        */
    2082        9645 : static int load_global_roots_objectid(struct btrfs_root *tree_root,
    2083             :                                       struct btrfs_path *path, u64 objectid,
    2084             :                                       const char *name)
    2085             : {
    2086        9645 :         struct btrfs_fs_info *fs_info = tree_root->fs_info;
    2087        9645 :         struct btrfs_root *root;
    2088        9645 :         u64 max_global_id = 0;
    2089        9645 :         int ret;
    2090        9645 :         struct btrfs_key key = {
    2091             :                 .objectid = objectid,
    2092             :                 .type = BTRFS_ROOT_ITEM_KEY,
    2093             :                 .offset = 0,
    2094             :         };
    2095        9645 :         bool found = false;
    2096             : 
    2097             :         /* If we have IGNOREDATACSUMS skip loading these roots. */
    2098        9645 :         if (objectid == BTRFS_CSUM_TREE_OBJECTID &&
    2099        3217 :             btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
    2100           0 :                 set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
    2101           0 :                 return 0;
    2102             :         }
    2103             : 
    2104       28935 :         while (1) {
    2105       19290 :                 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
    2106       19290 :                 if (ret < 0)
    2107             :                         break;
    2108             : 
                                       /*
                                        * The slot points past the last item of this leaf, move on
                                        * to the next leaf.  A positive return value is not treated
                                        * as an error (presumably "no more leaves" -- confirm).
                                        */
    2109       19290 :                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
    2110           0 :                         ret = btrfs_next_leaf(tree_root, path);
    2111           0 :                         if (ret) {
    2112           0 :                                 if (ret > 0)
    2113             :                                         ret = 0;
    2114             :                                 break;
    2115             :                         }
    2116             :                 }
                                       /* A positive search result (no exact match) is fine here. */
    2117       19290 :                 ret = 0;
    2118             : 
    2119       19290 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    2120       19290 :                 if (key.objectid != objectid)
    2121             :                         break;
    2122        9645 :                 btrfs_release_path(path);
    2123             : 
    2124             :                 /*
    2125             :                  * Just worry about this for extent tree, it'll be the same for
    2126             :                  * everybody.
    2127             :                  */
    2128        9645 :                 if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
    2129        3217 :                         max_global_id = max(max_global_id, key.offset);
    2130             : 
    2131        9645 :                 found = true;
    2132        9645 :                 root = read_tree_root_path(tree_root, path, &key);
    2133        9645 :                 if (IS_ERR(root)) {
    2134           0 :                         if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
    2135           0 :                                 ret = PTR_ERR(root);
    2136             :                         break;
    2137             :                 }
    2138        9645 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2139        9645 :                 ret = btrfs_global_root_insert(root);
    2140        9645 :                 if (ret) {
    2141           0 :                         btrfs_put_root(root);
    2142           0 :                         break;
    2143             :                 }
                                       /* Continue the search right after the root just loaded. */
    2144        9645 :                 key.offset++;
    2145             :         }
    2146        9645 :         btrfs_release_path(path);
    2147             : 
    2148        9645 :         if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
    2149        3217 :                 fs_info->nr_global_roots = max_global_id + 1;
    2150             : 
    2151        9645 :         if (!found || ret) {
    2152           0 :                 if (objectid == BTRFS_CSUM_TREE_OBJECTID)
    2153           0 :                         set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
    2154             : 
    2155           0 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
    2156           0 :                         ret = ret ? ret : -ENOENT;
    2157             :                 else
    2158             :                         ret = 0;
                                       /* The message is printed even when the failure is ignored. */
    2159           0 :                 btrfs_err(fs_info, "failed to load root %s", name);
    2160             :         }
    2161             :         return ret;
    2162             : }
    2163             : 
    2164        3217 : static int load_global_roots(struct btrfs_root *tree_root)
    2165             : {
    2166        3217 :         struct btrfs_path *path;
    2167        3217 :         int ret = 0;
    2168             : 
    2169        3217 :         path = btrfs_alloc_path();
    2170        3217 :         if (!path)
    2171             :                 return -ENOMEM;
    2172             : 
    2173        3217 :         ret = load_global_roots_objectid(tree_root, path,
    2174             :                                          BTRFS_EXTENT_TREE_OBJECTID, "extent");
    2175        3217 :         if (ret)
    2176           0 :                 goto out;
    2177        3217 :         ret = load_global_roots_objectid(tree_root, path,
    2178             :                                          BTRFS_CSUM_TREE_OBJECTID, "csum");
    2179        3217 :         if (ret)
    2180           0 :                 goto out;
    2181        3217 :         if (!btrfs_fs_compat_ro(tree_root->fs_info, FREE_SPACE_TREE))
    2182           6 :                 goto out;
    2183        3211 :         ret = load_global_roots_objectid(tree_root, path,
    2184             :                                          BTRFS_FREE_SPACE_TREE_OBJECTID,
    2185             :                                          "free space");
    2186        3217 : out:
    2187        3217 :         btrfs_free_path(path);
    2188        3217 :         return ret;
    2189             : }
    2190             : 
    2191        3217 : static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
    2192             : {
    2193        3217 :         struct btrfs_root *tree_root = fs_info->tree_root;
    2194        3217 :         struct btrfs_root *root;
    2195        3217 :         struct btrfs_key location;
    2196        3217 :         int ret;
    2197             : 
    2198        3217 :         BUG_ON(!fs_info->tree_root);
    2199             : 
    2200        3217 :         ret = load_global_roots(tree_root);
    2201        3217 :         if (ret)
    2202             :                 return ret;
    2203             : 
    2204        3217 :         location.type = BTRFS_ROOT_ITEM_KEY;
    2205        3217 :         location.offset = 0;
    2206             : 
    2207        3217 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
    2208           0 :                 location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
    2209           0 :                 root = btrfs_read_tree_root(tree_root, &location);
    2210           0 :                 if (IS_ERR(root)) {
    2211           0 :                         if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2212           0 :                                 ret = PTR_ERR(root);
    2213           0 :                                 goto out;
    2214             :                         }
    2215             :                 } else {
    2216           0 :                         set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2217           0 :                         fs_info->block_group_root = root;
    2218             :                 }
    2219             :         }
    2220             : 
    2221        3217 :         location.objectid = BTRFS_DEV_TREE_OBJECTID;
    2222        3217 :         root = btrfs_read_tree_root(tree_root, &location);
    2223        3217 :         if (IS_ERR(root)) {
    2224           0 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2225           0 :                         ret = PTR_ERR(root);
    2226           0 :                         goto out;
    2227             :                 }
    2228             :         } else {
    2229        3217 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2230        3217 :                 fs_info->dev_root = root;
    2231             :         }
    2232             :         /* Initialize fs_info for all devices in any case */
    2233        3217 :         ret = btrfs_init_devices_late(fs_info);
    2234        3217 :         if (ret)
    2235           0 :                 goto out;
    2236             : 
    2237             :         /*
    2238             :          * This tree can share blocks with some other fs tree during relocation
    2239             :          * and we need a proper setup by btrfs_get_fs_root
    2240             :          */
    2241        3217 :         root = btrfs_get_fs_root(tree_root->fs_info,
    2242             :                                  BTRFS_DATA_RELOC_TREE_OBJECTID, true);
    2243        3217 :         if (IS_ERR(root)) {
    2244           0 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2245           0 :                         ret = PTR_ERR(root);
    2246           0 :                         goto out;
    2247             :                 }
    2248             :         } else {
    2249        3217 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2250        3217 :                 fs_info->data_reloc_root = root;
    2251             :         }
    2252             : 
    2253        3217 :         location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
    2254        3217 :         root = btrfs_read_tree_root(tree_root, &location);
    2255        3217 :         if (!IS_ERR(root)) {
    2256          22 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2257          22 :                 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
    2258          22 :                 fs_info->quota_root = root;
    2259             :         }
    2260             : 
    2261        3217 :         location.objectid = BTRFS_UUID_TREE_OBJECTID;
    2262        3217 :         root = btrfs_read_tree_root(tree_root, &location);
    2263        3217 :         if (IS_ERR(root)) {
    2264           3 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2265           3 :                         ret = PTR_ERR(root);
    2266           3 :                         if (ret != -ENOENT)
    2267           0 :                                 goto out;
    2268             :                 }
    2269             :         } else {
    2270        3214 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2271        3214 :                 fs_info->uuid_root = root;
    2272             :         }
    2273             : 
    2274             :         return 0;
    2275           0 : out:
    2276           0 :         btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d",
    2277             :                    location.objectid, ret);
    2278           0 :         return ret;
    2279             : }
    2280             : 
    2281             : /*
    2282             :  * Real super block validation
    2283             :  * NOTE: super csum type and incompat features will not be checked here.
    2284             :  *
    2285             :  * @sb:         super block to check
    2286             :  * @mirror_num: the super block number to check its bytenr:
    2287             :  *              0       the primary (1st) sb
    2288             :  *              1, 2    2nd and 3rd backup copy
    2289             :  *             -1       skip bytenr check
    2290             :  */
    2291      450344 : int btrfs_validate_super(struct btrfs_fs_info *fs_info,
    2292             :                          struct btrfs_super_block *sb, int mirror_num)
    2293             : {
    2294      450344 :         u64 nodesize = btrfs_super_nodesize(sb);
    2295      450344 :         u64 sectorsize = btrfs_super_sectorsize(sb);
    2296      450344 :         int ret = 0;
    2297             : 
    2298      450344 :         if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
    2299           0 :                 btrfs_err(fs_info, "no valid FS found");
    2300           0 :                 ret = -EINVAL;
    2301             :         }
    2302      450344 :         if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
    2303           0 :                 btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
    2304             :                                 btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
    2305           0 :                 ret = -EINVAL;
    2306             :         }
    2307      450344 :         if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
    2308           0 :                 btrfs_err(fs_info, "tree_root level too big: %d >= %d",
    2309             :                                 btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
    2310           0 :                 ret = -EINVAL;
    2311             :         }
    2312      450344 :         if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
    2313           0 :                 btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
    2314             :                                 btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
    2315           0 :                 ret = -EINVAL;
    2316             :         }
    2317      450344 :         if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
    2318           0 :                 btrfs_err(fs_info, "log_root level too big: %d >= %d",
    2319             :                                 btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
    2320           0 :                 ret = -EINVAL;
    2321             :         }
    2322             : 
    2323             :         /*
    2324             :          * Check sectorsize and nodesize first, other check will need it.
    2325             :          * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
    2326             :          */
    2327      900688 :         if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
    2328             :             sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
    2329           0 :                 btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
    2330           0 :                 ret = -EINVAL;
    2331             :         }
    2332             : 
    2333             :         /*
    2334             :          * We only support at most two sectorsizes: 4K and PAGE_SIZE.
    2335             :          *
    2336             :          * We can support 16K sectorsize with 64K page size without problem,
    2337             :          * but such sectorsize/pagesize combination doesn't make much sense.
    2338             :          * 4K will be our future standard, PAGE_SIZE is supported from the very
    2339             :          * beginning.
    2340             :          */
    2341      450344 :         if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && sectorsize != PAGE_SIZE)) {
    2342           0 :                 btrfs_err(fs_info,
    2343             :                         "sectorsize %llu not yet supported for page size %lu",
    2344             :                         sectorsize, PAGE_SIZE);
    2345           0 :                 ret = -EINVAL;
    2346             :         }
    2347             : 
    2348      900688 :         if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
    2349      450344 :             nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
    2350           0 :                 btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
    2351           0 :                 ret = -EINVAL;
    2352             :         }
    2353      450344 :         if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
    2354           0 :                 btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
    2355             :                           le32_to_cpu(sb->__unused_leafsize), nodesize);
    2356           0 :                 ret = -EINVAL;
    2357             :         }
    2358             : 
    2359             :         /* Root alignment check */
    2360      450344 :         if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
    2361           0 :                 btrfs_warn(fs_info, "tree_root block unaligned: %llu",
    2362             :                            btrfs_super_root(sb));
    2363           0 :                 ret = -EINVAL;
    2364             :         }
    2365      450344 :         if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
    2366           0 :                 btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
    2367             :                            btrfs_super_chunk_root(sb));
    2368           0 :                 ret = -EINVAL;
    2369             :         }
    2370      450344 :         if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
    2371           0 :                 btrfs_warn(fs_info, "log_root block unaligned: %llu",
    2372             :                            btrfs_super_log_root(sb));
    2373           0 :                 ret = -EINVAL;
    2374             :         }
    2375             : 
    2376      900688 :         if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
    2377             :                    BTRFS_FSID_SIZE)) {
    2378           0 :                 btrfs_err(fs_info,
    2379             :                 "superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
    2380             :                         fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
    2381           0 :                 ret = -EINVAL;
    2382             :         }
    2383             : 
    2384      450344 :         if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
    2385           0 :             memcmp(fs_info->fs_devices->metadata_uuid,
    2386           0 :                    fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
    2387           0 :                 btrfs_err(fs_info,
    2388             : "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
    2389             :                         fs_info->super_copy->metadata_uuid,
    2390             :                         fs_info->fs_devices->metadata_uuid);
    2391           0 :                 ret = -EINVAL;
    2392             :         }
    2393             : 
    2394      900688 :         if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
    2395             :                    BTRFS_FSID_SIZE) != 0) {
    2396           0 :                 btrfs_err(fs_info,
    2397             :                         "dev_item UUID does not match metadata fsid: %pU != %pU",
    2398             :                         fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
    2399           0 :                 ret = -EINVAL;
    2400             :         }
    2401             : 
    2402             :         /*
    2403             :          * Artificial requirement for block-group-tree to force newer features
    2404             :          * (free-space-tree, no-holes) so the test matrix is smaller.
    2405             :          */
    2406      450344 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
    2407           0 :             (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
    2408           0 :              !btrfs_fs_incompat(fs_info, NO_HOLES))) {
    2409           0 :                 btrfs_err(fs_info,
    2410             :                 "block-group-tree feature requires fres-space-tree and no-holes");
    2411           0 :                 ret = -EINVAL;
    2412             :         }
    2413             : 
    2414             :         /*
    2415             :          * Hint to catch really bogus numbers, bitflips or so, more exact checks are
    2416             :          * done later
    2417             :          */
    2418      450344 :         if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
    2419           0 :                 btrfs_err(fs_info, "bytes_used is too small %llu",
    2420             :                           btrfs_super_bytes_used(sb));
    2421           0 :                 ret = -EINVAL;
    2422             :         }
    2423      900688 :         if (!is_power_of_2(btrfs_super_stripesize(sb))) {
    2424           0 :                 btrfs_err(fs_info, "invalid stripesize %u",
    2425             :                           btrfs_super_stripesize(sb));
    2426           0 :                 ret = -EINVAL;
    2427             :         }
    2428      450344 :         if (btrfs_super_num_devices(sb) > (1UL << 31))
    2429           0 :                 btrfs_warn(fs_info, "suspicious number of devices: %llu",
    2430             :                            btrfs_super_num_devices(sb));
    2431      450344 :         if (btrfs_super_num_devices(sb) == 0) {
    2432           0 :                 btrfs_err(fs_info, "number of devices is 0");
    2433           0 :                 ret = -EINVAL;
    2434             :         }
    2435             : 
    2436      453631 :         if (mirror_num >= 0 &&
    2437             :             btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
    2438           0 :                 btrfs_err(fs_info, "super offset mismatch %llu != %u",
    2439             :                           btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
    2440           0 :                 ret = -EINVAL;
    2441             :         }
    2442             : 
    2443             :         /*
    2444             :          * Obvious sys_chunk_array corruptions, it must hold at least one key
    2445             :          * and one chunk
    2446             :          */
    2447      450344 :         if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
    2448           0 :                 btrfs_err(fs_info, "system chunk array too big %u > %u",
    2449             :                           btrfs_super_sys_array_size(sb),
    2450             :                           BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
    2451           0 :                 ret = -EINVAL;
    2452             :         }
    2453      450344 :         if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
    2454             :                         + sizeof(struct btrfs_chunk)) {
    2455           0 :                 btrfs_err(fs_info, "system chunk array too small %u < %zu",
    2456             :                           btrfs_super_sys_array_size(sb),
    2457             :                           sizeof(struct btrfs_disk_key)
    2458             :                           + sizeof(struct btrfs_chunk));
    2459           0 :                 ret = -EINVAL;
    2460             :         }
    2461             : 
    2462             :         /*
    2463             :          * The generation is a global counter, we'll trust it more than the others
    2464             :          * but it's still possible that it's the one that's wrong.
    2465             :          */
    2466      450344 :         if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
    2467           0 :                 btrfs_warn(fs_info,
    2468             :                         "suspicious: generation < chunk_root_generation: %llu < %llu",
    2469             :                         btrfs_super_generation(sb),
    2470             :                         btrfs_super_chunk_root_generation(sb));
    2471      450344 :         if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
    2472           3 :             && btrfs_super_cache_generation(sb) != (u64)-1)
    2473           0 :                 btrfs_warn(fs_info,
    2474             :                         "suspicious: generation < cache_generation: %llu < %llu",
    2475             :                         btrfs_super_generation(sb),
    2476             :                         btrfs_super_cache_generation(sb));
    2477             : 
    2478      450344 :         return ret;
    2479             : }
    2480             : 
    2481             : /*
    2482             :  * Validation of super block at mount time.
    2483             :  * Some checks already done early at mount time, like csum type and incompat
    2484             :  * flags will be skipped.
    2485             :  */
    2486             : static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
    2487             : {
    2488        3244 :         return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Validation of super block at write time.
    2493             :  * Some checks like bytenr check will be skipped as their values will be
    2494             :  * overwritten soon.
    2495             :  * Extra checks like csum type and incompat flags will be done here.
    2496             :  */
    2497      447051 : static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
    2498             :                                       struct btrfs_super_block *sb)
    2499             : {
    2500      447051 :         int ret;
    2501             : 
    2502      447051 :         ret = btrfs_validate_super(fs_info, sb, -1);
    2503      447051 :         if (ret < 0)
    2504           0 :                 goto out;
    2505      447051 :         if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
    2506           0 :                 ret = -EUCLEAN;
    2507           0 :                 btrfs_err(fs_info, "invalid csum type, has %u want %u",
    2508             :                           btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
    2509           0 :                 goto out;
    2510             :         }
    2511      447051 :         if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
    2512           0 :                 ret = -EUCLEAN;
    2513           0 :                 btrfs_err(fs_info,
    2514             :                 "invalid incompat flags, has 0x%llx valid mask 0x%llx",
    2515             :                           btrfs_super_incompat_flags(sb),
    2516             :                           (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP);
    2517           0 :                 goto out;
    2518             :         }
    2519      447051 : out:
    2520      447051 :         if (ret < 0)
    2521           0 :                 btrfs_err(fs_info,
    2522             :                 "super block corruption detected before writing it to disk");
    2523      447051 :         return ret;
    2524             : }
    2525             : 
    2526        6436 : static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
    2527             : {
    2528        6436 :         struct btrfs_tree_parent_check check = {
    2529             :                 .level = level,
    2530             :                 .transid = gen,
    2531        6436 :                 .owner_root = root->root_key.objectid
    2532             :         };
    2533        6436 :         int ret = 0;
    2534             : 
    2535        6436 :         root->node = read_tree_block(root->fs_info, bytenr, &check);
    2536        6436 :         if (IS_ERR(root->node)) {
    2537           1 :                 ret = PTR_ERR(root->node);
    2538           1 :                 root->node = NULL;
    2539           1 :                 return ret;
    2540             :         }
    2541       12870 :         if (!extent_buffer_uptodate(root->node)) {
    2542           0 :                 free_extent_buffer(root->node);
    2543           0 :                 root->node = NULL;
    2544           0 :                 return -EIO;
    2545             :         }
    2546             : 
    2547        6435 :         btrfs_set_root_node(&root->root_item, root->node);
    2548        6435 :         root->commit_root = btrfs_root_node(root);
    2549        6435 :         btrfs_set_root_refs(&root->root_item, 1);
    2550        6435 :         return ret;
    2551             : }
    2552             : 
    2553        3218 : static int load_important_roots(struct btrfs_fs_info *fs_info)
    2554             : {
    2555        3218 :         struct btrfs_super_block *sb = fs_info->super_copy;
    2556        3218 :         u64 gen, bytenr;
    2557        3218 :         int level, ret;
    2558             : 
    2559        3218 :         bytenr = btrfs_super_root(sb);
    2560        3218 :         gen = btrfs_super_generation(sb);
    2561        3218 :         level = btrfs_super_root_level(sb);
    2562        3218 :         ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
    2563        3218 :         if (ret) {
    2564           1 :                 btrfs_warn(fs_info, "couldn't read tree root");
    2565           1 :                 return ret;
    2566             :         }
    2567             :         return 0;
    2568             : }
    2569             : 
    2570        3218 : static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
    2571             : {
    2572        3218 :         int backup_index = find_newest_super_backup(fs_info);
    2573        3218 :         struct btrfs_super_block *sb = fs_info->super_copy;
    2574        3218 :         struct btrfs_root *tree_root = fs_info->tree_root;
    2575        3218 :         bool handle_error = false;
    2576        3218 :         int ret = 0;
    2577        3218 :         int i;
    2578             : 
    2579        3219 :         for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
    2580        3219 :                 if (handle_error) {
    2581           1 :                         if (!IS_ERR(tree_root->node))
    2582           1 :                                 free_extent_buffer(tree_root->node);
    2583           1 :                         tree_root->node = NULL;
    2584             : 
    2585           1 :                         if (!btrfs_test_opt(fs_info, USEBACKUPROOT))
    2586             :                                 break;
    2587             : 
    2588           0 :                         free_root_pointers(fs_info, 0);
    2589             : 
    2590             :                         /*
    2591             :                          * Don't use the log in recovery mode, it won't be
    2592             :                          * valid
    2593             :                          */
    2594           0 :                         btrfs_set_super_log_root(sb, 0);
    2595             : 
    2596             :                         /* We can't trust the free space cache either */
    2597           0 :                         btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
    2598             : 
    2599           0 :                         btrfs_warn(fs_info, "try to load backup roots slot %d", i);
    2600           0 :                         ret = read_backup_root(fs_info, i);
    2601           0 :                         backup_index = ret;
    2602           0 :                         if (ret < 0)
    2603           0 :                                 return ret;
    2604             :                 }
    2605             : 
    2606        3218 :                 ret = load_important_roots(fs_info);
    2607        3218 :                 if (ret) {
    2608           1 :                         handle_error = true;
    2609           1 :                         continue;
    2610             :                 }
    2611             : 
    2612             :                 /*
    2613             :                  * No need to hold btrfs_root::objectid_mutex since the fs
    2614             :                  * hasn't been fully initialised and we are the only user
    2615             :                  */
    2616        3217 :                 ret = btrfs_init_root_free_objectid(tree_root);
    2617        3217 :                 if (ret < 0) {
    2618           0 :                         handle_error = true;
    2619           0 :                         continue;
    2620             :                 }
    2621             : 
    2622        3217 :                 ASSERT(tree_root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
    2623             : 
    2624        3217 :                 ret = btrfs_read_roots(fs_info);
    2625        3217 :                 if (ret < 0) {
    2626           0 :                         handle_error = true;
    2627           0 :                         continue;
    2628             :                 }
    2629             : 
    2630             :                 /* All successful */
    2631        3217 :                 fs_info->generation = btrfs_header_generation(tree_root->node);
    2632        3217 :                 fs_info->last_trans_committed = fs_info->generation;
    2633        3217 :                 fs_info->last_reloc_trans = 0;
    2634             : 
    2635             :                 /* Always begin writing backup roots after the one being used */
    2636        3217 :                 if (backup_index < 0) {
    2637           0 :                         fs_info->backup_root_index = 0;
    2638             :                 } else {
    2639        3217 :                         fs_info->backup_root_index = backup_index + 1;
    2640        3217 :                         fs_info->backup_root_index %= BTRFS_NUM_BACKUP_ROOTS;
    2641             :                 }
    2642             :                 break;
    2643             :         }
    2644             : 
    2645             :         return ret;
    2646             : }
    2647             : 
    2648        3473 : void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
    2649             : {
    2650        3473 :         INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
    2651        3473 :         INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
    2652        3473 :         INIT_LIST_HEAD(&fs_info->trans_list);
    2653        3473 :         INIT_LIST_HEAD(&fs_info->dead_roots);
    2654        3473 :         INIT_LIST_HEAD(&fs_info->delayed_iputs);
    2655        3473 :         INIT_LIST_HEAD(&fs_info->delalloc_roots);
    2656        3473 :         INIT_LIST_HEAD(&fs_info->caching_block_groups);
    2657        3473 :         spin_lock_init(&fs_info->delalloc_root_lock);
    2658        3473 :         spin_lock_init(&fs_info->trans_lock);
    2659        3473 :         spin_lock_init(&fs_info->fs_roots_radix_lock);
    2660        3473 :         spin_lock_init(&fs_info->delayed_iput_lock);
    2661        3473 :         spin_lock_init(&fs_info->defrag_inodes_lock);
    2662        3473 :         spin_lock_init(&fs_info->super_lock);
    2663        3473 :         spin_lock_init(&fs_info->buffer_lock);
    2664        3473 :         spin_lock_init(&fs_info->unused_bgs_lock);
    2665        3473 :         spin_lock_init(&fs_info->treelog_bg_lock);
    2666        3473 :         spin_lock_init(&fs_info->zone_active_bgs_lock);
    2667        3473 :         spin_lock_init(&fs_info->relocation_bg_lock);
    2668        3473 :         rwlock_init(&fs_info->tree_mod_log_lock);
    2669        3473 :         rwlock_init(&fs_info->global_root_lock);
    2670        3473 :         mutex_init(&fs_info->unused_bg_unpin_mutex);
    2671        3473 :         mutex_init(&fs_info->reclaim_bgs_lock);
    2672        3473 :         mutex_init(&fs_info->reloc_mutex);
    2673        3473 :         mutex_init(&fs_info->delalloc_root_mutex);
    2674        3473 :         mutex_init(&fs_info->zoned_meta_io_lock);
    2675        3473 :         mutex_init(&fs_info->zoned_data_reloc_io_lock);
    2676        3473 :         seqlock_init(&fs_info->profiles_lock);
    2677             : 
    2678        3473 :         btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers);
    2679        3473 :         btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
    2680        3473 :         btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
    2681        3473 :         btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
    2682        3473 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
    2683             :                                      BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2684        3473 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
    2685             :                                      BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2686        3473 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
    2687             :                                      BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2688        3473 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed,
    2689             :                                      BTRFS_LOCKDEP_TRANS_COMPLETED);
    2690             : 
    2691        3473 :         INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
    2692        3473 :         INIT_LIST_HEAD(&fs_info->space_info);
    2693        3473 :         INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
    2694        3473 :         INIT_LIST_HEAD(&fs_info->unused_bgs);
    2695        3473 :         INIT_LIST_HEAD(&fs_info->reclaim_bgs);
    2696        3473 :         INIT_LIST_HEAD(&fs_info->zone_active_bgs);
    2697             : #ifdef CONFIG_BTRFS_DEBUG
    2698             :         INIT_LIST_HEAD(&fs_info->allocated_roots);
    2699             :         INIT_LIST_HEAD(&fs_info->allocated_ebs);
    2700             :         spin_lock_init(&fs_info->eb_leak_lock);
    2701             : #endif
    2702        3473 :         extent_map_tree_init(&fs_info->mapping_tree);
    2703        3473 :         btrfs_init_block_rsv(&fs_info->global_block_rsv,
    2704             :                              BTRFS_BLOCK_RSV_GLOBAL);
    2705        3473 :         btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
    2706        3473 :         btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
    2707        3473 :         btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
    2708        3473 :         btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
    2709             :                              BTRFS_BLOCK_RSV_DELOPS);
    2710        3473 :         btrfs_init_block_rsv(&fs_info->delayed_refs_rsv,
    2711             :                              BTRFS_BLOCK_RSV_DELREFS);
    2712             : 
    2713        3473 :         atomic_set(&fs_info->async_delalloc_pages, 0);
    2714        3473 :         atomic_set(&fs_info->defrag_running, 0);
    2715        3473 :         atomic_set(&fs_info->nr_delayed_iputs, 0);
    2716        3473 :         atomic64_set(&fs_info->tree_mod_seq, 0);
    2717        3473 :         fs_info->global_root_tree = RB_ROOT;
    2718        3473 :         fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
    2719        3473 :         fs_info->metadata_ratio = 0;
    2720        3473 :         fs_info->defrag_inodes = RB_ROOT;
    2721        3473 :         atomic64_set(&fs_info->free_chunk_space, 0);
    2722        3473 :         fs_info->tree_mod_log = RB_ROOT;
    2723        3473 :         fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
    2724        3473 :         btrfs_init_ref_verify(fs_info);
    2725             : 
    2726        3473 :         fs_info->thread_pool_size = min_t(unsigned long,
    2727             :                                           num_online_cpus() + 2, 8);
    2728             : 
    2729        3473 :         INIT_LIST_HEAD(&fs_info->ordered_roots);
    2730        3473 :         spin_lock_init(&fs_info->ordered_root_lock);
    2731             : 
    2732        3473 :         btrfs_init_scrub(fs_info);
    2733             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    2734             :         fs_info->check_integrity_print_mask = 0;
    2735             : #endif
    2736        3473 :         btrfs_init_balance(fs_info);
    2737        3473 :         btrfs_init_async_reclaim_work(fs_info);
    2738             : 
    2739        3473 :         rwlock_init(&fs_info->block_group_cache_lock);
    2740        3473 :         fs_info->block_group_cache_tree = RB_ROOT_CACHED;
    2741             : 
    2742        3473 :         extent_io_tree_init(fs_info, &fs_info->excluded_extents,
    2743             :                             IO_TREE_FS_EXCLUDED_EXTENTS);
    2744             : 
    2745        3473 :         mutex_init(&fs_info->ordered_operations_mutex);
    2746        3473 :         mutex_init(&fs_info->tree_log_mutex);
    2747        3473 :         mutex_init(&fs_info->chunk_mutex);
    2748        3473 :         mutex_init(&fs_info->transaction_kthread_mutex);
    2749        3473 :         mutex_init(&fs_info->cleaner_mutex);
    2750        3473 :         mutex_init(&fs_info->ro_block_group_mutex);
    2751        3473 :         init_rwsem(&fs_info->commit_root_sem);
    2752        3473 :         init_rwsem(&fs_info->cleanup_work_sem);
    2753        3473 :         init_rwsem(&fs_info->subvol_sem);
    2754        3473 :         sema_init(&fs_info->uuid_tree_rescan_sem, 1);
    2755             : 
    2756        3473 :         btrfs_init_dev_replace_locks(fs_info);
    2757        3473 :         btrfs_init_qgroup(fs_info);
    2758        3473 :         btrfs_discard_init(fs_info);
    2759             : 
    2760        3473 :         btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
    2761        3473 :         btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
    2762             : 
    2763        3473 :         init_waitqueue_head(&fs_info->transaction_throttle);
    2764        3473 :         init_waitqueue_head(&fs_info->transaction_wait);
    2765        3473 :         init_waitqueue_head(&fs_info->transaction_blocked_wait);
    2766        3473 :         init_waitqueue_head(&fs_info->async_submit_wait);
    2767        3473 :         init_waitqueue_head(&fs_info->delayed_iputs_wait);
    2768             : 
    2769             :         /* Usable values until the real ones are cached from the superblock */
    2770        3473 :         fs_info->nodesize = 4096;
    2771        3473 :         fs_info->sectorsize = 4096;
    2772        3473 :         fs_info->sectorsize_bits = ilog2(4096);
    2773        3473 :         fs_info->stripesize = 4096;
    2774             : 
    2775        3473 :         fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
    2776             : 
    2777        3473 :         spin_lock_init(&fs_info->swapfile_pins_lock);
    2778        3473 :         fs_info->swapfile_pins = RB_ROOT;
    2779             : 
    2780        3473 :         fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
    2781        3473 :         INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
    2782        3473 : }
    2783             : 
    2784        3244 : static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
    2785             : {
    2786        3244 :         int ret;
    2787             : 
    2788        3244 :         fs_info->sb = sb;
    2789        3244 :         sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
    2790        3244 :         sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
    2791             : 
    2792        3244 :         ret = percpu_counter_init(&fs_info->ordered_bytes, 0, GFP_KERNEL);
    2793        3244 :         if (ret)
    2794             :                 return ret;
    2795             : 
    2796        3244 :         ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
    2797        3244 :         if (ret)
    2798             :                 return ret;
    2799             : 
    2800           0 :         fs_info->dirty_metadata_batch = PAGE_SIZE *
    2801        3244 :                                         (1 + ilog2(nr_cpu_ids));
    2802             : 
    2803        3244 :         ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
    2804        3244 :         if (ret)
    2805             :                 return ret;
    2806             : 
    2807        3244 :         ret = percpu_counter_init(&fs_info->dev_replace.bio_counter, 0,
    2808             :                         GFP_KERNEL);
    2809        3244 :         if (ret)
    2810             :                 return ret;
    2811             : 
    2812        3244 :         fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
    2813             :                                         GFP_KERNEL);
    2814        3244 :         if (!fs_info->delayed_root)
    2815             :                 return -ENOMEM;
    2816        3244 :         btrfs_init_delayed_root(fs_info->delayed_root);
    2817             : 
    2818        3244 :         if (sb_rdonly(sb))
    2819          38 :                 set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
    2820             : 
    2821        3244 :         return btrfs_alloc_stripe_hash_table(fs_info);
    2822             : }
    2823             : 
    2824         992 : static int btrfs_uuid_rescan_kthread(void *data)
    2825             : {
    2826         992 :         struct btrfs_fs_info *fs_info = data;
    2827         992 :         int ret;
    2828             : 
    2829             :         /*
    2830             :          * 1st step is to iterate through the existing UUID tree and
    2831             :          * to delete all entries that contain outdated data.
    2832             :          * 2nd step is to add all missing entries to the UUID tree.
    2833             :          */
    2834         992 :         ret = btrfs_uuid_tree_iterate(fs_info);
    2835         992 :         if (ret < 0) {
    2836           0 :                 if (ret != -EINTR)
    2837           0 :                         btrfs_warn(fs_info, "iterating uuid_tree failed %d",
    2838             :                                    ret);
    2839           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    2840           0 :                 return ret;
    2841             :         }
    2842         992 :         return btrfs_uuid_scan_kthread(data);
    2843             : }
    2844             : 
    2845         992 : static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
    2846             : {
    2847         992 :         struct task_struct *task;
    2848             : 
    2849         992 :         down(&fs_info->uuid_tree_rescan_sem);
    2850         992 :         task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
    2851         992 :         if (IS_ERR(task)) {
    2852             :                 /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
    2853           0 :                 btrfs_warn(fs_info, "failed to start uuid_rescan task");
    2854           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    2855           0 :                 return PTR_ERR(task);
    2856             :         }
    2857             : 
    2858             :         return 0;
    2859             : }
    2860             : 
    2861             : /*
    2862             :  * Some options only have meaning at mount time and shouldn't persist across
    2863             :  * remounts, or be displayed. Clear these at the end of mount and remount
    2864             :  * code paths.
    2865             :  */
    2866         118 : void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info)
    2867             : {
    2868        3335 :         btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
    2869        3335 :         btrfs_clear_opt(fs_info->mount_opt, CLEAR_CACHE);
    2870         118 : }
    2871             : 
    2872             : /*
    2873             :  * Mounting logic specific to read-write file systems. Shared by open_ctree
    2874             :  * and btrfs_remount when remounting from read-only to read-write.
    2875             :  */
    2876        3181 : int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
    2877             : {
    2878        3181 :         int ret;
    2879        3181 :         const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
    2880        3181 :         bool rebuild_free_space_tree = false;
    2881             : 
    2882        3181 :         if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
    2883          10 :             btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
    2884             :                 rebuild_free_space_tree = true;
    2885        3172 :         } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
    2886        3166 :                    !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
    2887           0 :                 btrfs_warn(fs_info, "free space tree is invalid");
    2888           0 :                 rebuild_free_space_tree = true;
    2889             :         }
    2890             : 
    2891           0 :         if (rebuild_free_space_tree) {
    2892           9 :                 btrfs_info(fs_info, "rebuilding free space tree");
    2893           9 :                 ret = btrfs_rebuild_free_space_tree(fs_info);
    2894           9 :                 if (ret) {
    2895           0 :                         btrfs_warn(fs_info,
    2896             :                                    "failed to rebuild free space tree: %d", ret);
    2897           0 :                         goto out;
    2898             :                 }
    2899             :         }
    2900             : 
    2901        3181 :         if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
    2902        3175 :             !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
    2903           5 :                 btrfs_info(fs_info, "disabling free space tree");
    2904           5 :                 ret = btrfs_delete_free_space_tree(fs_info);
    2905           5 :                 if (ret) {
    2906           0 :                         btrfs_warn(fs_info,
    2907             :                                    "failed to disable free space tree: %d", ret);
    2908           0 :                         goto out;
    2909             :                 }
    2910             :         }
    2911             : 
    2912             :         /*
    2913             :          * btrfs_find_orphan_roots() is responsible for finding all the dead
    2914             :          * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
    2915             :          * them into the fs_info->fs_roots_radix tree. This must be done before
    2916             :          * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
    2917             :          * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
    2918             :          * item before the root's tree is deleted - this means that if we unmount
    2919             :          * or crash before the deletion completes, on the next mount we will not
    2920             :          * delete what remains of the tree because the orphan item does not
    2921             :          * exist anymore, which is what tells us we have a pending deletion.
    2922             :          */
    2923        3181 :         ret = btrfs_find_orphan_roots(fs_info);
    2924        3181 :         if (ret)
    2925           0 :                 goto out;
    2926             : 
    2927        3181 :         ret = btrfs_cleanup_fs_roots(fs_info);
    2928        3181 :         if (ret)
    2929           0 :                 goto out;
    2930             : 
    2931        3181 :         down_read(&fs_info->cleanup_work_sem);
    2932        6362 :         if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
    2933        3181 :             (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
    2934           0 :                 up_read(&fs_info->cleanup_work_sem);
    2935           0 :                 goto out;
    2936             :         }
    2937        3181 :         up_read(&fs_info->cleanup_work_sem);
    2938             : 
    2939        3181 :         mutex_lock(&fs_info->cleaner_mutex);
    2940        3181 :         ret = btrfs_recover_relocation(fs_info);
    2941        3181 :         mutex_unlock(&fs_info->cleaner_mutex);
    2942        3181 :         if (ret < 0) {
    2943           0 :                 btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
    2944           0 :                 goto out;
    2945             :         }
    2946             : 
    2947        3181 :         if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) &&
    2948        3172 :             !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
    2949           2 :                 btrfs_info(fs_info, "creating free space tree");
    2950           2 :                 ret = btrfs_create_free_space_tree(fs_info);
    2951           2 :                 if (ret) {
    2952           0 :                         btrfs_warn(fs_info,
    2953             :                                 "failed to create free space tree: %d", ret);
    2954           0 :                         goto out;
    2955             :                 }
    2956             :         }
    2957             : 
    2958        3181 :         if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) {
    2959           2 :                 ret = btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
    2960           2 :                 if (ret)
    2961           0 :                         goto out;
    2962             :         }
    2963             : 
    2964        3181 :         ret = btrfs_resume_balance_async(fs_info);
    2965        3181 :         if (ret)
    2966           0 :                 goto out;
    2967             : 
    2968        3181 :         ret = btrfs_resume_dev_replace_async(fs_info);
    2969        3181 :         if (ret) {
    2970           0 :                 btrfs_warn(fs_info, "failed to resume dev_replace");
    2971           0 :                 goto out;
    2972             :         }
    2973             : 
    2974        3181 :         btrfs_qgroup_rescan_resume(fs_info);
    2975             : 
    2976        3181 :         if (!fs_info->uuid_root) {
    2977           3 :                 btrfs_info(fs_info, "creating UUID tree");
    2978           3 :                 ret = btrfs_create_uuid_tree(fs_info);
    2979           3 :                 if (ret) {
    2980           0 :                         btrfs_warn(fs_info,
    2981             :                                    "failed to create the UUID tree %d", ret);
    2982           0 :                         goto out;
    2983             :                 }
    2984             :         }
    2985             : 
    2986        3181 : out:
    2987        3181 :         return ret;
    2988             : }
    2989             : 
    2990             : /*
    2991             :  * Do various sanity and dependency checks of different features.
    2992             :  *
    2993             :  * @is_rw_mount:        If the mount is read-write.
    2994             :  *
    2995             :  * This is the place for less strict checks (like for subpage or artificial
    2996             :  * feature dependencies).
    2997             :  *
    2998             :  * For strict checks or possible corruption detection, see
    2999             :  * btrfs_validate_super().
    3000             :  *
    3001             :  * This should be called after btrfs_parse_options(), as some mount options
    3002             :  * (space cache related) can modify on-disk format like free space tree and
    3003             :  * screw up certain feature dependencies.
    3004             :  */
    3005        3336 : int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
    3006             : {
    3007        3336 :         struct btrfs_super_block *disk_super = fs_info->super_copy;
    3008        3336 :         u64 incompat = btrfs_super_incompat_flags(disk_super);
    3009        3336 :         const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
    3010        3336 :         const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);
    3011             : 
    3012        3336 :         if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
    3013           0 :                 btrfs_err(fs_info,
    3014             :                 "cannot mount because of unknown incompat features (0x%llx)",
    3015             :                     incompat);
    3016           0 :                 return -EINVAL;
    3017             :         }
    3018             : 
    3019             :         /* Runtime limitation for mixed block groups. */
    3020        3336 :         if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
    3021          30 :             (fs_info->sectorsize != fs_info->nodesize)) {
    3022           0 :                 btrfs_err(fs_info,
    3023             : "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
    3024             :                         fs_info->nodesize, fs_info->sectorsize);
    3025           0 :                 return -EINVAL;
    3026             :         }
    3027             : 
    3028             :         /* Mixed backref is an always-enabled feature. */
    3029        3336 :         incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
    3030             : 
    3031             :         /* Set compression related flags just in case. */
    3032        3336 :         if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
    3033          19 :                 incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
    3034        3317 :         else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
    3035          10 :                 incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
    3036             : 
    3037             :         /*
    3038             :          * An ancient flag, which should really be marked deprecated.
    3039             :          * Such a runtime limitation doesn't really need an incompat flag.
    3040             :          */
    3041        3336 :         if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
    3042        3306 :                 incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
    3043             : 
    3044        3336 :         if (compat_ro_unsupp && is_rw_mount) {
    3045           0 :                 btrfs_err(fs_info,
    3046             :         "cannot mount read-write because of unknown compat_ro features (0x%llx)",
    3047             :                        compat_ro);
    3048           0 :                 return -EINVAL;
    3049             :         }
    3050             : 
    3051             :         /*
    3052             :          * We have unsupported RO compat features, although RO mounted, we
    3053             :          * should not cause any metadata writes, including log replay.
    3054             :          * Or we could screw up whatever the new feature requires.
    3055             :          */
    3056        3336 :         if (compat_ro_unsupp && btrfs_super_log_root(disk_super) &&
    3057           0 :             !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
    3058           0 :                 btrfs_err(fs_info,
    3059             : "cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
    3060             :                           compat_ro);
    3061           0 :                 return -EINVAL;
    3062             :         }
    3063             : 
    3064             :         /*
    3065             :          * Artificial limitations for block group tree, to force
    3066             :          * block-group-tree to rely on no-holes and free-space-tree.
    3067             :          */
    3068        3336 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
    3069           0 :             (!btrfs_fs_incompat(fs_info, NO_HOLES) ||
    3070           0 :              !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) {
    3071           0 :                 btrfs_err(fs_info,
    3072             : "block-group-tree feature requires no-holes and free-space-tree features");
    3073           0 :                 return -EINVAL;
    3074             :         }
    3075             : 
    3076             :         /*
    3077             :          * Subpage runtime limitation on v1 cache.
    3078             :          *
    3079             :          * V1 space cache still has some hard-coded PAGE_SIZE usage, and since
    3080             :          * we're already defaulting to v2 cache, there's no need to bother with
    3081             :          * v1 as it's going to be deprecated anyway.
    3082             :          */
    3083        3336 :         if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
    3084           0 :                 btrfs_warn(fs_info,
    3085             :         "v1 space cache is not supported for page size %lu with sectorsize %u",
    3086             :                            PAGE_SIZE, fs_info->sectorsize);
    3087           0 :                 return -EINVAL;
    3088             :         }
    3089             : 
    3090             :         /* This can be called by remount, we need to protect the super block. */
    3091        3336 :         spin_lock(&fs_info->super_lock);
    3092        3336 :         btrfs_set_super_incompat_flags(disk_super, incompat);
    3093        3336 :         spin_unlock(&fs_info->super_lock);
    3094             : 
    3095        3336 :         return 0;
    3096             : }
    3097             : 
    3098        3244 : int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
    3099             :                       char *options)
    3100             : {
    3101        3244 :         u32 sectorsize;
    3102        3244 :         u32 nodesize;
    3103        3244 :         u32 stripesize;
    3104        3244 :         u64 generation;
    3105        3244 :         u64 features;
    3106        3244 :         u16 csum_type;
    3107        3244 :         struct btrfs_super_block *disk_super;
    3108        3244 :         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
    3109        3244 :         struct btrfs_root *tree_root;
    3110        3244 :         struct btrfs_root *chunk_root;
    3111        3244 :         int ret;
    3112        3244 :         int level;
    3113             : 
    3114        3244 :         ret = init_mount_fs_info(fs_info, sb);
    3115        3244 :         if (ret)
    3116           0 :                 goto fail;
    3117             : 
    3118             :         /* These need to be init'ed before we start creating inodes and such. */
    3119        3244 :         tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID,
    3120             :                                      GFP_KERNEL);
    3121        3244 :         fs_info->tree_root = tree_root;
    3122        3244 :         chunk_root = btrfs_alloc_root(fs_info, BTRFS_CHUNK_TREE_OBJECTID,
    3123             :                                       GFP_KERNEL);
    3124        3244 :         fs_info->chunk_root = chunk_root;
    3125        3244 :         if (!tree_root || !chunk_root) {
    3126           0 :                 ret = -ENOMEM;
    3127           0 :                 goto fail;
    3128             :         }
    3129             : 
    3130        3244 :         ret = btrfs_init_btree_inode(sb);
    3131        3244 :         if (ret)
    3132           0 :                 goto fail;
    3133             : 
    3134        3244 :         invalidate_bdev(fs_devices->latest_dev->bdev);
    3135             : 
    3136             :         /*
    3137             :          * Read super block and check the signature bytes only
    3138             :          */
    3139        3244 :         disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
    3140        3244 :         if (IS_ERR(disk_super)) {
    3141           0 :                 ret = PTR_ERR(disk_super);
    3142           0 :                 goto fail_alloc;
    3143             :         }
    3144             : 
    3145             :         /*
    3146             :          * Verify the type first, if that or the checksum value are
    3147             :          * corrupted, we'll find out
    3148             :          */
    3149        3244 :         csum_type = btrfs_super_csum_type(disk_super);
    3150        3244 :         if (!btrfs_supported_super_csum(csum_type)) {
    3151           0 :                 btrfs_err(fs_info, "unsupported checksum algorithm: %u",
    3152             :                           csum_type);
    3153           0 :                 ret = -EINVAL;
    3154           0 :                 btrfs_release_disk_super(disk_super);
    3155           0 :                 goto fail_alloc;
    3156             :         }
    3157             : 
    3158        3244 :         fs_info->csum_size = btrfs_super_csum_size(disk_super);
    3159             : 
    3160        3244 :         ret = btrfs_init_csum_hash(fs_info, csum_type);
    3161        3244 :         if (ret) {
    3162           0 :                 btrfs_release_disk_super(disk_super);
    3163           0 :                 goto fail_alloc;
    3164             :         }
    3165             : 
    3166             :         /*
    3167             :          * We want to check superblock checksum, the type is stored inside.
    3168             :          * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
    3169             :          */
    3170        3244 :         if (btrfs_check_super_csum(fs_info, disk_super)) {
    3171           0 :                 btrfs_err(fs_info, "superblock checksum mismatch");
    3172           0 :                 ret = -EINVAL;
    3173           0 :                 btrfs_release_disk_super(disk_super);
    3174           0 :                 goto fail_alloc;
    3175             :         }
    3176             : 
    3177             :         /*
    3178             :          * super_copy is zeroed at allocation time and we never touch the
    3179             :          * following bytes up to INFO_SIZE, the checksum is calculated from
    3180             :          * the whole block of INFO_SIZE
    3181             :          */
    3182        6488 :         memcpy(fs_info->super_copy, disk_super, sizeof(*fs_info->super_copy));
    3183        3244 :         btrfs_release_disk_super(disk_super);
    3184             : 
    3185        3244 :         disk_super = fs_info->super_copy;
    3186             : 
    3187             : 
    3188        3244 :         features = btrfs_super_flags(disk_super);
    3189        3244 :         if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
    3190           0 :                 features &= ~BTRFS_SUPER_FLAG_CHANGING_FSID_V2;
    3191           0 :                 btrfs_set_super_flags(disk_super, features);
    3192           0 :                 btrfs_info(fs_info,
    3193             :                         "found metadata UUID change in progress flag, clearing");
    3194             :         }
    3195             : 
    3196        6488 :         memcpy(fs_info->super_for_commit, fs_info->super_copy,
    3197             :                sizeof(*fs_info->super_for_commit));
    3198             : 
    3199        3244 :         ret = btrfs_validate_mount_super(fs_info);
    3200        3244 :         if (ret) {
    3201           0 :                 btrfs_err(fs_info, "superblock contains fatal errors");
    3202           0 :                 ret = -EINVAL;
    3203           0 :                 goto fail_alloc;
    3204             :         }
    3205             : 
    3206        3244 :         if (!btrfs_super_root(disk_super)) {
    3207           0 :                 btrfs_err(fs_info, "invalid superblock tree root bytenr");
    3208           0 :                 ret = -EINVAL;
    3209           0 :                 goto fail_alloc;
    3210             :         }
    3211             : 
    3212             :         /* check FS state, whether FS is broken. */
    3213        3244 :         if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
    3214           0 :                 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
    3215             : 
    3216             :         /*
    3217             :          * In the long term, we'll store the compression type in the super
    3218             :          * block, and it'll be used for per file compression control.
    3219             :          */
    3220        3244 :         fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
    3221             : 
    3222             : 
    3223             :         /* Set up fs_info before parsing mount options */
    3224        3244 :         nodesize = btrfs_super_nodesize(disk_super);
    3225        3244 :         sectorsize = btrfs_super_sectorsize(disk_super);
    3226        3244 :         stripesize = sectorsize;
    3227        3244 :         fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
    3228        3244 :         fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
    3229             : 
    3230        3244 :         fs_info->nodesize = nodesize;
    3231        3244 :         fs_info->sectorsize = sectorsize;
    3232        3244 :         fs_info->sectorsize_bits = ilog2(sectorsize);
    3233        3244 :         fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
    3234        3244 :         fs_info->stripesize = stripesize;
    3235             : 
    3236        3244 :         ret = btrfs_parse_options(fs_info, options, sb->s_flags);
    3237        3244 :         if (ret)
    3238          26 :                 goto fail_alloc;
    3239             : 
    3240        3218 :         ret = btrfs_check_features(fs_info, !sb_rdonly(sb));
    3241        3218 :         if (ret < 0)
    3242           0 :                 goto fail_alloc;
    3243             : 
    3244        3218 :         if (sectorsize < PAGE_SIZE) {
    3245           0 :                 struct btrfs_subpage_info *subpage_info;
    3246             : 
    3247             :                 /*
    3248             :                  * V1 space cache has some hardcoded PAGE_SIZE usage, and is
    3249             :                  * going to be deprecated.
    3250             :                  *
    3251             :                  * Force to use v2 cache for subpage case.
    3252             :                  */
    3253           0 :                 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
    3254           0 :                 btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
    3255             :                         "forcing free space tree for sector size %u with page size %lu",
    3256             :                         sectorsize, PAGE_SIZE);
    3257             : 
    3258           0 :                 btrfs_warn(fs_info,
    3259             :                 "read-write for sector size %u with page size %lu is experimental",
    3260             :                            sectorsize, PAGE_SIZE);
    3261           0 :                 subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
    3262           0 :                 if (!subpage_info) {
    3263           0 :                         ret = -ENOMEM;
    3264           0 :                         goto fail_alloc;
    3265             :                 }
    3266           0 :                 btrfs_init_subpage_info(subpage_info, sectorsize);
    3267           0 :                 fs_info->subpage_info = subpage_info;
    3268             :         }
    3269             : 
    3270        3218 :         ret = btrfs_init_workqueues(fs_info);
    3271        3218 :         if (ret)
    3272           0 :                 goto fail_sb_buffer;
    3273             : 
    3274        3218 :         sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
    3275        3218 :         sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
    3276             : 
    3277        3218 :         sb->s_blocksize = sectorsize;
    3278        3218 :         sb->s_blocksize_bits = blksize_bits(sectorsize);
    3279        6436 :         memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
    3280             : 
    3281        3218 :         mutex_lock(&fs_info->chunk_mutex);
    3282        3218 :         ret = btrfs_read_sys_array(fs_info);
    3283        3218 :         mutex_unlock(&fs_info->chunk_mutex);
    3284        3218 :         if (ret) {
    3285           0 :                 btrfs_err(fs_info, "failed to read the system array: %d", ret);
    3286           0 :                 goto fail_sb_buffer;
    3287             :         }
    3288             : 
    3289        3218 :         generation = btrfs_super_chunk_root_generation(disk_super);
    3290        3218 :         level = btrfs_super_chunk_root_level(disk_super);
    3291        3218 :         ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
    3292             :                               generation, level);
    3293        3218 :         if (ret) {
    3294           0 :                 btrfs_err(fs_info, "failed to read chunk root");
    3295           0 :                 goto fail_tree_roots;
    3296             :         }
    3297             : 
    3298        3218 :         read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
    3299             :                            offsetof(struct btrfs_header, chunk_tree_uuid),
    3300             :                            BTRFS_UUID_SIZE);
    3301             : 
    3302        3218 :         ret = btrfs_read_chunk_tree(fs_info);
    3303        3218 :         if (ret) {
    3304           0 :                 btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
    3305           0 :                 goto fail_tree_roots;
    3306             :         }
    3307             : 
    3308             :         /*
    3309             :          * At this point we know all the devices that make this filesystem,
    3310             :          * including the seed devices but we don't know yet if the replace
    3311             :          * target is required. So free devices that are not part of this
    3312             :          * filesystem but skip the replace target device which is checked
    3313             :          * below in btrfs_init_dev_replace().
    3314             :          */
    3315        3218 :         btrfs_free_extra_devids(fs_devices);
    3316        3218 :         if (!fs_devices->latest_dev->bdev) {
    3317           0 :                 btrfs_err(fs_info, "failed to read devices");
    3318           0 :                 ret = -EIO;
    3319           0 :                 goto fail_tree_roots;
    3320             :         }
    3321             : 
    3322        3218 :         ret = init_tree_roots(fs_info);
    3323        3218 :         if (ret)
    3324           1 :                 goto fail_tree_roots;
    3325             : 
    3326             :         /*
    3327             :          * Get zone type information of zoned block devices. This will also
    3328             :          * handle emulation of a zoned filesystem if a regular device has the
    3329             :          * zoned incompat feature flag set.
    3330             :          */
    3331        3217 :         ret = btrfs_get_dev_zone_info_all_devices(fs_info);
    3332        3217 :         if (ret) {
    3333             :                 btrfs_err(fs_info,
    3334             :                           "zoned: failed to read device zone info: %d", ret);
    3335             :                 goto fail_block_groups;
    3336             :         }
    3337             : 
    3338             :         /*
    3339             :          * If we have a uuid root and we're not being told to rescan we need to
    3340             :          * check the generation here so we can set the
    3341             :          * BTRFS_FS_UPDATE_UUID_TREE_GEN bit.  Otherwise we could commit the
    3342             :          * transaction during a balance or the log replay without updating the
    3343             :          * uuid generation, and then if we crash we would rescan the uuid tree,
    3344             :          * even though it was perfectly fine.
    3345             :          */
    3346        3217 :         if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) &&
    3347        3213 :             fs_info->generation == btrfs_super_uuid_tree_generation(disk_super))
    3348        2226 :                 set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
    3349             : 
    3350        3217 :         ret = btrfs_verify_dev_extents(fs_info);
    3351        3217 :         if (ret) {
    3352           0 :                 btrfs_err(fs_info,
    3353             :                           "failed to verify dev extents against chunks: %d",
    3354             :                           ret);
    3355           0 :                 goto fail_block_groups;
    3356             :         }
    3357        3217 :         ret = btrfs_recover_balance(fs_info);
    3358        3217 :         if (ret) {
    3359           0 :                 btrfs_err(fs_info, "failed to recover balance: %d", ret);
    3360           0 :                 goto fail_block_groups;
    3361             :         }
    3362             : 
    3363        3217 :         ret = btrfs_init_dev_stats(fs_info);
    3364        3217 :         if (ret) {
    3365           0 :                 btrfs_err(fs_info, "failed to init dev_stats: %d", ret);
    3366           0 :                 goto fail_block_groups;
    3367             :         }
    3368             : 
    3369        3217 :         ret = btrfs_init_dev_replace(fs_info);
    3370        3217 :         if (ret) {
    3371           0 :                 btrfs_err(fs_info, "failed to init dev_replace: %d", ret);
    3372           0 :                 goto fail_block_groups;
    3373             :         }
    3374             : 
    3375        3217 :         ret = btrfs_check_zoned_mode(fs_info);
    3376        3217 :         if (ret) {
    3377             :                 btrfs_err(fs_info, "failed to initialize zoned mode: %d",
    3378             :                           ret);
    3379             :                 goto fail_block_groups;
    3380             :         }
    3381             : 
    3382        3217 :         ret = btrfs_sysfs_add_fsid(fs_devices);
    3383        3217 :         if (ret) {
    3384           0 :                 btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
    3385             :                                 ret);
    3386           0 :                 goto fail_block_groups;
    3387             :         }
    3388             : 
    3389        3217 :         ret = btrfs_sysfs_add_mounted(fs_info);
    3390        3217 :         if (ret) {
    3391           0 :                 btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
    3392           0 :                 goto fail_fsdev_sysfs;
    3393             :         }
    3394             : 
    3395        3217 :         ret = btrfs_init_space_info(fs_info);
    3396        3217 :         if (ret) {
    3397           0 :                 btrfs_err(fs_info, "failed to initialize space info: %d", ret);
    3398           0 :                 goto fail_sysfs;
    3399             :         }
    3400             : 
    3401        3217 :         ret = btrfs_read_block_groups(fs_info);
    3402        3217 :         if (ret) {
    3403           0 :                 btrfs_err(fs_info, "failed to read block groups: %d", ret);
    3404           0 :                 goto fail_sysfs;
    3405             :         }
    3406             : 
    3407        3217 :         btrfs_free_zone_cache(fs_info);
    3408             : 
    3409        3217 :         if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
    3410           0 :             !btrfs_check_rw_degradable(fs_info, NULL)) {
    3411           0 :                 btrfs_warn(fs_info,
    3412             :                 "writable mount is not allowed due to too many missing devices");
    3413           0 :                 ret = -EINVAL;
    3414           0 :                 goto fail_sysfs;
    3415             :         }
    3416             : 
    3417        3217 :         fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
    3418             :                                                "btrfs-cleaner");
    3419        3217 :         if (IS_ERR(fs_info->cleaner_kthread)) {
    3420           0 :                 ret = PTR_ERR(fs_info->cleaner_kthread);
    3421           0 :                 goto fail_sysfs;
    3422             :         }
    3423             : 
    3424        3217 :         fs_info->transaction_kthread = kthread_run(transaction_kthread,
    3425             :                                                    tree_root,
    3426             :                                                    "btrfs-transaction");
    3427        3217 :         if (IS_ERR(fs_info->transaction_kthread)) {
    3428           0 :                 ret = PTR_ERR(fs_info->transaction_kthread);
    3429           0 :                 goto fail_cleaner;
    3430             :         }
    3431             : 
    3432        3217 :         if (!btrfs_test_opt(fs_info, NOSSD) &&
    3433        3215 :             !fs_info->fs_devices->rotating) {
    3434          16 :                 btrfs_set_and_info(fs_info, SSD, "enabling ssd optimizations");
    3435             :         }
    3436             : 
    3437             :         /*
    3438             :          * For devices supporting discard turn on discard=async automatically,
    3439             :          * unless it's already set or disabled. This could be turned off by
    3440             :          * nodiscard for the same mount.
    3441             :          *
    3442             :          * The zoned mode piggy backs on the discard functionality for
    3443             :          * resetting a zone. There is no reason to delay the zone reset as it is
    3444             :          * fast enough. So, do not enable async discard for zoned mode.
    3445             :          */
    3446        3217 :         if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
    3447             :               btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
    3448        3208 :               btrfs_test_opt(fs_info, NODISCARD)) &&
    3449        3208 :             fs_info->fs_devices->discardable &&
    3450             :             !btrfs_is_zoned(fs_info)) {
    3451        3204 :                 btrfs_set_and_info(fs_info, DISCARD_ASYNC,
    3452             :                                    "auto enabling async discard");
    3453             :         }
    3454             : 
    3455             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    3456             :         if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) {
    3457             :                 ret = btrfsic_mount(fs_info, fs_devices,
    3458             :                                     btrfs_test_opt(fs_info,
    3459             :                                         CHECK_INTEGRITY_DATA) ? 1 : 0,
    3460             :                                     fs_info->check_integrity_print_mask);
    3461             :                 if (ret)
    3462             :                         btrfs_warn(fs_info,
    3463             :                                 "failed to initialize integrity check module: %d",
    3464             :                                 ret);
    3465             :         }
    3466             : #endif
    3467        3217 :         ret = btrfs_read_qgroup_config(fs_info);
    3468        3217 :         if (ret)
    3469           0 :                 goto fail_trans_kthread;
    3470             : 
    3471        3217 :         if (btrfs_build_ref_tree(fs_info))
    3472             :                 btrfs_err(fs_info, "couldn't build ref tree");
    3473             : 
    3474             :         /* do not make disk changes in broken FS or nologreplay is given */
    3475        3217 :         if (btrfs_super_log_root(disk_super) != 0 &&
    3476         285 :             !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
    3477         285 :                 btrfs_info(fs_info, "start tree-log replay");
    3478         285 :                 ret = btrfs_replay_log(fs_info, fs_devices);
    3479         285 :                 if (ret)
    3480           0 :                         goto fail_qgroup;
    3481             :         }
    3482             : 
    3483        3217 :         fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
    3484        3217 :         if (IS_ERR(fs_info->fs_root)) {
    3485           0 :                 ret = PTR_ERR(fs_info->fs_root);
    3486           0 :                 btrfs_warn(fs_info, "failed to read fs tree: %d", ret);
    3487           0 :                 fs_info->fs_root = NULL;
    3488           0 :                 goto fail_qgroup;
    3489             :         }
    3490             : 
    3491        3217 :         if (sb_rdonly(sb))
    3492          38 :                 goto clear_oneshot;
    3493             : 
    3494        3179 :         ret = btrfs_start_pre_rw_mount(fs_info);
    3495        3179 :         if (ret) {
    3496           0 :                 close_ctree(fs_info);
    3497           0 :                 return ret;
    3498             :         }
    3499        3179 :         btrfs_discard_resume(fs_info);
    3500             : 
    3501        3179 :         if (fs_info->uuid_root &&
    3502        3179 :             (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
    3503        3178 :              fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
    3504         992 :                 btrfs_info(fs_info, "checking UUID tree");
    3505         992 :                 ret = btrfs_check_uuid_tree(fs_info);
    3506         992 :                 if (ret) {
    3507           0 :                         btrfs_warn(fs_info,
    3508             :                                 "failed to check the UUID tree: %d", ret);
    3509           0 :                         close_ctree(fs_info);
    3510           0 :                         return ret;
    3511             :                 }
    3512             :         }
    3513             : 
    3514        3179 :         set_bit(BTRFS_FS_OPEN, &fs_info->flags);
    3515             : 
    3516             :         /* Kick the cleaner thread so it'll start deleting snapshots. */
    3517        6358 :         if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags))
    3518           0 :                 wake_up_process(fs_info->cleaner_kthread);
    3519             : 
    3520        3179 : clear_oneshot:
    3521        3217 :         btrfs_clear_oneshot_options(fs_info);
    3522        3217 :         return 0;
    3523             : 
    3524           0 : fail_qgroup:
    3525           0 :         btrfs_free_qgroup_config(fs_info);
    3526           0 : fail_trans_kthread:
    3527           0 :         kthread_stop(fs_info->transaction_kthread);
    3528           0 :         btrfs_cleanup_transaction(fs_info);
    3529           0 :         btrfs_free_fs_roots(fs_info);
    3530           0 : fail_cleaner:
    3531           0 :         kthread_stop(fs_info->cleaner_kthread);
    3532             : 
    3533             :         /*
    3534             :          * make sure we're done with the btree inode before we stop our
    3535             :          * kthreads
    3536             :          */
    3537           0 :         filemap_write_and_wait(fs_info->btree_inode->i_mapping);
    3538             : 
    3539           0 : fail_sysfs:
    3540           0 :         btrfs_sysfs_remove_mounted(fs_info);
    3541             : 
    3542           0 : fail_fsdev_sysfs:
    3543           0 :         btrfs_sysfs_remove_fsid(fs_info->fs_devices);
    3544             : 
    3545           0 : fail_block_groups:
    3546           0 :         btrfs_put_block_group_cache(fs_info);
    3547             : 
    3548           1 : fail_tree_roots:
    3549           1 :         if (fs_info->data_reloc_root)
    3550           0 :                 btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
    3551           1 :         free_root_pointers(fs_info, true);
    3552           1 :         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
    3553             : 
    3554           1 : fail_sb_buffer:
    3555           1 :         btrfs_stop_all_workers(fs_info);
    3556           1 :         btrfs_free_block_groups(fs_info);
    3557          27 : fail_alloc:
    3558          27 :         btrfs_mapping_tree_free(&fs_info->mapping_tree);
    3559             : 
    3560          27 :         iput(fs_info->btree_inode);
    3561          27 : fail:
    3562          27 :         btrfs_close_devices(fs_info->fs_devices);
    3563          27 :         ASSERT(ret < 0);
    3564          27 :         return ret;
    3565             : }
    3566             : ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
    3567             : 
    3568      653223 : static void btrfs_end_super_write(struct bio *bio)
    3569             : {
                               /*
                                * Completion handler for superblock write bios; write_dev_supers()
                                * installs it as bi_end_io and stores the target btrfs_device in
                                * bi_private.
                                *
                                * For every page attached to the bio: record the I/O outcome in the
                                * page flags (and in the device write-error statistics on failure),
                                * then drop one page reference and unlock the page. Finally the bio
                                * itself is released.
                                */
    3570      653223 :         struct btrfs_device *device = bio->bi_private;
    3571      653223 :         struct bio_vec *bvec;
    3572      653223 :         struct bvec_iter_all iter_all;
    3573      653223 :         struct page *page;
    3574             : 
    3575     1306446 :         bio_for_each_segment_all(bvec, bio, iter_all) {
    3576      653223 :                 page = bvec->bv_page;
    3577             : 
    3578      653223 :                 if (bio->bi_status) {
                                               /* Write failed: warn (rate limited), mark the page bad, count it. */
    3579           0 :                         btrfs_warn_rl_in_rcu(device->fs_info,
    3580             :                                 "lost page write due to IO error on %s (%d)",
    3581             :                                 btrfs_dev_name(device),
    3582             :                                 blk_status_to_errno(bio->bi_status));
    3583           0 :                         ClearPageUptodate(page);
    3584           0 :                         SetPageError(page);
    3585           0 :                         btrfs_dev_stat_inc_and_print(device,
    3586             :                                                      BTRFS_DEV_STAT_WRITE_ERRS);
    3587             :                 } else {
    3588      653223 :                         SetPageUptodate(page);
    3589             :                 }
    3590             : 
                                       /*
                                        * write_dev_supers() takes an extra get_page() reference "for
                                        * wait_dev_supers()", so presumably another reference still pins
                                        * the page when it is unlocked after this put_page() - TODO confirm
                                        * against wait_dev_supers().
                                        */
    3591      653223 :                 put_page(page);
    3592      653223 :                 unlock_page(page);
    3593             :         }
    3594             : 
    3595      653223 :         bio_put(bio);
    3596      653223 : }
    3597             : 
    3598        6531 : struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
    3599             :                                                    int copy_num, bool drop_cache)
    3600             : {
                               /*
                                * Read superblock copy @copy_num from @bdev through the block
                                * device's page cache mapping (bdev->bd_inode->i_mapping).
                                *
                                * @bdev:       block device to read from
                                * @copy_num:   index of the superblock copy to read
                                * @drop_cache: when true, invalidate the cached page range covering
                                *              the superblock before reading; asserted to be used
                                *              only with copy 0
                                *
                                * Return: page_address() of the page holding the superblock on
                                * success, otherwise an ERR_PTR():
                                *   -EINVAL   the location lookup returned -ENOENT, the copy would
                                *             reach or pass the end of the device, or the bytenr
                                *             stored in the superblock does not equal
                                *             btrfs_sb_offset(@copy_num)
                                *   -ENODATA  the magic is not BTRFS_MAGIC
                                *   other     error propagated from btrfs_sb_log_location_bdev() or
                                *             read_cache_page_gfp()
                                *
                                * The error paths below hand the mapped superblock to
                                * btrfs_release_disk_super(); presumably the caller must do the same
                                * with a successfully returned pointer - confirm against that helper.
                                */
    3601        6531 :         struct btrfs_super_block *super;
    3602        6531 :         struct page *page;
    3603        6531 :         u64 bytenr, bytenr_orig;
    3604        6531 :         struct address_space *mapping = bdev->bd_inode->i_mapping;
    3605        6531 :         int ret;
    3606             : 
                               /*
                                * bytenr_orig is the offset expected to be recorded inside the
                                * superblock; bytenr is where the copy is actually read from.
                                */
    3607        6531 :         bytenr_orig = btrfs_sb_offset(copy_num);
    3608        6531 :         ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
    3609        6531 :         if (ret == -ENOENT)
    3610             :                 return ERR_PTR(-EINVAL);
    3611        6531 :         else if (ret)
    3612             :                 return ERR_PTR(ret);
    3613             : 
                               /* Reject a copy whose end would reach or exceed the device size. */
    3614        6531 :         if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
    3615             :                 return ERR_PTR(-EINVAL);
    3616             : 
    3617        6531 :         if (drop_cache) {
    3618             :                 /* This should only be called with the primary sb. */
    3619          43 :                 ASSERT(copy_num == 0);
    3620             : 
    3621             :                 /*
    3622             :                  * Drop the page of the primary superblock, so later read will
    3623             :                  * always read from the device.
    3624             :                  */
    3625          43 :                 invalidate_inode_pages2_range(mapping,
    3626          43 :                                 bytenr >> PAGE_SHIFT,
    3627          43 :                                 (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
    3628             :         }
    3629             : 
    3630        6531 :         page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
    3631        6531 :         if (IS_ERR(page))
    3632             :                 return ERR_CAST(page);
    3633             : 
                               /*
                                * NOTE(review): the superblock is taken from the start of the page,
                                * without adding offset_in_page(bytenr); this looks like it assumes
                                * bytenr is page aligned - confirm for PAGE_SIZE larger than
                                * BTRFS_SUPER_INFO_SIZE.
                                */
    3634        6531 :         super = page_address(page);
    3635        6531 :         if (btrfs_super_magic(super) != BTRFS_MAGIC) {
    3636           0 :                 btrfs_release_disk_super(super);
    3637           0 :                 return ERR_PTR(-ENODATA);
    3638             :         }
    3639             : 
                               /* The offset recorded in the superblock must match the expected one. */
    3640        6531 :         if (btrfs_super_bytenr(super) != bytenr_orig) {
    3641           0 :                 btrfs_release_disk_super(super);
    3642           0 :                 return ERR_PTR(-EINVAL);
    3643             :         }
    3644             : 
    3645             :         return super;
    3646             : }
    3647             : 
    3648             : 
    3649        6488 : struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
    3650             : {
                               /*
                                * Read the superblock of @bdev. The loop bound below is 1, so only
                                * copy 0 is read, without dropping the page cache first.
                                *
                                * Return: whatever btrfs_read_dev_one_super() returned for copy 0,
                                * i.e. either a superblock pointer or an ERR_PTR().
                                */
    3651        6488 :         struct btrfs_super_block *super, *latest = NULL;
    3652        6488 :         int i;
    3653        6488 :         u64 transid = 0;
    3654             : 
    3655             :         /* we would like to check all the supers, but that would make
    3656             :          * a btrfs mount succeed after a mkfs from a different FS.
    3657             :          * So, we need to add a special mount option to scan for
    3658             :          * later supers, using BTRFS_SUPER_MIRROR_MAX instead
    3659             :          */
    3660       12976 :         for (i = 0; i < 1; i++) {
    3661        6488 :                 super = btrfs_read_dev_one_super(bdev, i, false);
    3662        6488 :                 if (IS_ERR(super))
    3663           0 :                         continue;
    3664             : 
                                       /* Keep the copy with the highest generation seen so far. */
    3665        6488 :                 if (!latest || btrfs_super_generation(super) > transid) {
                                               /*
                                                * NOTE(review): this releases the newly read super, which
                                                * is then stored in latest, rather than the previous
                                                * latest. With a single iteration latest is always NULL
                                                * here, so the branch is never taken; revisit before
                                                * raising the loop bound.
                                                */
    3666        6488 :                         if (latest)
    3667           0 :                                 btrfs_release_disk_super(super);
    3668             : 
    3669        6488 :                         latest = super;
    3670        6488 :                         transid = btrfs_super_generation(super);
    3671             :                 }
    3672             :         }
    3673             : 
                               /*
                                * NOTE(review): returns the result of the last read, not latest.
                                * With one iteration the two only differ on failure, where super
                                * is an ERR_PTR() and latest is NULL - callers presumably test
                                * IS_ERR(); confirm before changing.
                                */
    3674        6488 :         return super;
    3675             : }
    3676             : 
    3677             : /*
    3678             :  * Write superblock @sb to the @device. Do not wait for completion, all the
    3679             :  * pages we use for writing are locked.
    3680             :  *
    3681             :  * Write @max_mirrors copies of the superblock, where 0 means default that fit
    3682             :  * the expected device size at commit time. Note that max_mirrors must be
    3683             :  * same for write and wait phases.
    3684             :  *
    3685             :  * Return number of errors when page is not found or submission fails.
    3686             :  */
    3687      447051 : static int write_dev_supers(struct btrfs_device *device,
    3688             :                             struct btrfs_super_block *sb, int max_mirrors)
    3689             : {
    3690      447051 :         struct btrfs_fs_info *fs_info = device->fs_info;
    3691      447051 :         struct address_space *mapping = device->bdev->bd_inode->i_mapping;
    3692      447051 :         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
    3693      447051 :         int i;
    3694      447051 :         int errors = 0;
    3695      447051 :         int ret;
    3696      447051 :         u64 bytenr, bytenr_orig;
    3697             : 
    3698      447051 :         if (max_mirrors == 0)
    3699      206170 :                 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
    3700             : 
    3701      447051 :         shash->tfm = fs_info->csum_shash;
    3702             : 
    3703     1100274 :         for (i = 0; i < max_mirrors; i++) {
    3704      859391 :                 struct page *page;
    3705      859391 :                 struct bio *bio;
    3706      859391 :                 struct btrfs_super_block *disk_super;
    3707             : 
    3708      859391 :                 bytenr_orig = btrfs_sb_offset(i);
    3709      859391 :                 ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
    3710      859391 :                 if (ret == -ENOENT) {
    3711             :                         continue;
    3712      859391 :                 } else if (ret < 0) {
    3713             :                         btrfs_err(device->fs_info,
    3714             :                                 "couldn't get super block location for mirror %d",
    3715             :                                 i);
    3716             :                         errors++;
    3717             :                         continue;
    3718             :                 }
    3719      859391 :                 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
    3720      859391 :                     device->commit_total_bytes)
    3721             :                         break;
    3722             : 
    3723      653223 :                 btrfs_set_super_bytenr(sb, bytenr_orig);
    3724             : 
    3725      653223 :                 crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
    3726             :                                     BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
    3727      653223 :                                     sb->csum);
    3728             : 
    3729      653223 :                 page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
    3730             :                                            GFP_NOFS);
    3731      653223 :                 if (!page) {
    3732           0 :                         btrfs_err(device->fs_info,
    3733             :                             "couldn't get super block page for bytenr %llu",
    3734             :                             bytenr);
    3735           0 :                         errors++;
    3736           0 :                         continue;
    3737             :                 }
    3738             : 
    3739             :                 /* Bump the refcount for wait_dev_supers() */
    3740      653223 :                 get_page(page);
    3741             : 
    3742      653223 :                 disk_super = page_address(page);
    3743     1306446 :                 memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
    3744             : 
    3745             :                 /*
    3746             :                  * Directly use bios here instead of relying on the page cache
    3747             :                  * to do I/O, so we don't lose the ability to do integrity
    3748             :                  * checking.
    3749             :                  */
    3750      653223 :                 bio = bio_alloc(device->bdev, 1,
    3751             :                                 REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO,
    3752             :                                 GFP_NOFS);
    3753      653223 :                 bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
    3754      653223 :                 bio->bi_private = device;
    3755      653223 :                 bio->bi_end_io = btrfs_end_super_write;
    3756      653223 :                 __bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
    3757             :                                offset_in_page(bytenr));
    3758             : 
    3759             :                 /*
    3760             :                  * We FUA only the first super block.  The others we allow to
    3761             :                  * go down lazy and there's a short window where the on-disk
    3762             :                  * copies might still contain the older version.
    3763             :                  */
    3764      653223 :                 if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
    3765      447050 :                         bio->bi_opf |= REQ_FUA;
    3766             : 
    3767      653223 :                 btrfsic_check_bio(bio);
    3768      653223 :                 submit_bio(bio);
    3769             : 
    3770      653223 :                 if (btrfs_advance_sb_log(device, i))
    3771             :                         errors++;
    3772             :         }
    3773      447051 :         return errors < i ? 0 : -1;
    3774             : }
    3775             : 
    3776             : /*
    3777             :  * Wait for write completion of superblocks done by write_dev_supers,
    3778             :  * @max_mirrors same for write and wait phases.
    3779             :  *
    3780             :  * A mirror is counted as an error when its location lookup fails with
    3781             :  * anything other than -ENOENT, when its page is not found in the block
                     :  * device mapping, or when the page has PageError set after it is unlocked.
                     :  *
                     :  * Return 0 if fewer errors were counted than the loop index reached, i.e.
                     :  * at least one visited mirror was fine; -1 otherwise, which includes the
                     :  * case where the loop stops before completing mirror 0. Any error on
                     :  * mirror 0 (the primary super block) is logged and forces -1.
    3782             :  */
    3783      447051 : static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
    3784             : {
    3785      447051 :         int i;
    3786      447051 :         int errors = 0;
    3787      447051 :         bool primary_failed = false;
    3788      447051 :         int ret;
    3789      447051 :         u64 bytenr;
    3790             : 
    3791      447051 :         if (max_mirrors == 0)
    3792      206170 :                 max_mirrors = BTRFS_SUPER_MIRROR_MAX; /* 0 means: go through every mirror */
    3793             : 
    3794     1100274 :         for (i = 0; i < max_mirrors; i++) {
    3795      859391 :                 struct page *page;
    3796             : 
    3797      859391 :                 ret = btrfs_sb_log_location(device, i, READ, &bytenr);
    3798      859391 :                 if (ret == -ENOENT) {
    3799             :                         break;
    3800      859391 :                 } else if (ret < 0) {
    3801             :                         errors++;
    3802             :                         if (i == 0)
    3803             :                                 primary_failed = true;
    3804             :                         continue;
    3805             :                 }
    3806      859391 :                 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
    3807      859391 :                     device->commit_total_bytes)
    3808             :                         break; /* this mirror would end at or past commit_total_bytes */
    3809             : 
    3810      653223 :                 page = find_get_page(device->bdev->bd_inode->i_mapping,
    3811      653223 :                                      bytenr >> PAGE_SHIFT);
    3812      653223 :                 if (!page) {
    3813           0 :                         errors++;
    3814           0 :                         if (i == 0)
    3815           0 :                                 primary_failed = true;
    3816           0 :                         continue;
    3817             :                 }
    3818             :                 /* Page is submitted locked and unlocked once the IO completes */
    3819      653223 :                 wait_on_page_locked(page);
    3820      653223 :                 if (PageError(page)) {
    3821           0 :                         errors++;
    3822           0 :                         if (i == 0)
    3823           0 :                                 primary_failed = true;
    3824             :                 }
    3825             : 
    3826             :                 /* Drop our reference (the one from find_get_page() above) */
    3827      653223 :                 put_page(page);
    3828             : 
    3829             :                 /* Drop the extra reference write_dev_supers() took with get_page() */
    3830      653223 :                 put_page(page);
    3831             :         }
    3832             : 
    3833             :         /* log error, force error return */
    3834      447051 :         if (primary_failed) {
    3835           0 :                 btrfs_err(device->fs_info, "error writing primary super block to device %llu",
    3836             :                           device->devid);
    3837           0 :                 return -1;
    3838             :         }
    3839             : 
    3840      447051 :         return errors < i ? 0 : -1;
    3841             : }
    3842             : 
    3843             : /*
    3844             :  * End-io handler for the flush bio submitted by write_dev_flush(); it wakes
    3845             :  * anyone waiting for the barrier when it is done.
                     :  *
                     :  * Calls bio_uninit() on the bio and then signals the completion stored in
                     :  * bio->bi_private. write_dev_flush() points bi_private at
                     :  * device->flush_wait, which is what wait_dev_flush() waits on.
    3846             :  */
    3847         739 : static void btrfs_end_empty_barrier(struct bio *bio)
    3848             : {
    3849         739 :         bio_uninit(bio);
    3850         739 :         complete(bio->bi_private);
    3851         739 : }
    3852             : 
    3853             : /*
    3854             :  * Submit a flush request to the device if it supports it. Error handling is
    3855             :  * done in the waiting counterpart.
                     :  *
                     :  * device->last_flush_error is reset to BLK_STS_OK first. When a bio is
                     :  * actually submitted, BTRFS_DEV_STATE_FLUSH_SENT is set in dev_state so
                     :  * that wait_dev_flush() knows there is something to wait for.
    3856             :  */
    3857      447050 : static void write_dev_flush(struct btrfs_device *device)
    3858             : {
    3859      447050 :         struct bio *bio = &device->flush_bio;
    3860             : 
    3861      447050 :         device->last_flush_error = BLK_STS_OK;
    3862             : 
    3863             : #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    3864             :         /*
    3865             :          * When a disk has write caching disabled, we skip submission of a bio
    3866             :          * with flush and sync requests before writing the superblock, since
    3867             :          * it's not needed. However when the integrity checker is enabled, this
    3868             :          * results in reports that there are metadata blocks referred by a
    3869             :          * superblock that were not properly flushed. So the skip is compiled out
    3870             :          * when the integrity checker is enabled and the bio is always submitted,
    3871             :          * for the sake of simplicity, since this is a debug tool and not meant
    3872             :          * for use in non-debug builds.
    3873             :          */
    3874      447050 :         if (!bdev_write_cache(device->bdev))
    3875             :                 return;
    3876             : #endif
    3877             : 
    3878         739 :         bio_init(bio, device->bdev, NULL, 0,
    3879             :                  REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH); /* initialised with a NULL vector table and 0 vecs */
    3880         739 :         bio->bi_end_io = btrfs_end_empty_barrier;
    3881         739 :         init_completion(&device->flush_wait);
    3882         739 :         bio->bi_private = &device->flush_wait; /* signalled by btrfs_end_empty_barrier() */
    3883             : 
    3884         739 :         btrfsic_check_bio(bio);
    3885         739 :         submit_bio(bio);
    3886         739 :         set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
    3887             : }
    3888             : 
    3889             : /*
    3890             :  * If the flush bio has been submitted by write_dev_flush, wait for it.
    3891             :  * Return true for any error, and false otherwise.
                     :  *
                     :  * If BTRFS_DEV_STATE_FLUSH_SENT is not set (no flush was submitted) this
                     :  * returns false without waiting; the bit is cleared either way. On error
                     :  * the bio status is saved in device->last_flush_error and the
                     :  * BTRFS_DEV_STAT_FLUSH_ERRS device statistic is bumped.
    3892             :  */
    3893      447050 : static bool wait_dev_flush(struct btrfs_device *device)
    3894             : {
    3895      447050 :         struct bio *bio = &device->flush_bio;
    3896             : 
    3897      447050 :         if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
    3898             :                 return false;
    3899             : 
    3900         739 :         wait_for_completion_io(&device->flush_wait);
    3901             : 
    3902         739 :         if (bio->bi_status) {
    3903           0 :                 device->last_flush_error = bio->bi_status;
    3904           0 :                 btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS);
    3905           0 :                 return true;
    3906             :         }
    3907             : 
    3908             :         return false;
    3909             : }
    3910             : 
    3911             : /*
    3912             :  * send an empty flush down to each device in parallel,
    3913             :  * then wait for them
                     :  *
                     :  * The caller must hold fs_devices->device_list_mutex (checked with
                     :  * lockdep_assert_held()). Devices that are flagged MISSING, have no bdev,
                     :  * or are not both IN_FS_METADATA and WRITEABLE get no flush. In the wait
                     :  * pass a non-missing device without a bdev is counted as an error, as is
                     :  * every device for which wait_dev_flush() reports one.
                     :  *
                     :  * Return -EIO only if at least one error was counted and
                     :  * btrfs_check_rw_degradable() returns false; 0 otherwise.
    3914             :  */
    3915      447050 : static int barrier_all_devices(struct btrfs_fs_info *info)
    3916             : {
    3917      447050 :         struct list_head *head;
    3918      447050 :         struct btrfs_device *dev;
    3919      447050 :         int errors_wait = 0;
    3920             : 
    3921      447050 :         lockdep_assert_held(&info->fs_devices->device_list_mutex);
    3922             :         /* send down all the barriers */
    3923      447050 :         head = &info->fs_devices->devices;
    3924      894100 :         list_for_each_entry(dev, head, dev_list) {
    3925      894100 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
    3926           0 :                         continue;
    3927      447050 :                 if (!dev->bdev)
    3928           0 :                         continue; /* not counted here; the wait pass below counts it */
    3929      447050 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    3930           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    3931           0 :                         continue;
    3932             : 
    3933      447050 :                 write_dev_flush(dev);
    3934             :         }
    3935             : 
    3936             :         /* wait for all the barriers */
    3937      894100 :         list_for_each_entry(dev, head, dev_list) {
    3938      894100 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
    3939           0 :                         continue;
    3940      447050 :                 if (!dev->bdev) {
    3941           0 :                         errors_wait++;
    3942           0 :                         continue;
    3943             :                 }
    3944      447050 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    3945           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    3946           0 :                         continue;
    3947             : 
    3948      447050 :                 if (wait_dev_flush(dev))
    3949           0 :                         errors_wait++;
    3950             :         }
    3951             : 
    3952             :         /*
    3953             :          * Checks last_flush_error of disks in order to determine the device
    3954             :          * state.
    3955             :          */
    3956      447050 :         if (errors_wait && !btrfs_check_rw_degradable(info, NULL))
    3957           0 :                 return -EIO;
    3958             : 
    3959             :         return 0;
    3960             : }
    3961             : /*
                     :  * Return the smallest tolerated_failures value, taken from
                     :  * btrfs_raid_array[], among the RAID profiles selected by @flags.
                     :  *
                     :  * The SINGLE profile takes part when @flags has no bit of
                     :  * BTRFS_BLOCK_GROUP_PROFILE_MASK set or has BTRFS_AVAIL_ALLOC_BIT_SINGLE
                     :  * set; every other profile takes part when its bg_flag is set in @flags.
                     :  * If no profile matched at all, a warning is printed and 0 is returned.
                     :  */
    3962         388 : int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
    3963             : {
    3964         388 :         int raid_type;
    3965         388 :         int min_tolerated = INT_MAX; /* INT_MAX doubles as "nothing matched yet" */
    3966             : 
    3967         388 :         if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
    3968         193 :             (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
    3969         195 :                 min_tolerated = min_t(int, min_tolerated,
    3970             :                                     btrfs_raid_array[BTRFS_RAID_SINGLE].
    3971             :                                     tolerated_failures);
    3972             : 
    3973        3880 :         for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
    3974        3492 :                 if (raid_type == BTRFS_RAID_SINGLE)
    3975         388 :                         continue; /* SINGLE was handled before the loop */
    3976        3104 :                 if (!(flags & btrfs_raid_array[raid_type].bg_flag))
    3977        2911 :                         continue;
    3978         193 :                 min_tolerated = min_t(int, min_tolerated,
    3979             :                                     btrfs_raid_array[raid_type].
    3980             :                                     tolerated_failures);
    3981             :         }
    3982             : 
    3983         388 :         if (min_tolerated == INT_MAX) {
    3984           0 :                 pr_warn("BTRFS: unknown raid flag: %llu", flags);
    3985           0 :                 min_tolerated = 0;
    3986             :         }
    3987             : 
    3988         388 :         return min_tolerated;
    3989             : }
    3990             : /*
                     :  * Write the super block held in fs_info->super_for_commit to every device
                     :  * that has a bdev and is both IN_FS_METADATA and WRITEABLE, then wait for
                     :  * those writes.
                     :  *
                     :  * @max_mirrors is handed through to write_dev_supers() and
                     :  * wait_dev_supers(); a value of 0 additionally triggers
                     :  * backup_super_roots() first.
                     :  *
                     :  * Everything between taking and releasing device_list_mutex runs in three
                     :  * steps: (1) unless the NOBARRIER option is set, barrier_all_devices();
                     :  * (2) for each device, fill sb->dev_item from the device, set
                     :  * BTRFS_HEADER_FLAG_WRITTEN, validate the super block and submit it;
                     :  * (3) for each device, wait for the submitted writes. The submit pass and
                     :  * the wait pass each tolerate failures on up to
                     :  * btrfs_super_num_devices(super_copy) - 1 devices.
                     :  *
                     :  * Return 0 on success, the error from barrier_all_devices(), -EUCLEAN when
                     :  * btrfs_validate_write_super() fails, or -EIO when too many devices
                     :  * failed. Every error return also calls btrfs_handle_fs_error().
                     :  */
    3991      447051 : int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
    3992             : {
    3993      447051 :         struct list_head *head;
    3994      447051 :         struct btrfs_device *dev;
    3995      447051 :         struct btrfs_super_block *sb;
    3996      447051 :         struct btrfs_dev_item *dev_item;
    3997      447051 :         int ret;
    3998      447051 :         int do_barriers;
    3999      447051 :         int max_errors;
    4000      447051 :         int total_errors = 0;
    4001      447051 :         u64 flags;
    4002             : 
    4003      447051 :         do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
    4004             : 
    4005             :         /*
    4006             :          * max_mirrors == 0 indicates we're from commit_transaction,
    4007             :          * not from fsync where the tree roots in fs_info have not
    4008             :          * been consistent on disk.
    4009             :          */
    4010      447051 :         if (max_mirrors == 0)
    4011      206170 :                 backup_super_roots(fs_info);
    4012             : 
    4013      447051 :         sb = fs_info->super_for_commit;
    4014      447051 :         dev_item = &sb->dev_item;
    4015             : 
    4016      447051 :         mutex_lock(&fs_info->fs_devices->device_list_mutex);
    4017      447051 :         head = &fs_info->fs_devices->devices;
    4018      447051 :         max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1; /* all but one device may fail */
    4019             : 
    4020      447051 :         if (do_barriers) {
    4021      447050 :                 ret = barrier_all_devices(fs_info);
    4022      447050 :                 if (ret) {
    4023           0 :                         mutex_unlock(
    4024           0 :                                 &fs_info->fs_devices->device_list_mutex);
    4025           0 :                         btrfs_handle_fs_error(fs_info, ret,
    4026             :                                               "errors while submitting device barriers.");
    4027           0 :                         return ret;
    4028             :                 }
    4029             :         }
    4030             : 
    4031      894102 :         list_for_each_entry(dev, head, dev_list) {
    4032      447051 :                 if (!dev->bdev) {
    4033           0 :                         total_errors++;
    4034           0 :                         continue;
    4035             :                 }
    4036      447051 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    4037           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    4038           0 :                         continue;
    4039             : 
    4040      447051 :                 btrfs_set_stack_device_generation(dev_item, 0);
    4041      447051 :                 btrfs_set_stack_device_type(dev_item, dev->type);
    4042      447051 :                 btrfs_set_stack_device_id(dev_item, dev->devid);
    4043      447051 :                 btrfs_set_stack_device_total_bytes(dev_item,
    4044             :                                                    dev->commit_total_bytes);
    4045      447051 :                 btrfs_set_stack_device_bytes_used(dev_item,
    4046             :                                                   dev->commit_bytes_used);
    4047      447051 :                 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
    4048      447051 :                 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
    4049      447051 :                 btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
    4050      894102 :                 memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
    4051      894102 :                 memcpy(dev_item->fsid, dev->fs_devices->metadata_uuid,
    4052             :                        BTRFS_FSID_SIZE);
    4053             : 
    4054      447051 :                 flags = btrfs_super_flags(sb);
    4055      447051 :                 btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
    4056             : 
    4057      447051 :                 ret = btrfs_validate_write_super(fs_info, sb);
    4058      447051 :                 if (ret < 0) {
    4059           0 :                         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    4060           0 :                         btrfs_handle_fs_error(fs_info, -EUCLEAN,
    4061             :                                 "unexpected superblock corruption detected");
    4062           0 :                         return -EUCLEAN;
    4063             :                 }
    4064             : 
    4065      447051 :                 ret = write_dev_supers(dev, sb, max_mirrors);
    4066      447051 :                 if (ret)
    4067           0 :                         total_errors++;
    4068             :         }
    4069      447051 :         if (total_errors > max_errors) {
    4070           0 :                 btrfs_err(fs_info, "%d errors while writing supers",
    4071             :                           total_errors);
    4072           0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    4073             : 
    4074             :                 /* FUA is masked off if unsupported and can't be the reason */
    4075           0 :                 btrfs_handle_fs_error(fs_info, -EIO,
    4076             :                                       "%d errors while writing supers",
    4077             :                                       total_errors);
    4078           0 :                 return -EIO;
    4079             :         }
    4080             : 
    4081      447051 :         total_errors = 0; /* the wait pass counts its failures from scratch */
    4082      894102 :         list_for_each_entry(dev, head, dev_list) {
    4083      447051 :                 if (!dev->bdev)
    4084           0 :                         continue;
    4085      447051 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    4086           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    4087           0 :                         continue;
    4088             : 
    4089      447051 :                 ret = wait_dev_supers(dev, max_mirrors);
    4090      447051 :                 if (ret)
    4091           0 :                         total_errors++;
    4092             :         }
    4093      447051 :         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    4094      447051 :         if (total_errors > max_errors) {
    4095           0 :                 btrfs_handle_fs_error(fs_info, -EIO,
    4096             :                                       "%d errors while writing supers",
    4097             :                                       total_errors);
    4098           0 :                 return -EIO;
    4099             :         }
    4100             :         return 0;
    4101             : }
    4102             : 
    4103             : /*
                     :  * Drop a fs root from the radix tree and free it.
                     :  *
                     :  * Under fs_roots_radix_lock, @root is deleted from
                     :  * fs_info->fs_roots_radix (keyed by root_key.objectid) and
                     :  * BTRFS_ROOT_IN_RADIX is cleared. Only if that bit was still set is a
                     :  * reference on @root put at the end. NOTE(review): whether that put frees
                     :  * the root depends on btrfs_put_root(), which is not visible here.
                     :  *
                     :  * When the filesystem is in an error state (BTRFS_FS_ERROR), root->log_root
                     :  * is asserted to be NULL and a remaining root->reloc_root is put and
                     :  * cleared as well.
                     :  */
    4104       12292 : void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
    4105             :                                   struct btrfs_root *root)
    4106             : {
    4107       12292 :         bool drop_ref = false;
    4108             : 
    4109       12292 :         spin_lock(&fs_info->fs_roots_radix_lock);
    4110       12292 :         radix_tree_delete(&fs_info->fs_roots_radix,
    4111       12292 :                           (unsigned long)root->root_key.objectid);
    4112       12292 :         if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
    4113       12292 :                 drop_ref = true;
    4114       12292 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    4115             : 
    4116       12292 :         if (BTRFS_FS_ERROR(fs_info)) {
    4117          34 :                 ASSERT(root->log_root == NULL);
    4118          34 :                 if (root->reloc_root) {
    4119           0 :                         btrfs_put_root(root->reloc_root);
    4120           0 :                         root->reloc_root = NULL;
    4121             :                 }
    4122             :         }
    4123             : 
    4124       12292 :         if (drop_ref)
    4125       12292 :                 btrfs_put_root(root); /* done after the spinlock has been released */
    4126       12292 : }
    4127             : /*
                     :  * Run btrfs_orphan_cleanup() on every fs root in fs_info->fs_roots_radix
                     :  * whose root item still has a non-zero reference count.
                     :  *
                     :  * Roots are looked up in batches of ARRAY_SIZE(gang) under
                     :  * fs_roots_radix_lock, where btrfs_grab_root() is called on each one that
                     :  * will be processed; the cleanup itself runs with the lock dropped and
                     :  * btrfs_put_root() is called afterwards.
                     :  *
                     :  * Return 0 on success or the first error from btrfs_orphan_cleanup(). In
                     :  * the error case the roots still held from the current batch, including
                     :  * the one that failed, are put before returning.
                     :  */
    4128        3181 : int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
    4129             : {
    4130        3181 :         u64 root_objectid = 0;
    4131        3181 :         struct btrfs_root *gang[8];
    4132        3181 :         int i = 0;
    4133        3181 :         int err = 0;
    4134        3181 :         unsigned int ret = 0;
    4135             : 
    4136       10629 :         while (1) {
    4137        6905 :                 spin_lock(&fs_info->fs_roots_radix_lock);
    4138        6905 :                 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
    4139             :                                              (void **)gang, root_objectid,
    4140             :                                              ARRAY_SIZE(gang));
    4141        6905 :                 if (!ret) {
    4142        3181 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
    4143             :                         break;
    4144             :                 }
    4145        3724 :                 root_objectid = gang[ret - 1]->root_key.objectid + 1;
    4146             : 
    4147       14663 :                 for (i = 0; i < ret; i++) {
    4148             :                         /* Do not grab roots that are in dead_roots */
    4149       10939 :                         if (btrfs_root_refs(&gang[i]->root_item) == 0) {
    4150         227 :                                 gang[i] = NULL;
    4151         227 :                                 continue;
    4152             :                         }
    4153             :                         /* grab all the search results for later use */
    4154       10712 :                         gang[i] = btrfs_grab_root(gang[i]);
    4155             :                 }
    4156        3724 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
    4157             : 
    4158       18387 :                 for (i = 0; i < ret; i++) {
    4159       10939 :                         if (!gang[i])
    4160         227 :                                 continue;
    4161       10712 :                         root_objectid = gang[i]->root_key.objectid;
    4162       10712 :                         err = btrfs_orphan_cleanup(gang[i]);
    4163       10712 :                         if (err)
    4164           0 :                                 goto out; /* i still indexes the failed root, so it is put at out: */
    4165       10712 :                         btrfs_put_root(gang[i]);
    4166             :                 }
    4167        3724 :                 root_objectid++; /* the next lookup starts after the last root handled */
    4168             :         }
    4169        3181 : out:
    4170             :         /* release the uncleaned roots due to error */
    4171        3181 :         for (; i < ret; i++) {
    4172           0 :                 if (gang[i])
    4173           0 :                         btrfs_put_root(gang[i]);
    4174             :         }
    4175        3181 :         return err;
    4176             : }
    4177             : /*
                     :  * Let pending cleanup work finish, then commit a transaction.
                     :  *
                     :  * Runs the delayed iputs under cleaner_mutex, wakes the cleaner kthread,
                     :  * waits for ongoing cleanup work by taking and immediately releasing
                     :  * cleanup_work_sem for writing, then joins a transaction on the tree root
                     :  * and commits it.
                     :  *
                     :  * Return the error from btrfs_join_transaction() if joining failed,
                     :  * otherwise whatever btrfs_commit_transaction() returns.
                     :  */
    4178        3166 : int btrfs_commit_super(struct btrfs_fs_info *fs_info)
    4179             : {
    4180        3166 :         struct btrfs_root *root = fs_info->tree_root;
    4181        3166 :         struct btrfs_trans_handle *trans;
    4182             : 
    4183        3166 :         mutex_lock(&fs_info->cleaner_mutex);
    4184        3166 :         btrfs_run_delayed_iputs(fs_info);
    4185        3166 :         mutex_unlock(&fs_info->cleaner_mutex);
    4186        3166 :         wake_up_process(fs_info->cleaner_kthread);
    4187             : 
    4188             :         /* wait until ongoing cleanup work is done */
    4189        3166 :         down_write(&fs_info->cleanup_work_sem);
    4190        3166 :         up_write(&fs_info->cleanup_work_sem);
    4191             : 
    4192        3166 :         trans = btrfs_join_transaction(root);
    4193        3166 :         if (IS_ERR(trans))
    4194           0 :                 return PTR_ERR(trans);
    4195        3166 :         return btrfs_commit_transaction(trans);
    4196             : }
    4197             : /*
                     :  * Warn about, and tear down, every transaction still on
                     :  * fs_info->trans_list.
                     :  *
                     :  * Returns immediately when the list is empty. Otherwise, for each
                     :  * transaction, the sizes of the EXTENT_DIRTY ranges found in
                     :  * trans->dirty_pages are added up and reported in a warning, then the
                     :  * transaction is passed to btrfs_cleanup_one_transaction(), removed from
                     :  * the list and put. fs_info->running_transaction is cleared if it pointed
                     :  * at that transaction.
                     :  *
                     :  * The closing ASSERT(!found) fails whenever at least one such transaction
                     :  * was seen. NOTE(review): presumably only effective in builds with
                     :  * assertions enabled - confirm against the ASSERT() definition.
                     :  */
    4198        3217 : static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
    4199             : {
    4200        3217 :         struct btrfs_transaction *trans;
    4201        3217 :         struct btrfs_transaction *tmp;
    4202        3217 :         bool found = false;
    4203             : 
    4204        3217 :         if (list_empty(&fs_info->trans_list))
    4205             :                 return;
    4206             : 
    4207             :         /*
    4208             :          * This function is only called at the very end of close_ctree(),
    4209             :          * thus no other running transaction, no need to take trans_lock.
    4210             :          */
    4211           0 :         ASSERT(test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags));
    4212           0 :         list_for_each_entry_safe(trans, tmp, &fs_info->trans_list, list) {
    4213           0 :                 struct extent_state *cached = NULL;
    4214           0 :                 u64 dirty_bytes = 0;
    4215           0 :                 u64 cur = 0;
    4216           0 :                 u64 found_start;
    4217           0 :                 u64 found_end;
    4218             : 
    4219           0 :                 found = true;
    4220           0 :                 while (!find_first_extent_bit(&trans->dirty_pages, cur,
    4221             :                         &found_start, &found_end, EXTENT_DIRTY, &cached)) {
    4222           0 :                         dirty_bytes += found_end + 1 - found_start; /* found_end is treated as inclusive */
    4223           0 :                         cur = found_end + 1;
    4224             :                 }
    4225           0 :                 btrfs_warn(fs_info,
    4226             :         "transaction %llu (with %llu dirty metadata bytes) is not committed",
    4227             :                            trans->transid, dirty_bytes);
    4228           0 :                 btrfs_cleanup_one_transaction(trans, fs_info);
    4229             : 
    4230           0 :                 if (trans == fs_info->running_transaction)
    4231           0 :                         fs_info->running_transaction = NULL;
    4232           0 :                 list_del_init(&trans->list);
    4233             : 
    4234           0 :                 btrfs_put_transaction(trans);
    4235           0 :                 trace_btrfs_transaction_commit(fs_info);
    4236             :         }
    4237        3217 :         ASSERT(!found);
    4238             : }
    4239             : 
    4240        3217 : void __cold close_ctree(struct btrfs_fs_info *fs_info)
    4241             : {
    4242        3217 :         int ret;
    4243             : 
    4244        3217 :         set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
    4245             : 
    4246             :         /*
    4247             :          * If we had UNFINISHED_DROPS we could still be processing them, so
    4248             :          * clear that bit and wake up relocation so it can stop.
    4249             :          * We must do this before stopping the block group reclaim task, because
    4250             :          * at btrfs_relocate_block_group() we wait for this bit, and after the
    4251             :          * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
    4252             :          * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
    4253             :          * return 1.
    4254             :          */
    4255        3217 :         btrfs_wake_unfinished_drop(fs_info);
    4256             : 
    4257             :         /*
    4258             :          * We may have the reclaim task running and relocating a data block group,
    4259             :          * in which case it may create delayed iputs. So stop it before we park
    4260             :          * the cleaner kthread otherwise we can get new delayed iputs after
    4261             :          * parking the cleaner, and that can make the async reclaim task hang
    4262             :          * if it's waiting for delayed iputs to complete, since the cleaner is
    4263             :          * parked and can not run delayed iputs - this will make us hang when
    4264             :          * trying to stop the async reclaim task.
    4265             :          */
    4266        3217 :         cancel_work_sync(&fs_info->reclaim_bgs_work);
    4267             :         /*
    4268             :          * We don't want the cleaner to start new transactions, add more delayed
    4269             :          * iputs, etc. while we're closing. We can't use kthread_stop() yet
    4270             :          * because that frees the task_struct, and the transaction kthread might
    4271             :          * still try to wake up the cleaner.
    4272             :          */
    4273        3217 :         kthread_park(fs_info->cleaner_kthread);
    4274             : 
    4275             :         /* wait for the qgroup rescan worker to stop */
    4276        3217 :         btrfs_qgroup_wait_for_completion(fs_info, false);
    4277             : 
    4278             :         /* wait for the uuid_scan task to finish */
    4279        3217 :         down(&fs_info->uuid_tree_rescan_sem);
    4280             :         /* avoid complains from lockdep et al., set sem back to initial state */
    4281        3217 :         up(&fs_info->uuid_tree_rescan_sem);
    4282             : 
    4283             :         /* pause restriper - we want to resume on mount */
    4284        3217 :         btrfs_pause_balance(fs_info);
    4285             : 
    4286        3217 :         btrfs_dev_replace_suspend_for_unmount(fs_info);
    4287             : 
    4288        3217 :         btrfs_scrub_cancel(fs_info);
    4289             : 
    4290             :         /* wait for any defraggers to finish */
    4291        3217 :         wait_event(fs_info->transaction_wait,
    4292             :                    (atomic_read(&fs_info->defrag_running) == 0));
    4293             : 
    4294             :         /* clear out the rbtree of defraggable inodes */
    4295        3217 :         btrfs_cleanup_defrag_inodes(fs_info);
    4296             : 
    4297             :         /*
    4298             :          * After we parked the cleaner kthread, ordered extents may have
    4299             :          * completed and created new delayed iputs. If one of the async reclaim
    4300             :          * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
    4301             :          * can hang forever trying to stop it, because if a delayed iput is
    4302             :          * added after it ran btrfs_run_delayed_iputs() and before it called
    4303             :          * btrfs_wait_on_delayed_iputs(), it will hang forever since there is
    4304             :          * no one else to run iputs.
    4305             :          *
    4306             :          * So wait for all ongoing ordered extents to complete and then run
    4307             :          * delayed iputs. This works because once we reach this point no one
    4308             :          * can either create new ordered extents nor create delayed iputs
    4309             :          * through some other means.
    4310             :          *
    4311             :          * Also note that btrfs_wait_ordered_roots() is not safe here, because
    4312             :          * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
    4313             :          * but the delayed iput for the respective inode is made only when doing
    4314             :          * the final btrfs_put_ordered_extent() (which must happen at
    4315             :          * btrfs_finish_ordered_io() when we are unmounting).
    4316             :          */
    4317        3217 :         btrfs_flush_workqueue(fs_info->endio_write_workers);
    4318             :         /* Ordered extents for free space inodes. */
    4319        3217 :         btrfs_flush_workqueue(fs_info->endio_freespace_worker);
    4320        3217 :         btrfs_run_delayed_iputs(fs_info);
    4321             : 
    4322        3217 :         cancel_work_sync(&fs_info->async_reclaim_work);
    4323        3217 :         cancel_work_sync(&fs_info->async_data_reclaim_work);
    4324        3217 :         cancel_work_sync(&fs_info->preempt_reclaim_work);
    4325             : 
    4326             :         /* Cancel or finish ongoing discard work */
    4327        3217 :         btrfs_discard_cleanup(fs_info);
    4328             : 
    4329        3217 :         if (!sb_rdonly(fs_info->sb)) {
    4330             :                 /*
    4331             :                  * The cleaner kthread is stopped, so do one final pass over
    4332             :                  * unused block groups.
    4333             :                  */
    4334        3162 :                 btrfs_delete_unused_bgs(fs_info);
    4335             : 
    4336             :                 /*
    4337             :                  * There might be existing delayed inode workers still running
    4338             :                  * and holding an empty delayed inode item. We must wait for
    4339             :                  * them to complete first because they can create a transaction.
    4340             :                  * This happens when someone calls btrfs_balance_delayed_items()
    4341             :                  * and then a transaction commit runs the same delayed nodes
    4342             :                  * before any delayed worker has done something with the nodes.
    4343             :                  * We must wait for any worker here and not at transaction
    4344             :                  * commit time since that could cause a deadlock.
    4345             :                  * This is a very rare case.
    4346             :                  */
    4347        3162 :                 btrfs_flush_workqueue(fs_info->delayed_workers);
    4348             : 
    4349        3162 :                 ret = btrfs_commit_super(fs_info);
    4350        3162 :                 if (ret)
    4351           2 :                         btrfs_err(fs_info, "commit super ret %d", ret);
    4352             :         }
    4353             : 
    4354        3217 :         if (BTRFS_FS_ERROR(fs_info))
    4355          17 :                 btrfs_error_commit_super(fs_info);
    4356             : 
    4357        3217 :         kthread_stop(fs_info->transaction_kthread);
    4358        3217 :         kthread_stop(fs_info->cleaner_kthread);
    4359             : 
    4360        3217 :         ASSERT(list_empty(&fs_info->delayed_iputs));
    4361        3217 :         set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
    4362             : 
    4363        3217 :         if (btrfs_check_quota_leak(fs_info)) {
    4364           0 :                 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
    4365           0 :                 btrfs_err(fs_info, "qgroup reserved space leaked");
    4366             :         }
    4367             : 
    4368        3217 :         btrfs_free_qgroup_config(fs_info);
    4369        3217 :         ASSERT(list_empty(&fs_info->delalloc_roots));
    4370             : 
    4371        3217 :         if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
    4372           0 :                 btrfs_info(fs_info, "at unmount delalloc count %lld",
    4373             :                        percpu_counter_sum(&fs_info->delalloc_bytes));
    4374             :         }
    4375             : 
    4376        3217 :         if (percpu_counter_sum(&fs_info->ordered_bytes))
    4377           0 :                 btrfs_info(fs_info, "at unmount dio bytes count %lld",
    4378             :                            percpu_counter_sum(&fs_info->ordered_bytes));
    4379             : 
    4380        3217 :         btrfs_sysfs_remove_mounted(fs_info);
    4381        3217 :         btrfs_sysfs_remove_fsid(fs_info->fs_devices);
    4382             : 
    4383        3217 :         btrfs_put_block_group_cache(fs_info);
    4384             : 
    4385             :         /*
    4386             :          * we must make sure there is not any read request to
    4387             :          * submit after we stopping all workers.
    4388             :          */
    4389        3217 :         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
    4390        3217 :         btrfs_stop_all_workers(fs_info);
    4391             : 
    4392             :         /* We shouldn't have any transaction open at this point */
    4393        3217 :         warn_about_uncommitted_trans(fs_info);
    4394             : 
    4395        3217 :         clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
    4396        3217 :         free_root_pointers(fs_info, true);
    4397        3217 :         btrfs_free_fs_roots(fs_info);
    4398             : 
    4399             :         /*
    4400             :          * We must free the block groups after dropping the fs_roots as we could
    4401             :          * have had an IO error and have left over tree log blocks that aren't
    4402             :          * cleaned up until the fs roots are freed.  This makes the block group
    4403             :          * accounting appear to be wrong because there's pending reserved bytes,
    4404             :          * so make sure we do the block group cleanup afterwards.
    4405             :          */
    4406        3217 :         btrfs_free_block_groups(fs_info);
    4407             : 
    4408        3217 :         iput(fs_info->btree_inode);
    4409             : 
    4410             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    4411             :         if (btrfs_test_opt(fs_info, CHECK_INTEGRITY))
    4412             :                 btrfsic_unmount(fs_info->fs_devices);
    4413             : #endif
    4414             : 
    4415        3217 :         btrfs_mapping_tree_free(&fs_info->mapping_tree);
    4416        3217 :         btrfs_close_devices(fs_info->fs_devices);
    4417        3217 : }
    4418             : 
    4419   312062088 : void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
    4420             : {
                               /*
                                * Mark the extent buffer @buf dirty. The tree write lock on @buf is
                                * asserted below, and a WARN is emitted when the generation stored
                                * in the buffer header differs from fs_info->generation (the buffer
                                * is still marked dirty in that case).
                                */
    4421   312062088 :         struct btrfs_fs_info *fs_info = buf->fs_info;
    4422   312062088 :         u64 transid = btrfs_header_generation(buf);
    4423             : 
    4424             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    4425             :         /*
    4426             :          * This is a fast path so only do this check if we have sanity tests
    4427             :          * enabled.  Normal people shouldn't be using unmapped buffers as dirty
    4428             :          * outside of the sanity tests.
    4429             :          */
    4430             :         if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
    4431             :                 return;
    4432             : #endif
    4433   312062088 :         btrfs_assert_tree_write_locked(buf);
    4434   312062088 :         if (transid != fs_info->generation)
    4435           0 :                 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
    4436             :                         buf->start, transid, fs_info->generation);
    4437   312062088 :         set_extent_buffer_dirty(buf);
    4438             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    4439             :         /*
    4440             :          * btrfs_check_leaf() won't check item data if we don't have WRITTEN
    4441             :          * set, so this will only validate the basic structure of the items.
    4442             :          */
    4443             :         if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(buf)) {
    4444             :                 btrfs_print_leaf(buf);
    4445             :                 ASSERT(0);
    4446             :         }
    4447             : #endif
    4448   312139566 : }
    4449             : 
    4450    19226028 : static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
    4451             :                                         int flush_delayed)
    4452             : {
                               /*
                                * Common helper for the two btrfs_btree_balance_dirty*() entry
                                * points. Does nothing when the current task has PF_MEMALLOC set.
                                * When @flush_delayed is non-zero the delayed items are balanced
                                * first. Then dirty_metadata_bytes is compared against
                                * BTRFS_DIRTY_METADATA_THRESH and, if the comparison is positive,
                                * dirty page balancing is triggered on the btree inode's mapping.
                                */
    4453             :         /*
    4454             :          * looks as though older kernels can get into trouble with
    4455             :          * this code, they end up stuck in balance_dirty_pages forever
    4456             :          */
    4457    19226028 :         int ret;
    4458             : 
    4459    19226028 :         if (current->flags & PF_MEMALLOC)
    4460             :                 return;
    4461             : 
    4462    19226028 :         if (flush_delayed)
    4463    12600822 :                 btrfs_balance_delayed_items(fs_info);
    4464             : 
                               /*
                                * NOTE(review): ret > 0 presumably means the counter is above the
                                * threshold - confirm against __percpu_counter_compare().
                                */
    4465    19226163 :         ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
    4466             :                                      BTRFS_DIRTY_METADATA_THRESH,
    4467             :                                      fs_info->dirty_metadata_batch);
    4468    19224578 :         if (ret > 0) {
    4469     4816072 :                 balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
    4470             :         }
    4471             : }
    4472             : 
    4473    12601766 : void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info)
    4474             : {
                               /* Balance dirty btree pages, balancing the delayed items first. */
    4475    12601766 :         __btrfs_btree_balance_dirty(fs_info, 1);
    4476    12599761 : }
    4477             : 
    4478     6627843 : void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
    4479             : {
                               /* Balance dirty btree pages without touching the delayed items. */
    4480     6627843 :         __btrfs_btree_balance_dirty(fs_info, 0);
    4481     6624359 : }
    4482             : 
    4483          17 : static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
    4484             : {
                               /*
                                * Error-path cleanup for @fs_info: clean up the transaction state,
                                * then run the delayed iputs while holding cleaner_mutex.
                                */
    4485             :         /* cleanup FS via transaction */
    4486          17 :         btrfs_cleanup_transaction(fs_info);
    4487             : 
    4488          17 :         mutex_lock(&fs_info->cleaner_mutex);
    4489          17 :         btrfs_run_delayed_iputs(fs_info);
    4490          17 :         mutex_unlock(&fs_info->cleaner_mutex);
    4491             : 
                               /*
                                * NOTE(review): the write lock is taken and dropped immediately;
                                * presumably this only waits for current holders of
                                * cleanup_work_sem to finish - confirm.
                                */
    4492          17 :         down_write(&fs_info->cleanup_work_sem);
    4493          17 :         up_write(&fs_info->cleanup_work_sem);
    4494          17 : }
    4495             : 
    4496          20 : static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
    4497             : {
                               /*
                                * Free the log of every root found in fs_info->fs_roots_radix, then
                                * free the log root tree. Roots are looked up in batches of up to
                                * ARRAY_SIZE(gang) entries, starting at root_objectid.
                                */
    4498          20 :         struct btrfs_root *gang[8];
    4499          20 :         u64 root_objectid = 0;
    4500          20 :         int ret;
    4501             : 
    4502          20 :         spin_lock(&fs_info->fs_roots_radix_lock);
    4503          80 :         while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
    4504             :                                              (void **)gang, root_objectid,
    4505          40 :                                              ARRAY_SIZE(gang))) != 0) {
    4506             :                 int i;
    4507             : 
                                       /*
                                        * Grab each root while the radix lock is still held; a
                                        * failed grab leaves NULL in the slot, skipped below.
                                        */
    4508          60 :                 for (i = 0; i < ret; i++)
    4509          40 :                         gang[i] = btrfs_grab_root(gang[i]);
    4510          20 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
    4511             : 
    4512          80 :                 for (i = 0; i < ret; i++) {
    4513          40 :                         if (!gang[i])
    4514           0 :                                 continue;
    4515          40 :                         root_objectid = gang[i]->root_key.objectid;
    4516          40 :                         btrfs_free_log(NULL, gang[i]);
    4517          40 :                         btrfs_put_root(gang[i]);
    4518             :                 }
                                       /* Start the next lookup just past the last root processed. */
    4519          20 :                 root_objectid++;
    4520          20 :                 spin_lock(&fs_info->fs_roots_radix_lock);
    4521             :         }
    4522          20 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    4523          20 :         btrfs_free_log_root_tree(NULL, fs_info);
    4524          20 : }
    4525             : 
    4526           0 : static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
    4527             : {
                               /*
                                * Set BTRFS_ORDERED_IOERR on every ordered extent linked on
                                * root->ordered_extents, under root->ordered_extent_lock.
                                */
    4528           0 :         struct btrfs_ordered_extent *ordered;
    4529             : 
    4530           0 :         spin_lock(&root->ordered_extent_lock);
    4531             :         /*
    4532             :          * This will just short circuit the ordered completion stuff which will
    4533             :          * make sure the ordered extent gets properly cleaned up.
    4534             :          */
    4535           0 :         list_for_each_entry(ordered, &root->ordered_extents,
    4536             :                             root_extent_list)
    4537           0 :                 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
    4538           0 :         spin_unlock(&root->ordered_extent_lock);
    4539           0 : }
    4540             : 
    4541          20 : static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
    4542             : {
                               /*
                                * Run btrfs_destroy_ordered_extents() on every root linked on
                                * fs_info->ordered_roots, then wait on the ordered roots.
                                */
    4543          20 :         struct btrfs_root *root;
    4544          20 :         struct list_head splice;
    4545             : 
    4546          20 :         INIT_LIST_HEAD(&splice);
    4547             : 
    4548          20 :         spin_lock(&fs_info->ordered_root_lock);
    4549          20 :         list_splice_init(&fs_info->ordered_roots, &splice);
    4550          20 :         while (!list_empty(&splice)) {
    4551           0 :                 root = list_first_entry(&splice, struct btrfs_root,
    4552             :                                         ordered_root);
                                       /*
                                        * Put the root back on the fs_info list before the lock
                                        * is dropped for the per-root pass.
                                        */
    4553           0 :                 list_move_tail(&root->ordered_root,
    4554             :                                &fs_info->ordered_roots);
    4555             : 
    4556           0 :                 spin_unlock(&fs_info->ordered_root_lock);
    4557           0 :                 btrfs_destroy_ordered_extents(root);
    4558             : 
    4559           0 :                 cond_resched();
    4560           0 :                 spin_lock(&fs_info->ordered_root_lock);
    4561             :         }
    4562          20 :         spin_unlock(&fs_info->ordered_root_lock);
    4563             : 
    4564             :         /*
    4565             :          * We need this here because if we've been flipped read-only we won't
    4566             :          * get sync() from the umount, so we need to make sure any ordered
    4567             :          * extents that haven't had their dirty pages IO start writeout yet
    4568             :          * actually get run and error out properly.
    4569             :          */
    4570          20 :         btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
    4571          20 : }
    4572             : 
    4573          17 : static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
    4574             :                                        struct btrfs_fs_info *fs_info)
    4575             : {
                               /*
                                * Drop every delayed ref head (and every ref hanging off it) queued
                                * on @trans->delayed_refs. For heads flagged must_insert_reserved,
                                * the head's bytes are moved from reserved to pinned on the owning
                                * block group and the range is then unpinned. Returns early, after
                                * a debug message, when there are no entries.
                                */
    4576          17 :         struct rb_node *node;
    4577          17 :         struct btrfs_delayed_ref_root *delayed_refs;
    4578          17 :         struct btrfs_delayed_ref_node *ref;
    4579             : 
    4580          17 :         delayed_refs = &trans->delayed_refs;
    4581             : 
    4582          17 :         spin_lock(&delayed_refs->lock);
    4583          17 :         if (atomic_read(&delayed_refs->num_entries) == 0) {
    4584          15 :                 spin_unlock(&delayed_refs->lock);
    4585          15 :                 btrfs_debug(fs_info, "delayed_refs has NO entry");
    4586          15 :                 return;
    4587             :         }
    4588             : 
    4589          32 :         while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
    4590          30 :                 struct btrfs_delayed_ref_head *head;
    4591          30 :                 struct rb_node *n;
    4592          30 :                 bool pin_bytes = false;
    4593             : 
    4594          30 :                 head = rb_entry(node, struct btrfs_delayed_ref_head,
    4595             :                                 href_node);
                                       /*
                                        * Locking the head failed: start over from the first
                                        * head in the tree.
                                        */
    4596          30 :                 if (btrfs_delayed_ref_lock(delayed_refs, head))
    4597           0 :                         continue;
    4598             : 
    4599          30 :                 spin_lock(&head->lock);
                                       /* Unlink and release every ref queued on this head. */
    4600          59 :                 while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
    4601          29 :                         ref = rb_entry(n, struct btrfs_delayed_ref_node,
    4602             :                                        ref_node);
    4603          29 :                         rb_erase_cached(&ref->ref_node, &head->ref_tree);
    4604          29 :                         RB_CLEAR_NODE(&ref->ref_node);
    4605          29 :                         if (!list_empty(&ref->add_list))
    4606          28 :                                 list_del(&ref->add_list);
    4607          29 :                         atomic_dec(&delayed_refs->num_entries);
    4608          29 :                         btrfs_put_delayed_ref(ref);
    4609             :                 }
    4610          30 :                 if (head->must_insert_reserved)
    4611          28 :                         pin_bytes = true;
    4612          30 :                 btrfs_free_delayed_extent_op(head->extent_op);
    4613          30 :                 btrfs_delete_ref_head(delayed_refs, head);
    4614          30 :                 spin_unlock(&head->lock);
    4615          30 :                 spin_unlock(&delayed_refs->lock);
    4616          30 :                 mutex_unlock(&head->mutex);
    4617             : 
    4618          30 :                 if (pin_bytes) {
    4619          28 :                         struct btrfs_block_group *cache;
    4620             : 
    4621          28 :                         cache = btrfs_lookup_block_group(fs_info, head->bytenr);
    4622          28 :                         BUG_ON(!cache);
    4623             : 
                                               /*
                                                * Move head->num_bytes from the reserved counters
                                                * to the pinned counters of the block group and
                                                * its space_info.
                                                */
    4624          28 :                         spin_lock(&cache->space_info->lock);
    4625          28 :                         spin_lock(&cache->lock);
    4626          28 :                         cache->pinned += head->num_bytes;
    4627          28 :                         btrfs_space_info_update_bytes_pinned(fs_info,
    4628          28 :                                 cache->space_info, head->num_bytes);
    4629          28 :                         cache->reserved -= head->num_bytes;
    4630          28 :                         cache->space_info->bytes_reserved -= head->num_bytes;
    4631          28 :                         spin_unlock(&cache->lock);
    4632          28 :                         spin_unlock(&cache->space_info->lock);
    4633             : 
    4634          28 :                         btrfs_put_block_group(cache);
    4635             : 
    4636          28 :                         btrfs_error_unpin_extent_range(fs_info, head->bytenr,
    4637          28 :                                 head->bytenr + head->num_bytes - 1);
    4638             :                 }
    4639          30 :                 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
    4640          30 :                 btrfs_put_delayed_ref_head(head);
    4641          30 :                 cond_resched();
                                       /* Retake the lock for the next rb_first_cached() lookup. */
    4642          30 :                 spin_lock(&delayed_refs->lock);
    4643             :         }
    4644           2 :         btrfs_qgroup_destroy_extent_records(trans);
    4645             : 
    4646           2 :         spin_unlock(&delayed_refs->lock);
    4647             : }
    4648             : 
    4649           0 : static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
    4650             : {
                               /*
                                * For every inode on root->delalloc_inodes: remove it from the
                                * delalloc accounting and, if a reference to it can still be
                                * taken, invalidate its page cache.
                                */
    4651           0 :         struct btrfs_inode *btrfs_inode;
    4652           0 :         struct list_head splice;
    4653             : 
    4654           0 :         INIT_LIST_HEAD(&splice);
    4655             : 
    4656           0 :         spin_lock(&root->delalloc_lock);
    4657           0 :         list_splice_init(&root->delalloc_inodes, &splice);
    4658             : 
    4659           0 :         while (!list_empty(&splice)) {
    4660           0 :                 struct inode *inode = NULL;
    4661           0 :                 btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
    4662             :                                                delalloc_inodes);
    4663           0 :                 __btrfs_del_delalloc_inode(root, btrfs_inode);
    4664           0 :                 spin_unlock(&root->delalloc_lock);
    4665             : 
    4666             :                 /*
    4667             :                  * Make sure we get a live inode and that it'll not disappear
    4668             :                  * meanwhile.
    4669             :                  */
    4670           0 :                 inode = igrab(&btrfs_inode->vfs_inode);
    4671           0 :                 if (inode) {
    4672           0 :                         unsigned int nofs_flag;
    4673             : 
                                               /* Invalidate the pages inside a NOFS allocation scope. */
    4674           0 :                         nofs_flag = memalloc_nofs_save();
    4675           0 :                         invalidate_inode_pages2(inode->i_mapping);
    4676           0 :                         memalloc_nofs_restore(nofs_flag);
    4677           0 :                         iput(inode);
    4678             :                 }
    4679           0 :                 spin_lock(&root->delalloc_lock);
    4680             :         }
    4681           0 :         spin_unlock(&root->delalloc_lock);
    4682           0 : }
    4683             : 
    4684          20 : static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
    4685             : {
                               /*
                                * Run btrfs_destroy_delalloc_inodes() on every root that was linked
                                * on fs_info->delalloc_roots, holding a root reference around each
                                * call.
                                */
    4686          20 :         struct btrfs_root *root;
    4687          20 :         struct list_head splice;
    4688             : 
    4689          20 :         INIT_LIST_HEAD(&splice);
    4690             : 
    4691          20 :         spin_lock(&fs_info->delalloc_root_lock);
    4692          20 :         list_splice_init(&fs_info->delalloc_roots, &splice);
                               /*
                                * NOTE(review): nothing in this loop unlinks the root from the
                                * splice list; presumably the per-inode removal done inside
                                * btrfs_destroy_delalloc_inodes() ends up doing that - confirm.
                                */
    4693          20 :         while (!list_empty(&splice)) {
    4694           0 :                 root = list_first_entry(&splice, struct btrfs_root,
    4695             :                                          delalloc_root);
    4696           0 :                 root = btrfs_grab_root(root);
    4697           0 :                 BUG_ON(!root);
    4698           0 :                 spin_unlock(&fs_info->delalloc_root_lock);
    4699             : 
    4700           0 :                 btrfs_destroy_delalloc_inodes(root);
    4701           0 :                 btrfs_put_root(root);
    4702             : 
    4703           0 :                 spin_lock(&fs_info->delalloc_root_lock);
    4704             :         }
    4705          20 :         spin_unlock(&fs_info->delalloc_root_lock);
    4706          20 : }
    4707             : 
    4708          17 : static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
    4709             :                                         struct extent_io_tree *dirty_pages,
    4710             :                                         int mark)
    4711             : {
                               /*
                                * For every range in @dirty_pages that has @mark set: clear the
                                * bits, then walk the range in nodesize steps and, for each extent
                                * buffer found, wait for writeback, clear its dirty state and free
                                * it as stale.
                                *
                                * NOTE(review): the loop only ends when find_first_extent_bit()
                                * returns non-zero, so the value returned here is always that
                                * non-zero result - confirm callers do not treat it as an error.
                                */
    4712          17 :         int ret;
    4713          17 :         struct extent_buffer *eb;
    4714          17 :         u64 start = 0;
    4715          82 :         u64 end;
    4716             : 
    4717          82 :         while (1) {
    4718          82 :                 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
    4719             :                                             mark, NULL);
    4720          82 :                 if (ret)
    4721             :                         break;
    4722             : 
    4723          65 :                 clear_extent_bits(dirty_pages, start, end, mark);
    4724         171 :                 while (start <= end) {
    4725         106 :                         eb = find_extent_buffer(fs_info, start);
                                               /* Advance first so the !eb case still moves forward. */
    4726         106 :                         start += fs_info->nodesize;
    4727         106 :                         if (!eb)
    4728           0 :                                 continue;
    4729             : 
    4730         106 :                         btrfs_tree_lock(eb);
    4731         106 :                         wait_on_extent_buffer_writeback(eb);
    4732         106 :                         btrfs_clear_buffer_dirty(NULL, eb);
    4733         106 :                         btrfs_tree_unlock(eb);
    4734             : 
    4735         106 :                         free_extent_buffer_stale(eb);
    4736             :                 }
    4737             :         }
    4738             : 
    4739          17 :         return ret;
    4740             : }
    4741             : 
    4742          17 : static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
    4743             :                                        struct extent_io_tree *unpin)
    4744             : {
                               /*
                                * Repeatedly look up the first EXTENT_DIRTY range in @unpin
                                * (always searching from offset 0), clear it and unpin it, until
                                * the lookup reports nothing left. ret is only used to detect the
                                * end of the search; this function always returns 0.
                                */
    4745         129 :         u64 start;
    4746         129 :         u64 end;
    4747         129 :         int ret;
    4748             : 
    4749         241 :         while (1) {
    4750         129 :                 struct extent_state *cached_state = NULL;
    4751             : 
    4752             :                 /*
    4753             :                  * The btrfs_finish_extent_commit() may get the same range as
    4754             :                  * ours between find_first_extent_bit and clear_extent_dirty.
    4755             :                  * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
    4756             :                  * the same extent range.
    4757             :                  */
    4758         129 :                 mutex_lock(&fs_info->unused_bg_unpin_mutex);
    4759         129 :                 ret = find_first_extent_bit(unpin, 0, &start, &end,
    4760             :                                             EXTENT_DIRTY, &cached_state);
    4761         129 :                 if (ret) {
    4762          17 :                         mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    4763          17 :                         break;
    4764             :                 }
    4765             : 
    4766         112 :                 clear_extent_dirty(unpin, start, end, &cached_state);
    4767         112 :                 free_extent_state(cached_state);
    4768         112 :                 btrfs_error_unpin_extent_range(fs_info, start, end);
    4769         112 :                 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    4770         112 :                 cond_resched();
    4771             :         }
    4772             : 
    4773          17 :         return 0;
    4774             : }
    4775             : 
    4776           0 : static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
    4777             : {
                               /*
                                * Release the io_ctl inode of @cache, if one is attached: invalidate
                                * its pages, zero its generation, detach it from io_ctl and drop
                                * the inode reference. Always drops one block group reference.
                                */
    4778           0 :         struct inode *inode;
    4779             : 
    4780           0 :         inode = cache->io_ctl.inode;
    4781           0 :         if (inode) {
    4782           0 :                 unsigned int nofs_flag;
    4783             : 
                                       /* Invalidate the pages inside a NOFS allocation scope. */
    4784           0 :                 nofs_flag = memalloc_nofs_save();
    4785           0 :                 invalidate_inode_pages2(inode->i_mapping);
    4786           0 :                 memalloc_nofs_restore(nofs_flag);
    4787             : 
    4788           0 :                 BTRFS_I(inode)->generation = 0;
    4789           0 :                 cache->io_ctl.inode = NULL;
    4790           0 :                 iput(inode);
    4791             :         }
    4792           0 :         ASSERT(cache->io_ctl.pages == NULL);
    4793           0 :         btrfs_put_block_group(cache);
    4794           0 : }
    4795             : 
    4796          17 : void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
    4797             :                              struct btrfs_fs_info *fs_info)
    4798             : {
    4799          17 :         struct btrfs_block_group *cache;
    4800             : 
    4801          17 :         spin_lock(&cur_trans->dirty_bgs_lock);
    4802          25 :         while (!list_empty(&cur_trans->dirty_bgs)) {
    4803           8 :                 cache = list_first_entry(&cur_trans->dirty_bgs,
    4804             :                                          struct btrfs_block_group,
    4805             :                                          dirty_list);
    4806             : 
    4807           8 :                 if (!list_empty(&cache->io_list)) {
    4808           0 :                         spin_unlock(&cur_trans->dirty_bgs_lock);
    4809           0 :                         list_del_init(&cache->io_list);
    4810           0 :                         btrfs_cleanup_bg_io(cache);
    4811           0 :                         spin_lock(&cur_trans->dirty_bgs_lock);
    4812             :                 }
    4813             : 
    4814           8 :                 list_del_init(&cache->dirty_list);
    4815           8 :                 spin_lock(&cache->lock);
    4816           8 :                 cache->disk_cache_state = BTRFS_DC_ERROR;
    4817           8 :                 spin_unlock(&cache->lock);
    4818             : 
    4819           8 :                 spin_unlock(&cur_trans->dirty_bgs_lock);
    4820           8 :                 btrfs_put_block_group(cache);
    4821           8 :                 btrfs_delayed_refs_rsv_release(fs_info, 1);
    4822           8 :                 spin_lock(&cur_trans->dirty_bgs_lock);
    4823             :         }
    4824          17 :         spin_unlock(&cur_trans->dirty_bgs_lock);
    4825             : 
    4826             :         /*
    4827             :          * Refer to the definition of io_bgs member for details why it's safe
    4828             :          * to use it without any locking
    4829             :          */
    4830          17 :         while (!list_empty(&cur_trans->io_bgs)) {
    4831           0 :                 cache = list_first_entry(&cur_trans->io_bgs,
    4832             :                                          struct btrfs_block_group,
    4833             :                                          io_list);
    4834             : 
    4835           0 :                 list_del_init(&cache->io_list);
    4836           0 :                 spin_lock(&cache->lock);
    4837           0 :                 cache->disk_cache_state = BTRFS_DC_ERROR;
    4838           0 :                 spin_unlock(&cache->lock);
    4839           0 :                 btrfs_cleanup_bg_io(cache);
    4840             :         }
    4841          17 : }
    4842             : 
    4843          17 : void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
    4844             :                                    struct btrfs_fs_info *fs_info)
    4845             : {
    4846          17 :         struct btrfs_device *dev, *tmp;
    4847             : 
    4848          17 :         btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
    4849          17 :         ASSERT(list_empty(&cur_trans->dirty_bgs));
    4850          17 :         ASSERT(list_empty(&cur_trans->io_bgs));
    4851             : 
    4852          18 :         list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
    4853             :                                  post_commit_list) {
    4854           1 :                 list_del_init(&dev->post_commit_list);
    4855             :         }
    4856             : 
    4857          17 :         btrfs_destroy_delayed_refs(cur_trans, fs_info);
    4858             : 
    4859          17 :         cur_trans->state = TRANS_STATE_COMMIT_START;
    4860          17 :         wake_up(&fs_info->transaction_blocked_wait);
    4861             : 
    4862          17 :         cur_trans->state = TRANS_STATE_UNBLOCKED;
    4863          17 :         wake_up(&fs_info->transaction_wait);
    4864             : 
    4865          17 :         btrfs_destroy_delayed_inodes(fs_info);
    4866             : 
    4867          17 :         btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
    4868             :                                      EXTENT_DIRTY);
    4869          17 :         btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
    4870             : 
    4871          17 :         cur_trans->state =TRANS_STATE_COMPLETED;
    4872          17 :         wake_up(&cur_trans->commit_wait);
    4873          17 : }
    4874             : 
    4875          20 : static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
    4876             : {
    4877          20 :         struct btrfs_transaction *t;
    4878             : 
    4879          20 :         mutex_lock(&fs_info->transaction_kthread_mutex);
    4880             : 
    4881          20 :         spin_lock(&fs_info->trans_lock);
    4882          21 :         while (!list_empty(&fs_info->trans_list)) {
    4883           1 :                 t = list_first_entry(&fs_info->trans_list,
    4884             :                                      struct btrfs_transaction, list);
    4885           1 :                 if (t->state >= TRANS_STATE_COMMIT_START) {
    4886           0 :                         refcount_inc(&t->use_count);
    4887           0 :                         spin_unlock(&fs_info->trans_lock);
    4888           0 :                         btrfs_wait_for_commit(fs_info, t->transid);
    4889           0 :                         btrfs_put_transaction(t);
    4890           0 :                         spin_lock(&fs_info->trans_lock);
    4891           0 :                         continue;
    4892             :                 }
    4893           1 :                 if (t == fs_info->running_transaction) {
    4894           1 :                         t->state = TRANS_STATE_COMMIT_DOING;
    4895           1 :                         spin_unlock(&fs_info->trans_lock);
    4896             :                         /*
    4897             :                          * We wait for 0 num_writers since we don't hold a trans
    4898             :                          * handle open currently for this transaction.
    4899             :                          */
    4900           1 :                         wait_event(t->writer_wait,
    4901             :                                    atomic_read(&t->num_writers) == 0);
    4902             :                 } else {
    4903           0 :                         spin_unlock(&fs_info->trans_lock);
    4904             :                 }
    4905           1 :                 btrfs_cleanup_one_transaction(t, fs_info);
    4906             : 
    4907           1 :                 spin_lock(&fs_info->trans_lock);
    4908           1 :                 if (t == fs_info->running_transaction)
    4909           1 :                         fs_info->running_transaction = NULL;
    4910           1 :                 list_del_init(&t->list);
    4911           1 :                 spin_unlock(&fs_info->trans_lock);
    4912             : 
    4913           1 :                 btrfs_put_transaction(t);
    4914           1 :                 trace_btrfs_transaction_commit(fs_info);
    4915           1 :                 spin_lock(&fs_info->trans_lock);
    4916             :         }
    4917          20 :         spin_unlock(&fs_info->trans_lock);
    4918          20 :         btrfs_destroy_all_ordered_extents(fs_info);
    4919          20 :         btrfs_destroy_delayed_inodes(fs_info);
    4920          20 :         btrfs_assert_delayed_root_empty(fs_info);
    4921          20 :         btrfs_destroy_all_delalloc_inodes(fs_info);
    4922          20 :         btrfs_drop_all_logs(fs_info);
    4923          20 :         mutex_unlock(&fs_info->transaction_kthread_mutex);
    4924             : 
    4925          20 :         return 0;
    4926             : }
    4927             : 
    4928       20055 : int btrfs_init_root_free_objectid(struct btrfs_root *root)
    4929             : {
    4930       20055 :         struct btrfs_path *path;
    4931       20055 :         int ret;
    4932       20055 :         struct extent_buffer *l;
    4933       20055 :         struct btrfs_key search_key;
    4934       20055 :         struct btrfs_key found_key;
    4935       20055 :         int slot;
    4936             : 
    4937       20055 :         path = btrfs_alloc_path();
    4938       20055 :         if (!path)
    4939             :                 return -ENOMEM;
    4940             : 
    4941       20055 :         search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
    4942       20055 :         search_key.type = -1;
    4943       20055 :         search_key.offset = (u64)-1;
    4944       20055 :         ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
    4945       20055 :         if (ret < 0)
    4946           0 :                 goto error;
    4947       20055 :         BUG_ON(ret == 0); /* Corruption */
    4948       20055 :         if (path->slots[0] > 0) {
    4949       19803 :                 slot = path->slots[0] - 1;
    4950       19803 :                 l = path->nodes[0];
    4951       19803 :                 btrfs_item_key_to_cpu(l, &found_key, slot);
    4952       19803 :                 root->free_objectid = max_t(u64, found_key.objectid + 1,
    4953             :                                             BTRFS_FIRST_FREE_OBJECTID);
    4954             :         } else {
    4955         252 :                 root->free_objectid = BTRFS_FIRST_FREE_OBJECTID;
    4956             :         }
    4957             :         ret = 0;
    4958       20055 : error:
    4959       20055 :         btrfs_free_path(path);
    4960       20055 :         return ret;
    4961             : }
    4962             : 
    4963     3273853 : int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
    4964             : {
    4965     3273853 :         int ret;
    4966     3273853 :         mutex_lock(&root->objectid_mutex);
    4967             : 
    4968     3274264 :         if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
    4969           0 :                 btrfs_warn(root->fs_info,
    4970             :                            "the objectid of root %llu reaches its highest value",
    4971             :                            root->root_key.objectid);
    4972           0 :                 ret = -ENOSPC;
    4973           0 :                 goto out;
    4974             :         }
    4975             : 
    4976     3274264 :         *objectid = root->free_objectid++;
    4977     3274264 :         ret = 0;
    4978     3274264 : out:
    4979     3274264 :         mutex_unlock(&root->objectid_mutex);
    4980     3273655 :         return ret;
    4981             : }

Generated by: LCOV version 1.14