LCOV - fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023

LCOV - code coverage report

Current view:	top level - fs/btrfs - disk-io.c (source / functions)		Hit	Total	Coverage
Test:	fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023	Lines:	1985	2581	76.9 %
Date:	2023-07-31 20:08:22	Functions:	106	112	94.6 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2007 Oracle.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/fs.h>
       7             : #include <linux/blkdev.h>
       8             : #include <linux/radix-tree.h>
       9             : #include <linux/writeback.h>
      10             : #include <linux/workqueue.h>
      11             : #include <linux/kthread.h>
      12             : #include <linux/slab.h>
      13             : #include <linux/migrate.h>
      14             : #include <linux/ratelimit.h>
      15             : #include <linux/uuid.h>
      16             : #include <linux/semaphore.h>
      17             : #include <linux/error-injection.h>
      18             : #include <linux/crc32c.h>
      19             : #include <linux/sched/mm.h>
      20             : #include <asm/unaligned.h>
      21             : #include <crypto/hash.h>
      22             : #include "ctree.h"
      23             : #include "disk-io.h"
      24             : #include "transaction.h"
      25             : #include "btrfs_inode.h"
      26             : #include "bio.h"
      27             : #include "print-tree.h"
      28             : #include "locking.h"
      29             : #include "tree-log.h"
      30             : #include "free-space-cache.h"
      31             : #include "free-space-tree.h"
      32             : #include "check-integrity.h"
      33             : #include "rcu-string.h"
      34             : #include "dev-replace.h"
      35             : #include "raid56.h"
      36             : #include "sysfs.h"
      37             : #include "qgroup.h"
      38             : #include "compression.h"
      39             : #include "tree-checker.h"
      40             : #include "ref-verify.h"
      41             : #include "block-group.h"
      42             : #include "discard.h"
      43             : #include "space-info.h"
      44             : #include "zoned.h"
      45             : #include "subpage.h"
      46             : #include "fs.h"
      47             : #include "accessors.h"
      48             : #include "extent-tree.h"
      49             : #include "root-tree.h"
      50             : #include "defrag.h"
      51             : #include "uuid-tree.h"
      52             : #include "relocation.h"
      53             : #include "scrub.h"
      54             : #include "super.h"
      55             : 
      56             : #define BTRFS_SUPER_FLAG_SUPP   (BTRFS_HEADER_FLAG_WRITTEN |\
      57             :                                  BTRFS_HEADER_FLAG_RELOC |\
      58             :                                  BTRFS_SUPER_FLAG_ERROR |\
      59             :                                  BTRFS_SUPER_FLAG_SEEDING |\
      60             :                                  BTRFS_SUPER_FLAG_METADUMP |\
      61             :                                  BTRFS_SUPER_FLAG_METADUMP_V2)
      62             : 
      63             : static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
      64             : static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
      65             : 
      66        3472 : static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
      67             : {
      68        3472 :         if (fs_info->csum_shash)
      69        3242 :                 crypto_free_shash(fs_info->csum_shash);
      70        3472 : }
      71             : 
      72             : /*
      73             :  * Compute the csum of a btree block and store the result to provided buffer.
      74             :  */
      75     8981356 : static void csum_tree_block(struct extent_buffer *buf, u8 *result)
      76             : {
      77     8981356 :         struct btrfs_fs_info *fs_info = buf->fs_info;
      78     8981356 :         const int num_pages = num_extent_pages(buf);
      79     8981356 :         const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
      80     8981356 :         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
      81     8981356 :         char *kaddr;
      82     8981356 :         int i;
      83             : 
      84     8981356 :         shash->tfm = fs_info->csum_shash;
      85     8981356 :         crypto_shash_init(shash);
      86     8981341 :         kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
      87     8981341 :         crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
      88     8981341 :                             first_page_part - BTRFS_CSUM_SIZE);
      89             : 
      90    43932279 :         for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
      91    25969594 :                 kaddr = page_address(buf->pages[i]);
      92    25969594 :                 crypto_shash_update(shash, kaddr, PAGE_SIZE);
      93             :         }
      94     8981344 :         memset(result, 0, BTRFS_CSUM_SIZE);
      95     8981344 :         crypto_shash_final(shash, result);
      96     8981364 : }
      97             : 
      98             : /*
      99             :  * we can't consider a given block up to date unless the transid of the
     100             :  * block matches the transid in the parent node's pointer.  This is how we
     101             :  * detect blocks that either didn't get written at all or got written
     102             :  * in the wrong place.
     103             :  */
     104   546259285 : int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
     105             : {
     106  1092518570 :         if (!extent_buffer_uptodate(eb))
     107             :                 return 0;
     108             : 
     109   546249351 :         if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
     110             :                 return 1;
     111             : 
     112           0 :         if (atomic)
     113             :                 return -EAGAIN;
     114             : 
     115           0 :         if (!extent_buffer_uptodate(eb) ||
     116             :             btrfs_header_generation(eb) != parent_transid) {
     117           0 :                 btrfs_err_rl(eb->fs_info,
     118             : "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
     119             :                         eb->start, eb->read_mirror,
     120             :                         parent_transid, btrfs_header_generation(eb));
     121           0 :                 clear_extent_buffer_uptodate(eb);
     122           0 :                 return 0;
     123             :         }
     124             :         return 1;
     125             : }
     126             : 
     127             : static bool btrfs_supported_super_csum(u16 csum_type)
     128             : {
     129      442595 :         switch (csum_type) {
     130             :         case BTRFS_CSUM_TYPE_CRC32:
     131             :         case BTRFS_CSUM_TYPE_XXHASH:
     132             :         case BTRFS_CSUM_TYPE_SHA256:
     133             :         case BTRFS_CSUM_TYPE_BLAKE2:
     134             :                 return true;
     135             :         default:
     136           0 :                 return false;
     137             :         }
     138             : }
     139             : 
     140             : /*
     141             :  * Return 0 if the superblock checksum type matches the checksum value of that
     142             :  * algorithm. Pass the raw disk superblock data.
     143             :  */
     144        3294 : int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
     145             :                            const struct btrfs_super_block *disk_sb)
     146             : {
     147        3294 :         char result[BTRFS_CSUM_SIZE];
     148        3294 :         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
     149             : 
     150        3294 :         shash->tfm = fs_info->csum_shash;
     151             : 
     152             :         /*
     153             :          * The super_block structure does not span the whole
     154             :          * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is
     155             :          * filled with zeros and is included in the checksum.
     156             :          */
     157        3294 :         crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE,
     158             :                             BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
     159             : 
     160        6588 :         if (memcmp(disk_sb->csum, result, fs_info->csum_size))
     161           0 :                 return 1;
     162             : 
     163             :         return 0;
     164             : }
     165             : 
     166           0 : static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
     167             :                                       int mirror_num)
     168             : {
     169           0 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     170           0 :         int i, num_pages = num_extent_pages(eb);
     171           0 :         int ret = 0;
     172             : 
     173           0 :         if (sb_rdonly(fs_info->sb))
     174             :                 return -EROFS;
     175             : 
     176           0 :         for (i = 0; i < num_pages; i++) {
     177           0 :                 struct page *p = eb->pages[i];
     178           0 :                 u64 start = max_t(u64, eb->start, page_offset(p));
     179           0 :                 u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE);
     180           0 :                 u32 len = end - start;
     181             : 
     182           0 :                 ret = btrfs_repair_io_failure(fs_info, 0, start, len,
     183             :                                 start, p, offset_in_page(start), mirror_num);
     184           0 :                 if (ret)
     185             :                         break;
     186             :         }
     187             : 
     188             :         return ret;
     189             : }
     190             : 
     191             : /*
     192             :  * helper to read a given tree block, doing retries as required when
     193             :  * the checksums don't match and we have alternate mirrors to try.
     194             :  *
     195             :  * @check:              expected tree parentness check, see the comments of the
     196             :  *                      structure for details.
     197             :  */
     198    23661365 : int btrfs_read_extent_buffer(struct extent_buffer *eb,
     199             :                              struct btrfs_tree_parent_check *check)
     200             : {
     201    23661365 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     202    23661365 :         int failed = 0;
     203    23661365 :         int ret;
     204    23661365 :         int num_copies = 0;
     205    23661365 :         int mirror_num = 0;
     206    23661365 :         int failed_mirror = 0;
     207             : 
     208    23661367 :         ASSERT(check);
     209             : 
     210    23661367 :         while (1) {
     211    23661367 :                 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
     212    23661893 :                 ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
     213    23661486 :                 if (!ret)
     214             :                         break;
     215             : 
     216           4 :                 num_copies = btrfs_num_copies(fs_info,
     217           4 :                                               eb->start, eb->len);
     218           4 :                 if (num_copies == 1)
     219             :                         break;
     220             : 
     221           3 :                 if (!failed_mirror) {
     222           3 :                         failed = 1;
     223           3 :                         failed_mirror = eb->read_mirror;
     224             :                 }
     225             : 
     226           3 :                 mirror_num++;
     227           3 :                 if (mirror_num == failed_mirror)
     228           0 :                         mirror_num++;
     229             : 
     230           3 :                 if (mirror_num > num_copies)
     231             :                         break;
     232             :         }
     233             : 
     234    23661484 :         if (failed && !ret && failed_mirror)
     235           0 :                 btrfs_repair_eb_io_failure(eb, failed_mirror);
     236             : 
     237    23661484 :         return ret;
     238             : }
     239             : 
     240             : /*
     241             :  * Checksum a dirty tree block before IO.
     242             :  */
     243     8882027 : blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
     244             : {
     245     8882027 :         struct extent_buffer *eb = bbio->private;
     246     8882027 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     247     8882027 :         u64 found_start = btrfs_header_bytenr(eb);
     248     8882027 :         u8 result[BTRFS_CSUM_SIZE];
     249     8882027 :         int ret;
     250             : 
     251             :         /* Btree blocks are always contiguous on disk. */
     252     8882027 :         if (WARN_ON_ONCE(bbio->file_offset != eb->start))
     253             :                 return BLK_STS_IOERR;
     254     8882027 :         if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
     255             :                 return BLK_STS_IOERR;
     256             : 
     257    17764054 :         if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
     258           0 :                 WARN_ON_ONCE(found_start != 0);
     259             :                 return BLK_STS_OK;
     260             :         }
     261             : 
     262     8882027 :         if (WARN_ON_ONCE(found_start != eb->start))
     263             :                 return BLK_STS_IOERR;
     264     8882027 :         if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0], eb->start,
     265             :                                               eb->len)))
     266             :                 return BLK_STS_IOERR;
     267             : 
     268     8882005 :         ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
     269             :                                     offsetof(struct btrfs_header, fsid),
     270             :                                     BTRFS_FSID_SIZE) == 0);
     271     8881979 :         csum_tree_block(eb, result);
     272             : 
     273     8881976 :         if (btrfs_header_level(eb))
     274      879548 :                 ret = btrfs_check_node(eb);
     275             :         else
     276     8002428 :                 ret = btrfs_check_leaf(eb);
     277             : 
     278     8882098 :         if (ret < 0)
     279           0 :                 goto error;
     280             : 
     281             :         /*
     282             :          * Also check the generation, the eb reached here must be newer than
     283             :          * last committed. Or something seriously wrong happened.
     284             :          */
     285     8882098 :         if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
     286           0 :                 ret = -EUCLEAN;
     287           0 :                 btrfs_err(fs_info,
     288             :                         "block=%llu bad generation, have %llu expect > %llu",
     289             :                           eb->start, btrfs_header_generation(eb),
     290             :                           fs_info->last_trans_committed);
     291           0 :                 goto error;
     292             :         }
     293     8882098 :         write_extent_buffer(eb, result, 0, fs_info->csum_size);
     294     8882098 :         return BLK_STS_OK;
     295             : 
     296           0 : error:
     297           0 :         btrfs_print_tree(eb, 0);
     298           0 :         btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
     299             :                   eb->start);
     300             :         /*
     301             :          * Be noisy if this is an extent buffer from a log tree. We don't abort
     302             :          * a transaction in case there's a bad log tree extent buffer, we just
     303             :          * fallback to a transaction commit. Still we want to know when there is
     304             :          * a bad log tree extent buffer, as that may signal a bug somewhere.
     305             :          */
     306           0 :         WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
     307             :                 btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
     308           0 :         return errno_to_blk_status(ret);
     309             : }
     310             : 
     311       99359 : static bool check_tree_block_fsid(struct extent_buffer *eb)
     312             : {
     313       99359 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     314       99359 :         struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
     315       99359 :         u8 fsid[BTRFS_FSID_SIZE];
     316       99359 :         u8 *metadata_uuid;
     317             : 
     318       99359 :         read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
     319             :                            BTRFS_FSID_SIZE);
     320             :         /*
     321             :          * Checking the incompat flag is only valid for the current fs. For
     322             :          * seed devices it's forbidden to have their uuid changed so reading
     323             :          * ->fsid in this case is fine
     324             :          */
     325       99359 :         if (btrfs_fs_incompat(fs_info, METADATA_UUID))
     326           0 :                 metadata_uuid = fs_devices->metadata_uuid;
     327             :         else
     328       99359 :                 metadata_uuid = fs_devices->fsid;
     329             : 
     330      198718 :         if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
     331             :                 return false;
     332             : 
     333           0 :         list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
     334           0 :                 if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE))
     335             :                         return false;
     336             : 
     337             :         return true;
     338             : }
     339             : 
     340             : /* Do basic extent buffer checks at read time */
     341       99359 : int btrfs_validate_extent_buffer(struct extent_buffer *eb,
     342             :                                  struct btrfs_tree_parent_check *check)
     343             : {
     344       99359 :         struct btrfs_fs_info *fs_info = eb->fs_info;
     345       99359 :         u64 found_start;
     346       99359 :         const u32 csum_size = fs_info->csum_size;
     347       99359 :         u8 found_level;
     348       99359 :         u8 result[BTRFS_CSUM_SIZE];
     349       99359 :         const u8 *header_csum;
     350       99359 :         int ret = 0;
     351             : 
     352       99359 :         ASSERT(check);
     353             : 
     354       99359 :         found_start = btrfs_header_bytenr(eb);
     355       99359 :         if (found_start != eb->start) {
     356           0 :                 btrfs_err_rl(fs_info,
     357             :                         "bad tree block start, mirror %u want %llu have %llu",
     358             :                              eb->read_mirror, eb->start, found_start);
     359           0 :                 ret = -EIO;
     360           0 :                 goto out;
     361             :         }
     362       99359 :         if (check_tree_block_fsid(eb)) {
     363           0 :                 btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u",
     364             :                              eb->start, eb->read_mirror);
     365           0 :                 ret = -EIO;
     366           0 :                 goto out;
     367             :         }
     368       99356 :         found_level = btrfs_header_level(eb);
     369       99356 :         if (found_level >= BTRFS_MAX_LEVEL) {
     370           0 :                 btrfs_err(fs_info,
     371             :                         "bad tree block level, mirror %u level %d on logical %llu",
     372             :                         eb->read_mirror, btrfs_header_level(eb), eb->start);
     373           0 :                 ret = -EIO;
     374           0 :                 goto out;
     375             :         }
     376             : 
     377       99356 :         csum_tree_block(eb, result);
     378       99357 :         header_csum = page_address(eb->pages[0]) +
     379             :                 get_eb_offset_in_page(eb, offsetof(struct btrfs_header, csum));
     380             : 
     381      198714 :         if (memcmp(result, header_csum, csum_size) != 0) {
     382           1 :                 btrfs_warn_rl(fs_info,
     383             : "checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
     384             :                               eb->start, eb->read_mirror,
     385             :                               CSUM_FMT_VALUE(csum_size, header_csum),
     386             :                               CSUM_FMT_VALUE(csum_size, result),
     387             :                               btrfs_header_level(eb));
     388           1 :                 ret = -EUCLEAN;
     389           1 :                 goto out;
     390             :         }
     391             : 
     392       99356 :         if (found_level != check->level) {
     393           0 :                 btrfs_err(fs_info,
     394             :                 "level verify failed on logical %llu mirror %u wanted %u found %u",
     395             :                           eb->start, eb->read_mirror, check->level, found_level);
     396           0 :                 ret = -EIO;
     397           0 :                 goto out;
     398             :         }
     399       99356 :         if (unlikely(check->transid &&
     400             :                      btrfs_header_generation(eb) != check->transid)) {
     401           0 :                 btrfs_err_rl(eb->fs_info,
     402             : "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
     403             :                                 eb->start, eb->read_mirror, check->transid,
     404             :                                 btrfs_header_generation(eb));
     405           0 :                 ret = -EIO;
     406           0 :                 goto out;
     407             :         }
     408       99356 :         if (check->has_first_key) {
     409       51775 :                 struct btrfs_key *expect_key = &check->first_key;
     410       51775 :                 struct btrfs_key found_key;
     411             : 
     412       51775 :                 if (found_level)
     413         210 :                         btrfs_node_key_to_cpu(eb, &found_key, 0);
     414             :                 else
     415       51565 :                         btrfs_item_key_to_cpu(eb, &found_key, 0);
     416       51775 :                 if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) {
     417           0 :                         btrfs_err(fs_info,
     418             : "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
     419             :                                   eb->start, check->transid,
     420             :                                   expect_key->objectid,
     421             :                                   expect_key->type, expect_key->offset,
     422             :                                   found_key.objectid, found_key.type,
     423             :                                   found_key.offset);
     424           0 :                         ret = -EUCLEAN;
     425           0 :                         goto out;
     426             :                 }
     427             :         }
     428       99356 :         if (check->owner_root) {
     429       89869 :                 ret = btrfs_check_eb_owner(eb, check->owner_root);
     430       89869 :                 if (ret < 0)
     431           0 :                         goto out;
     432             :         }
     433             : 
     434             :         /*
     435             :          * If this is a leaf block and it is corrupt, set the corrupt bit so
     436             :          * that we don't try and read the other copies of this block, just
     437             :          * return -EIO.
     438             :          */
     439       99356 :         if (found_level == 0 && btrfs_check_leaf(eb)) {
     440           0 :                 set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
     441           0 :                 ret = -EIO;
     442             :         }
     443             : 
     444       99358 :         if (found_level > 0 && btrfs_check_node(eb))
     445             :                 ret = -EIO;
     446             : 
     447       99358 :         if (ret)
     448           0 :                 btrfs_err(fs_info,
     449             :                 "read time tree block corruption detected on logical %llu mirror %u",
     450             :                           eb->start, eb->read_mirror);
     451       99358 : out:
     452       99359 :         return ret;
     453             : }
     454             : 
     455             : #ifdef CONFIG_MIGRATION
     456    28116655 : static int btree_migrate_folio(struct address_space *mapping,
     457             :                 struct folio *dst, struct folio *src, enum migrate_mode mode)
     458             : {
     459             :         /*
     460             :          * we can't safely write a btree page from here,
     461             :          * we haven't done the locking hook
     462             :          */
     463    28116655 :         if (folio_test_dirty(src))
     464             :                 return -EAGAIN;
     465             :         /*
     466             :          * Buffers may be managed in a filesystem specific way.
     467             :          * We must have no buffers or drop them.
     468             :          */
     469     4814689 :         if (folio_get_private(src) &&
     470      940783 :             !filemap_release_folio(src, GFP_KERNEL))
     471             :                 return -EAGAIN;
     472     3768482 :         return migrate_folio(mapping, dst, src, mode);
     473             : }
     474             : #else
     475             : #define btree_migrate_folio NULL
     476             : #endif
     477             : 
     478     2323720 : static int btree_writepages(struct address_space *mapping,
     479             :                             struct writeback_control *wbc)
     480             : {
     481     2323720 :         struct btrfs_fs_info *fs_info;
     482     2323720 :         int ret;
     483             : 
     484     2323720 :         if (wbc->sync_mode == WB_SYNC_NONE) {
     485             : 
     486       43579 :                 if (wbc->for_kupdate)
     487             :                         return 0;
     488             : 
     489       42818 :                 fs_info = BTRFS_I(mapping->host)->root->fs_info;
     490             :                 /* this is a bit racy, but that's ok */
     491       42818 :                 ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
     492             :                                              BTRFS_DIRTY_METADATA_THRESH,
     493             :                                              fs_info->dirty_metadata_batch);
     494       42815 :                 if (ret < 0)
     495             :                         return 0;
     496             :         }
     497     2280214 :         return btree_write_cache_pages(mapping, wbc);
     498             : }
     499             : 
     500     3237570 : static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags)
     501             : {
     502     6475140 :         if (folio_test_writeback(folio) || folio_test_dirty(folio))
     503           0 :                 return false;
     504             : 
     505     3237570 :         return try_release_extent_buffer(&folio->page);
     506             : }
     507             : 
     508       34505 : static void btree_invalidate_folio(struct folio *folio, size_t offset,
     509             :                                  size_t length)
     510             : {
     511       34505 :         struct extent_io_tree *tree;
     512       34505 :         tree = &BTRFS_I(folio->mapping->host)->io_tree;
     513       34505 :         extent_invalidate_folio(tree, folio, offset);
     514       34505 :         btree_release_folio(folio, GFP_NOFS);
     515       34505 :         if (folio_get_private(folio)) {
     516           0 :                 btrfs_warn(BTRFS_I(folio->mapping->host)->root->fs_info,
     517             :                            "folio private not zero on folio %llu",
     518             :                            (unsigned long long)folio_pos(folio));
     519           0 :                 folio_detach_private(folio);
     520             :         }
     521       34505 : }
     522             : 
     523             : #ifdef DEBUG
     524             : static bool btree_dirty_folio(struct address_space *mapping,
     525             :                 struct folio *folio)
     526             : {
                               /*
                                * Debug-only dirty_folio hook: sanity-check the extent buffer(s)
                                * backing @folio, then hand off to filemap_dirty_folio() and
                                * return its result.  Only built when DEBUG is defined; otherwise
                                * the name is a plain alias of filemap_dirty_folio (see #else).
                                */
     527             :         struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
     528             :         struct btrfs_subpage *subpage;
     529             :         struct extent_buffer *eb;
     530             :         int cur_bit = 0;
     531             :         u64 page_start = folio_pos(folio);
     532             : 
                               /* sectorsize == PAGE_SIZE: folio private is read as the eb itself */
     533             :         if (fs_info->sectorsize == PAGE_SIZE) {
     534             :                 eb = folio_get_private(folio);
     535             :                 BUG_ON(!eb);
     536             :                 BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
     537             :                 BUG_ON(!atomic_read(&eb->refs));
     538             :                 btrfs_assert_tree_write_locked(eb);
     539             :                 return filemap_dirty_folio(mapping, folio);
     540             :         }
                               /* Otherwise folio private is read as a struct btrfs_subpage */
     541             :         subpage = folio_get_private(folio);
     542             : 
     543             :         ASSERT(subpage->dirty_bitmap);
                               /* Walk the dirty bitmap, one bit per sector of the folio */
     544             :         while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) {
     545             :                 unsigned long flags;
     546             :                 u64 cur;
     547             :                 u16 tmp = (1 << cur_bit);
     548             : 
                                       /* Sample the bit under subpage->lock; clean sector => next bit */
     549             :                 spin_lock_irqsave(&subpage->lock, flags);
     550             :                 if (!(tmp & subpage->dirty_bitmap)) {
     551             :                         spin_unlock_irqrestore(&subpage->lock, flags);
     552             :                         cur_bit++;
     553             :                         continue;
     554             :                 }
     555             :                 spin_unlock_irqrestore(&subpage->lock, flags);
                                       /* Position of this sector: folio position plus cur_bit sectors */
     556             :                 cur = page_start + cur_bit * fs_info->sectorsize;
     557             : 
                                       /* The eb found at that position must be dirty, referenced, write locked */
     558             :                 eb = find_extent_buffer(fs_info, cur);
     559             :                 ASSERT(eb);
     560             :                 ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
     561             :                 ASSERT(atomic_read(&eb->refs));
     562             :                 btrfs_assert_tree_write_locked(eb);
                                       /* NOTE(review): presumably drops the ref taken by find_extent_buffer() - confirm */
     563             :                 free_extent_buffer(eb);
     564             : 
                                       /*
                                        * Skip ahead by nodesize >> sectorsize_bits bits, i.e. past the
                                        * sectors of this eb (assumes sectorsize_bits is log2 of
                                        * sectorsize - TODO confirm).
                                        */
     565             :                 cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits);
     566             :         }
     567             :         return filemap_dirty_folio(mapping, folio);
     568             : }
     569             : #else
     570             : #define btree_dirty_folio filemap_dirty_folio
     571             : #endif
     572             : 
     573             : static const struct address_space_operations btree_aops = {
                               /*
                                * Address space callbacks built from the btree_* helpers of this
                                * file.  .dirty_folio resolves to filemap_dirty_folio unless DEBUG
                                * is defined (see the btree_dirty_folio #ifdef above this table).
                                */
     574             :         .writepages     = btree_writepages,
     575             :         .release_folio  = btree_release_folio,
     576             :         .invalidate_folio = btree_invalidate_folio,
     577             :         .migrate_folio  = btree_migrate_folio,
     578             :         .dirty_folio    = btree_dirty_folio,
     579             : };
     580             : 
     581    22032735 : struct extent_buffer *btrfs_find_create_tree_block(
     582             :                                                 struct btrfs_fs_info *fs_info,
     583             :                                                 u64 bytenr, u64 owner_root,
     584             :                                                 int level)
     585             : {
                               /*
                                * Return the extent buffer for @bytenr.  When btrfs_is_testing()
                                * is true for @fs_info a test extent buffer is allocated and
                                * @owner_root / @level are not used; otherwise all four arguments
                                * go to alloc_extent_buffer().  The callee's return value is
                                * passed through unchanged.
                                */
     586    45664039 :         if (btrfs_is_testing(fs_info))
     587             :                 return alloc_test_extent_buffer(fs_info, bytenr);
     588    22032735 :         return alloc_extent_buffer(fs_info, bytenr, owner_root, level);
     589             : }
     590             : 
     591             : /*
     592             :  * Read the tree block at logical address @bytenr and perform various basic
     593             :  * but critical verifications.
     594             :  *
     595             :  * @check:              expected tree parentness check, see comments of the
     596             :  *                      structure for details.
                        *
                        * @check must not be NULL (asserted); its owner_root and level are also
                        * used to find/create the extent buffer.
                        *
                        * Return the extent buffer on success.  On failure return an ERR_PTR: the
                        * one from btrfs_find_create_tree_block(), the error code of
                        * btrfs_read_extent_buffer(), or -EUCLEAN when btrfs_check_eb_owner()
                        * rejects the buffer.
     597             :  */
     598    23631304 : struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
     599             :                                       struct btrfs_tree_parent_check *check)
     600             : {
     601    23631304 :         struct extent_buffer *buf = NULL;
     602    23631304 :         int ret;
     603             : 
     604    23631304 :         ASSERT(check);
     605             : 
     606    23631304 :         buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root,
     607    23631304 :                                            check->level);
     608    23632280 :         if (IS_ERR(buf))
     609             :                 return buf;
     610             : 
                               /* Both failure paths below release buf via free_extent_buffer_stale() */
     611    23632280 :         ret = btrfs_read_extent_buffer(buf, check);
     612    23631978 :         if (ret) {
     613           1 :                 free_extent_buffer_stale(buf);
     614           1 :                 return ERR_PTR(ret);
     615             :         }
                               /* A non-zero result from the owner check is reported as -EUCLEAN */
     616    23631977 :         if (btrfs_check_eb_owner(buf, check->owner_root)) {
     617           0 :                 free_extent_buffer_stale(buf);
     618           0 :                 return ERR_PTR(-EUCLEAN);
     619             :         }
     620             :         return buf;
     621             : 
     622             : }
     623             : 
     624       67274 : static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
     625             :                          u64 objectid)
     626             : {
                               /*
                                * Initialize the in-memory state of @root: clear root_key,
                                * root_item and defrag_progress, bind the root to @fs_info, store
                                * @objectid in root_key.objectid, and set up the counters, lists,
                                * locks, wait queues and atomics below.  Only root_key.objectid is
                                * filled in here; key type and offset stay zero.
                                */
     627       67274 :         bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
     628             : 
     629       67274 :         memset(&root->root_key, 0, sizeof(root->root_key));
     630       67274 :         memset(&root->root_item, 0, sizeof(root->root_item));
     631       67274 :         memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
     632       67274 :         root->fs_info = fs_info;
     633       67274 :         root->root_key.objectid = objectid;
     634       67274 :         root->node = NULL;
     635       67274 :         root->commit_root = NULL;
     636       67274 :         root->state = 0;
     637       67274 :         RB_CLEAR_NODE(&root->rb_node);
     638             : 
     639       67274 :         root->last_trans = 0;
     640       67274 :         root->free_objectid = 0;
     641       67274 :         root->nr_delalloc_inodes = 0;
     642       67274 :         root->nr_ordered_extents = 0;
     643       67274 :         root->inode_tree = RB_ROOT;
     644       67274 :         INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
     645             : 
     646       67274 :         btrfs_init_root_block_rsv(root);
     647             : 
     648       67274 :         INIT_LIST_HEAD(&root->dirty_list);
     649       67274 :         INIT_LIST_HEAD(&root->root_list);
     650       67274 :         INIT_LIST_HEAD(&root->delalloc_inodes);
     651       67274 :         INIT_LIST_HEAD(&root->delalloc_root);
     652       67274 :         INIT_LIST_HEAD(&root->ordered_extents);
     653       67274 :         INIT_LIST_HEAD(&root->ordered_root);
     654       67274 :         INIT_LIST_HEAD(&root->reloc_dirty_list);
     655       67274 :         INIT_LIST_HEAD(&root->logged_list[0]);
     656       67274 :         INIT_LIST_HEAD(&root->logged_list[1]);
     657       67274 :         spin_lock_init(&root->inode_lock);
     658       67274 :         spin_lock_init(&root->delalloc_lock);
     659       67274 :         spin_lock_init(&root->ordered_extent_lock);
     660       67274 :         spin_lock_init(&root->accounting_lock);
     661       67274 :         spin_lock_init(&root->log_extents_lock[0]);
     662       67274 :         spin_lock_init(&root->log_extents_lock[1]);
     663       67274 :         spin_lock_init(&root->qgroup_meta_rsv_lock);
     664       67274 :         mutex_init(&root->objectid_mutex);
     665       67274 :         mutex_init(&root->log_mutex);
     666       67274 :         mutex_init(&root->ordered_extent_mutex);
     667       67274 :         mutex_init(&root->delalloc_mutex);
     668       67274 :         init_waitqueue_head(&root->qgroup_flush_wait);
     669       67274 :         init_waitqueue_head(&root->log_writer_wait);
     670       67274 :         init_waitqueue_head(&root->log_commit_wait[0]);
     671       67274 :         init_waitqueue_head(&root->log_commit_wait[1]);
     672       67274 :         INIT_LIST_HEAD(&root->log_ctxs[0]);
     673       67274 :         INIT_LIST_HEAD(&root->log_ctxs[1]);
     674       67274 :         atomic_set(&root->log_commit[0], 0);
     675       67274 :         atomic_set(&root->log_commit[1], 0);
     676       67274 :         atomic_set(&root->log_writers, 0);
     677       67274 :         atomic_set(&root->log_batch, 0);
                               /* The root starts out with a single reference */
     678       67274 :         refcount_set(&root->refs, 1);
     679       67274 :         atomic_set(&root->snapshot_force_cow, 0);
     680       67274 :         atomic_set(&root->nr_swapfiles, 0);
     681       67274 :         root->log_transid = 0;
     682       67274 :         root->log_transid_committed = -1;
     683       67274 :         root->last_log_commit = 0;
     684       67274 :         root->anon_dev = 0;
                               /* The two per-root extent io trees are skipped for a dummy fs_info */
     685       67274 :         if (!dummy) {
     686       67274 :                 extent_io_tree_init(fs_info, &root->dirty_log_pages,
     687             :                                     IO_TREE_ROOT_DIRTY_LOG_PAGES);
     688       67274 :                 extent_io_tree_init(fs_info, &root->log_csum_range,
     689             :                                     IO_TREE_LOG_CSUM_RANGE);
     690             :         }
     691             : 
     692       67274 :         spin_lock_init(&root->root_item_lock);
     693       67274 :         btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
     694             : #ifdef CONFIG_BTRFS_DEBUG
                               /*
                                * Debug builds link every root onto fs_info->allocated_roots
                                * under fs_roots_radix_lock (presumably for leak tracking, going
                                * by the leak_list name - confirm).
                                */
     695             :         INIT_LIST_HEAD(&root->leak_list);
     696             :         spin_lock(&fs_info->fs_roots_radix_lock);
     697             :         list_add_tail(&root->leak_list, &fs_info->allocated_roots);
     698             :         spin_unlock(&fs_info->fs_roots_radix_lock);
     699             : #endif
     700       67273 : }
     701             : 
     702       67274 : static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
     703             :                                            u64 objectid, gfp_t flags)
     704             : {
                               /*
                                * Allocate a zeroed struct btrfs_root with allocation flags @flags
                                * and initialize it with __setup_root().  Returns NULL when the
                                * allocation fails (callers in this file map that to -ENOMEM).
                                */
     705       67274 :         struct btrfs_root *root = kzalloc(sizeof(*root), flags);
     706       67274 :         if (root)
     707       67274 :                 __setup_root(root, fs_info, objectid);
     708       67274 :         return root;
     709             : }
     710             : 
     711             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
     712             : /*
                        * Should only be used by the testing infrastructure.
                        *
                        * Allocate a root with objectid BTRFS_ROOT_TREE_OBJECTID for @fs_info.
                        * Return ERR_PTR(-EINVAL) when @fs_info is NULL and ERR_PTR(-ENOMEM) when
                        * the allocation fails.
                        */
     713             : struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info)
     714             : {
     715             :         struct btrfs_root *root;
     716             : 
     717             :         if (!fs_info)
     718             :                 return ERR_PTR(-EINVAL);
     719             : 
     720             :         root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, GFP_KERNEL);
     721             :         if (!root)
     722             :                 return ERR_PTR(-ENOMEM);
     723             : 
     724             :         /* Start the dummy root with alloc_bytenr at 0 (no stripesize is set here) */
     725             :         root->alloc_bytenr = 0;
     726             : 
     727             :         return root;
     728             : }
     729             : #endif
     730             : 
     731             : static int global_root_cmp(struct rb_node *a_node, const struct rb_node *b_node)
     732             : {
                               /*
                                * Node-vs-node comparator handed to rb_find_add() by
                                * btrfs_global_root_insert(): orders two roots by their root_key
                                * using btrfs_comp_cpu_keys().
                                */
     733        9637 :         const struct btrfs_root *a = rb_entry(a_node, struct btrfs_root, rb_node);
     734        9637 :         const struct btrfs_root *b = rb_entry(b_node, struct btrfs_root, rb_node);
     735             : 
     736        9637 :         return btrfs_comp_cpu_keys(&a->root_key, &b->root_key);
     737             : }
     738             : 
     739             : static int global_root_key_cmp(const void *k, const struct rb_node *node)
     740             : {
                               /*
                                * Key-vs-node comparator handed to rb_find() by
                                * btrfs_global_root(): @k is treated as a struct btrfs_key and
                                * compared against the root_key of the root embedding @node.
                                */
     741   345631383 :         const struct btrfs_key *key = k;
     742   345631383 :         const struct btrfs_root *root = rb_entry(node, struct btrfs_root, rb_node);
     743             : 
     744   345631383 :         return btrfs_comp_cpu_keys(key, &root->root_key);
     745             : }
     746             : 
     747        9641 : int btrfs_global_root_insert(struct btrfs_root *root)
     748             : {
                               /*
                                * Add @root to fs_info->global_root_tree, ordered by root_key,
                                * while holding global_root_lock for writing.  Returns 0, or
                                * -EEXIST (after a warning) when rb_find_add() hands back a
                                * non-NULL node.
                                */
     749        9641 :         struct btrfs_fs_info *fs_info = root->fs_info;
     750        9641 :         struct rb_node *tmp;
     751        9641 :         int ret = 0;
     752             : 
     753        9641 :         write_lock(&fs_info->global_root_lock);
     754        9641 :         tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp);
     755        9641 :         write_unlock(&fs_info->global_root_lock);
     756             : 
                               /* The warning is issued after the lock has been dropped */
     757        9641 :         if (tmp) {
     758           0 :                 ret = -EEXIST;
     759           0 :                 btrfs_warn(fs_info, "global root %llu %llu already exists",
     760             :                                 root->root_key.objectid, root->root_key.offset);
     761             :         }
     762        9641 :         return ret;
     763             : }
     764             : 
     765           5 : void btrfs_global_root_delete(struct btrfs_root *root)
     766             : {
                               /*
                                * Erase @root's rb_node from fs_info->global_root_tree while
                                * holding global_root_lock for writing.  Nothing here checks that
                                * the node is actually linked into the tree.
                                */
     767           5 :         struct btrfs_fs_info *fs_info = root->fs_info;
     768             : 
     769           5 :         write_lock(&fs_info->global_root_lock);
     770           5 :         rb_erase(&root->rb_node, &fs_info->global_root_tree);
     771           5 :         write_unlock(&fs_info->global_root_lock);
     772           5 : }
     773             : 
     774   178158701 : struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
     775             :                                      struct btrfs_key *key)
     776             : {
                               /*
                                * Look up the root whose root_key matches @key in
                                * fs_info->global_root_tree, holding global_root_lock for reading
                                * during the search.  Returns NULL when no node matches.  No
                                * reference count is changed in this function.
                                */
     777   178158701 :         struct rb_node *node;
     778   178158701 :         struct btrfs_root *root = NULL;
     779             : 
     780   178158701 :         read_lock(&fs_info->global_root_lock);
     781   178150206 :         node = rb_find(key, &fs_info->global_root_tree, global_root_key_cmp);
     782   178150206 :         if (node)
     783   178150206 :                 root = container_of(node, struct btrfs_root, rb_node);
     784   178150206 :         read_unlock(&fs_info->global_root_lock);
     785             : 
     786   178160849 :         return root;
     787             : }
     788             : 
     789   136444263 : static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
     790             : {
                               /*
                                * Return the global root id used as key offset by
                                * btrfs_csum_root() and btrfs_extent_root().  Without the
                                * EXTENT_TREE_V2 incompat flag this is always 0; with it, the id
                                * is read from the block group found for @bytenr.
                                */
     791   136444263 :         struct btrfs_block_group *block_group;
     792   136444263 :         u64 ret;
     793             : 
     794   136444263 :         if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
     795             :                 return 0;
     796             : 
                               /* bytenr == 0 takes the "first block group" lookup instead */
     797           0 :         if (bytenr)
     798           0 :                 block_group = btrfs_lookup_block_group(fs_info, bytenr);
     799             :         else
     800           0 :                 block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
                               /* A failed lookup trips the ASSERT and otherwise falls back to id 0 */
     801           0 :         ASSERT(block_group);
     802           0 :         if (!block_group)
     803             :                 return 0;
     804           0 :         ret = block_group->global_root_id;
     805           0 :         btrfs_put_block_group(block_group);
     806             : 
     807           0 :         return ret;
     808             : }
     809             : 
     810    10661076 : struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
     811             : {
                               /*
                                * Return the global root keyed (BTRFS_CSUM_TREE_OBJECTID,
                                * BTRFS_ROOT_ITEM_KEY, global root id for @bytenr), or NULL when
                                * btrfs_global_root() finds no such root.
                                */
     812    21320527 :         struct btrfs_key key = {
     813             :                 .objectid = BTRFS_CSUM_TREE_OBJECTID,
     814             :                 .type = BTRFS_ROOT_ITEM_KEY,
     815    10661076 :                 .offset = btrfs_global_root_id(fs_info, bytenr),
     816             :         };
     817             : 
     818    10659451 :         return btrfs_global_root(fs_info, &key);
     819             : }
     820             : 
     821   125788174 : struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
     822             : {
                               /*
                                * Return the global root keyed (BTRFS_EXTENT_TREE_OBJECTID,
                                * BTRFS_ROOT_ITEM_KEY, global root id for @bytenr), or NULL when
                                * btrfs_global_root() finds no such root.
                                */
     823   251574014 :         struct btrfs_key key = {
     824             :                 .objectid = BTRFS_EXTENT_TREE_OBJECTID,
     825             :                 .type = BTRFS_ROOT_ITEM_KEY,
     826   125788174 :                 .offset = btrfs_global_root_id(fs_info, bytenr),
     827             :         };
     828             : 
     829   125785840 :         return btrfs_global_root(fs_info, &key);
     830             : }
     831             : 
     832      575586 : struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
     833             : {
                               /*
                                * Return fs_info->block_group_root when the BLOCK_GROUP_TREE
                                * compat_ro flag is set, otherwise the extent root looked up with
                                * bytenr 0.
                                */
     834      575586 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
     835           0 :                 return fs_info->block_group_root;
     836      575586 :         return btrfs_extent_root(fs_info, 0);
     837             : }
     838             : 
     839         164 : struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
     840             :                                      u64 objectid)
     841             : {
                               /*
                                * Create a new tree with id @objectid inside transaction @trans:
                                * allocate the in-memory root, allocate and dirty its first leaf,
                                * fill in root_item and insert that item into fs_info->tree_root
                                * under the key (@objectid, BTRFS_ROOT_ITEM_KEY, 0).
                                *
                                * Returns the new root on success.  On failure returns
                                * ERR_PTR(-ENOMEM) if the root cannot be allocated, or the error
                                * from btrfs_alloc_tree_block() / btrfs_insert_root() after
                                * dropping the root with btrfs_put_root().
                                */
     842         164 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     843         164 :         struct extent_buffer *leaf;
     844         164 :         struct btrfs_root *tree_root = fs_info->tree_root;
     845         164 :         struct btrfs_root *root;
     846         164 :         struct btrfs_key key;
     847         164 :         unsigned int nofs_flag;
     848         164 :         int ret = 0;
     849             : 
     850             :         /*
     851             :          * We're holding a transaction handle, so use a NOFS memory allocation
     852             :          * context to avoid deadlock if reclaim happens.
     853             :          */
     854         164 :         nofs_flag = memalloc_nofs_save();
     855         164 :         root = btrfs_alloc_root(fs_info, objectid, GFP_KERNEL);
     856         164 :         memalloc_nofs_restore(nofs_flag);
     857         164 :         if (!root)
     858             :                 return ERR_PTR(-ENOMEM);
     859             : 
                               /* __setup_root() already stored objectid; complete the rest of the key */
     860         164 :         root->root_key.objectid = objectid;
     861         164 :         root->root_key.type = BTRFS_ROOT_ITEM_KEY;
     862         164 :         root->root_key.offset = 0;
     863             : 
     864         164 :         leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0,
     865             :                                       BTRFS_NESTING_NORMAL);
     866         164 :         if (IS_ERR(leaf)) {
     867           0 :                 ret = PTR_ERR(leaf);
     868           0 :                 leaf = NULL;
     869           0 :                 goto fail;
     870             :         }
     871             : 
     872         164 :         root->node = leaf;
     873         164 :         btrfs_mark_buffer_dirty(leaf);
     874             : 
     875         164 :         root->commit_root = btrfs_root_node(root);
     876         164 :         set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
     877             : 
                               /* Describe the new single-leaf (level 0) tree in root_item */
     878         164 :         btrfs_set_root_flags(&root->root_item, 0);
     879         164 :         btrfs_set_root_limit(&root->root_item, 0);
     880         164 :         btrfs_set_root_bytenr(&root->root_item, leaf->start);
     881         164 :         btrfs_set_root_generation(&root->root_item, trans->transid);
     882         164 :         btrfs_set_root_level(&root->root_item, 0);
     883         164 :         btrfs_set_root_refs(&root->root_item, 1);
     884         164 :         btrfs_set_root_used(&root->root_item, leaf->len);
     885         164 :         btrfs_set_root_last_snapshot(&root->root_item, 0);
     886         164 :         btrfs_set_root_dirid(&root->root_item, 0);
                               /* is_fstree() ids get a random uuid, every other tree the null guid */
     887         164 :         if (is_fstree(objectid))
     888           0 :                 generate_random_guid(root->root_item.uuid);
     889             :         else
     890         164 :                 export_guid(root->root_item.uuid, &guid_null);
     891         164 :         btrfs_set_root_drop_level(&root->root_item, 0);
     892             : 
                               /* NOTE(review): leaf presumably comes back locked from btrfs_alloc_tree_block() - confirm */
     893         164 :         btrfs_tree_unlock(leaf);
     894             : 
     895         164 :         key.objectid = objectid;
     896         164 :         key.type = BTRFS_ROOT_ITEM_KEY;
     897         164 :         key.offset = 0;
     898         164 :         ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
     899         164 :         if (ret)
     900           0 :                 goto fail;
     901             : 
     902             :         return root;
     903             : 
     904           0 : fail:
     905           0 :         btrfs_put_root(root);
     906             : 
     907           0 :         return ERR_PTR(ret);
     908             : }
     909             : 
     910        9566 : static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
     911             :                                          struct btrfs_fs_info *fs_info)
     912             : {
                               /*
                                * Allocate (GFP_NOFS) an in-memory root keyed
                                * (BTRFS_TREE_LOG_OBJECTID, BTRFS_ROOT_ITEM_KEY,
                                * BTRFS_TREE_LOG_OBJECTID).  No tree block is allocated here and
                                * @trans is not used in the body.  Returns ERR_PTR(-ENOMEM) on
                                * allocation failure.
                                */
     913        9566 :         struct btrfs_root *root;
     914             : 
     915        9566 :         root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_NOFS);
     916        9566 :         if (!root)
     917             :                 return ERR_PTR(-ENOMEM);
     918             : 
     919        9566 :         root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
     920        9566 :         root->root_key.type = BTRFS_ROOT_ITEM_KEY;
     921        9566 :         root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
     922             : 
     923        9566 :         return root;
     924             : }
     925             : 
     926        9566 : int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
     927             :                               struct btrfs_root *root)
     928             : {
                               /*
                                * Allocate the first tree block of log root @root, store it in
                                * root->node, mark it dirty and unlock it.  Returns 0 on success
                                * or the error carried by btrfs_alloc_tree_block().
                                */
     929        9566 :         struct extent_buffer *leaf;
     930             : 
     931             :         /*
     932             :          * DON'T set SHAREABLE bit for log trees.
     933             :          *
     934             :          * Log trees are not exposed to user space thus can't be snapshotted,
     935             :          * and they go away before a real commit is actually done.
     936             :          *
     937             :          * They do store pointers to file data extents, and those reference
     938             :          * counts still get updated (along with back refs to the log tree).
     939             :          */
     940             : 
     941        9566 :         leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID,
     942             :                         NULL, 0, 0, 0, BTRFS_NESTING_NORMAL);
     943        9566 :         if (IS_ERR(leaf))
     944           0 :                 return PTR_ERR(leaf);
     945             : 
     946        9566 :         root->node = leaf;
     947             : 
     948        9566 :         btrfs_mark_buffer_dirty(root->node);
     949        9566 :         btrfs_tree_unlock(root->node);
     950             : 
     951        9566 :         return 0;
     952             : }
     953             : 
     954        4757 : int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
     955             :                              struct btrfs_fs_info *fs_info)
     956             : {
                               /*
                                * Allocate a log root and publish it as fs_info->log_root_tree.
                                * Returns 0 on success, or the error from alloc_log_tree() /
                                * btrfs_alloc_log_tree_node(); in the latter case the new root is
                                * dropped again with btrfs_put_root().
                                */
     957        4757 :         struct btrfs_root *log_root;
     958             : 
     959        4757 :         log_root = alloc_log_tree(trans, fs_info);
     960        4757 :         if (IS_ERR(log_root))
     961           0 :                 return PTR_ERR(log_root);
     962             : 
                               /* On a zoned filesystem the root node is not allocated here */
     963        4757 :         if (!btrfs_is_zoned(fs_info)) {
     964        4757 :                 int ret = btrfs_alloc_log_tree_node(trans, log_root);
     965             : 
     966        4757 :                 if (ret) {
     967           0 :                         btrfs_put_root(log_root);
     968           0 :                         return ret;
     969             :                 }
     970             :         }
     971             : 
                               /* An already set log_root_tree only warns; it is overwritten regardless */
     972        4757 :         WARN_ON(fs_info->log_root_tree);
     973        4757 :         fs_info->log_root_tree = log_root;
     974        4757 :         return 0;
     975             : }
     976             : 
     977        4809 : int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
     978             :                        struct btrfs_root *root)
     979             : {
                               /*
                                * Create a log root for @root: allocate the root and its first
                                * node, fill in the embedded inode item and root node fields of
                                * its root_item, attach it as root->log_root and reset @root's log
                                * transaction counters.  Returns 0 on success, or the error from
                                * alloc_log_tree() / btrfs_alloc_log_tree_node().
                                */
     980        4809 :         struct btrfs_fs_info *fs_info = root->fs_info;
     981        4809 :         struct btrfs_root *log_root;
     982        4809 :         struct btrfs_inode_item *inode_item;
     983        4809 :         int ret;
     984             : 
     985        4809 :         log_root = alloc_log_tree(trans, fs_info);
     986        4809 :         if (IS_ERR(log_root))
     987           0 :                 return PTR_ERR(log_root);
     988             : 
     989        4809 :         ret = btrfs_alloc_log_tree_node(trans, log_root);
     990        4809 :         if (ret) {
     991           0 :                 btrfs_put_root(log_root);
     992           0 :                 return ret;
     993             :         }
     994             : 
     995        4809 :         log_root->last_trans = trans->transid;
                               /* Replace the offset set by alloc_log_tree() with the owning root's objectid */
     996        4809 :         log_root->root_key.offset = root->root_key.objectid;
     997             : 
                               /* Inode item: generation 1, size 3, nlink 1, nbytes = nodesize, directory mode 0755 */
     998        4809 :         inode_item = &log_root->root_item.inode;
     999        4809 :         btrfs_set_stack_inode_generation(inode_item, 1);
    1000        4809 :         btrfs_set_stack_inode_size(inode_item, 3);
    1001        4809 :         btrfs_set_stack_inode_nlink(inode_item, 1);
    1002        4809 :         btrfs_set_stack_inode_nbytes(inode_item,
    1003        4809 :                                      fs_info->nodesize);
    1004        4809 :         btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
    1005             : 
    1006        4809 :         btrfs_set_root_node(&log_root->root_item, log_root->node);
    1007             : 
                               /* An already attached log root only warns; it is overwritten regardless */
    1008        4809 :         WARN_ON(root->log_root);
    1009        4809 :         root->log_root = log_root;
    1010        4809 :         root->log_transid = 0;
    1011        4809 :         root->log_transid_committed = -1;
    1012        4809 :         root->last_log_commit = 0;
    1013        4809 :         return 0;
    1014             : }
    1015             : 
    1016       50777 : static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
    1017             :                                               struct btrfs_path *path,
    1018             :                                               struct btrfs_key *key)
    1019             : {
                               /*
                                * Build an in-memory root for @key: look its root item up in
                                * @tree_root using the caller supplied @path, read the root node
                                * described by that item and verify it.
                                *
                                * Returns the root on success.  On failure the root is dropped
                                * with btrfs_put_root() and an ERR_PTR is returned: -ENOMEM,
                                * -ENOENT (positive btrfs_find_root() result), the error from
                                * btrfs_find_root() / read_tree_block(), -EIO (root node not
                                * uptodate) or -EUCLEAN (owner mismatch).
                                */
    1020       50777 :         struct btrfs_root *root;
    1021       50777 :         struct btrfs_tree_parent_check check = { 0 };
    1022       50777 :         struct btrfs_fs_info *fs_info = tree_root->fs_info;
    1023       50777 :         u64 generation;
    1024       50777 :         int ret;
    1025       50777 :         int level;
    1026             : 
    1027       50777 :         root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS);
    1028       50777 :         if (!root)
    1029             :                 return ERR_PTR(-ENOMEM);
    1030             : 
    1031       50777 :         ret = btrfs_find_root(tree_root, key, path,
    1032             :                               &root->root_item, &root->root_key);
    1033       50777 :         if (ret) {
                                       /* A positive result is turned into -ENOENT */
    1034        3196 :                 if (ret > 0)
    1035        3196 :                         ret = -ENOENT;
    1036        3196 :                 goto fail;
    1037             :         }
    1038             : 
                               /* Expected level, generation and owner for the read come from the root item and @key */
    1039       47581 :         generation = btrfs_root_generation(&root->root_item);
    1040       47581 :         level = btrfs_root_level(&root->root_item);
    1041       47581 :         check.level = level;
    1042       47581 :         check.transid = generation;
    1043       47581 :         check.owner_root = key->objectid;
    1044       47581 :         root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item),
    1045             :                                      &check);
    1046       47581 :         if (IS_ERR(root->node)) {
    1047           0 :                 ret = PTR_ERR(root->node);
                                       /* Clear the ERR_PTR so the fail path does not see it as a node */
    1048           0 :                 root->node = NULL;
    1049           0 :                 goto fail;
    1050             :         }
    1051       47581 :         if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
    1052           0 :                 ret = -EIO;
    1053           0 :                 goto fail;
    1054             :         }
    1055             : 
    1056             :         /*
    1057             :          * For real fs, and not log/reloc trees, root owner must
    1058             :          * match its root node owner
    1059             :          */
    1060       47581 :         if (!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state) &&
    1061       47581 :             root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
    1062       28255 :             root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
    1063       28255 :             root->root_key.objectid != btrfs_header_owner(root->node)) {
    1064           0 :                 btrfs_crit(fs_info,
    1065             : "root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
    1066             :                            root->root_key.objectid, root->node->start,
    1067             :                            btrfs_header_owner(root->node),
    1068             :                            root->root_key.objectid);
    1069           0 :                 ret = -EUCLEAN;
    1070           0 :                 goto fail;
    1071             :         }
    1072       47581 :         root->commit_root = btrfs_root_node(root);
    1073       47581 :         return root;
    1074        3196 : fail:
    1075        3196 :         btrfs_put_root(root);
    1076        3196 :         return ERR_PTR(ret);
    1077             : }
    1078             : 
    1079       41092 : struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
    1080             :                                         struct btrfs_key *key)
    1081             : {
                               /*
                                * Wrapper around read_tree_root_path() that allocates a temporary
                                * path for the lookup and frees it afterwards.  Returns
                                * ERR_PTR(-ENOMEM) when the path cannot be allocated, otherwise
                                * whatever read_tree_root_path() returns.
                                */
    1082       41092 :         struct btrfs_root *root;
    1083       41092 :         struct btrfs_path *path;
    1084             : 
    1085       41092 :         path = btrfs_alloc_path();
    1086       41092 :         if (!path)
    1087             :                 return ERR_PTR(-ENOMEM);
    1088       41092 :         root = read_tree_root_path(tree_root, path, key);
    1089       41092 :         btrfs_free_path(path);
    1090             : 
    1091       41092 :         return root;
    1092             : }
    1093             : 
    1094             : /*
    1095             :  * Initialize subvolume root in-memory structure
    1096             :  *
    1097             :  * @anon_dev:   anonymous device to attach to the root, if zero, allocate new
                        *
                        * Return 0 on success, or the error from get_anon_bdev() /
                        * btrfs_init_root_free_objectid().  Nothing is undone here on failure.
    1098             :  */
    1099       12121 : static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
    1100             : {
    1101       12121 :         int ret;
    1102             : 
    1103       12121 :         btrfs_drew_lock_init(&root->snapshot_lock);
    1104             : 
                               /* Every root except the log tree and the data reloc root is marked SHAREABLE */
    1105       12121 :         if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
    1106             :             !btrfs_is_data_reloc_root(root)) {
    1107        8906 :                 set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
    1108        8906 :                 btrfs_check_and_init_root_item(&root->root_item);
    1109             :         }
    1110             : 
    1111             :         /*
    1112             :          * Don't assign anonymous block device to roots that are not exposed to
    1113             :          * userspace, the id pool is limited to 1M
    1114             :          */
    1115       17812 :         if (is_fstree(root->root_key.objectid) &&
    1116             :             btrfs_root_refs(&root->root_item) > 0) {
    1117        8774 :                 if (!anon_dev) {
    1118        7497 :                         ret = get_anon_bdev(&root->anon_dev);
    1119        7497 :                         if (ret)
    1120           0 :                                 goto fail;
    1121             :                 } else {
    1122        1277 :                         root->anon_dev = anon_dev;
    1123             :                 }
    1124             :         }
    1125             : 
                               /* free_objectid is initialized and range-checked under objectid_mutex */
    1126       12121 :         mutex_lock(&root->objectid_mutex);
    1127       12121 :         ret = btrfs_init_root_free_objectid(root);
    1128       12121 :         if (ret) {
    1129           0 :                 mutex_unlock(&root->objectid_mutex);
    1130           0 :                 goto fail;
    1131             :         }
    1132             : 
    1133       12121 :         ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
    1134             : 
    1135       12121 :         mutex_unlock(&root->objectid_mutex);
    1136             : 
    1137       12121 :         return 0;
    1138             : fail:
    1139             :         /* The caller is responsible to call btrfs_free_fs_root */
    1140             :         return ret;
    1141             : }
    1142             : 
                       /*
                        * Look up a cached root by id in fs_info->fs_roots_radix.
                        *
                        * Both the radix tree lookup and the btrfs_grab_root() call are done while
                        * holding fs_roots_radix_lock.
                        *
                        * @fs_info:  filesystem whose radix tree of roots is searched
                        * @root_id:  root objectid, cast to unsigned long for use as the tree index
                        *
                        * Return: the result of btrfs_grab_root() on the looked up entry; callers in
                        * this file treat NULL as "not cached" and release a non-NULL result with
                        * btrfs_put_root().
                        */
    1143    12197584 : static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
    1144             :                                                u64 root_id)
    1145             : {
    1146    12197584 :         struct btrfs_root *root;
    1147             : 
    1148    12197584 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1149    12198682 :         root = radix_tree_lookup(&fs_info->fs_roots_radix,
    1150             :                                  (unsigned long)root_id);
                               /* Grab while still under the lock, before the entry is handed out. */
    1151    12198682 :         root = btrfs_grab_root(root);
    1152    12198682 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    1153    12198682 :         return root;
    1154             : }
    1155             : 
                       /*
                        * Return the root for one of the well-known tree objectids.
                        *
                        * The root, chunk, dev, quota, uuid and block group trees are taken from
                        * their dedicated fs_info pointers.  The extent, csum and free space trees
                        * are found through btrfs_global_root() using a ROOT_ITEM key with the
                        * given objectid and offset 0.  Every hit is passed through
                        * btrfs_grab_root() before being returned.
                        *
                        * @fs_info:   filesystem to take the root from
                        * @objectid:  tree objectid to resolve
                        *
                        * Return: the result of btrfs_grab_root() for a recognized objectid, NULL
                        * for any objectid not listed in the switch.
                        */
    1156    12272593 : static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
    1157             :                                                 u64 objectid)
    1158             : {
                               /* Only consumed by the btrfs_global_root() cases below. */
    1159    12272593 :         struct btrfs_key key = {
    1160             :                 .objectid = objectid,
    1161             :                 .type = BTRFS_ROOT_ITEM_KEY,
    1162             :                 .offset = 0,
    1163             :         };
    1164             : 
    1165    12272593 :         switch (objectid) {
    1166        7113 :         case BTRFS_ROOT_TREE_OBJECTID:
    1167        7113 :                 return btrfs_grab_root(fs_info->tree_root);
    1168       36071 :         case BTRFS_EXTENT_TREE_OBJECTID:
    1169       36071 :                 return btrfs_grab_root(btrfs_global_root(fs_info, &key));
    1170         227 :         case BTRFS_CHUNK_TREE_OBJECTID:
    1171         227 :                 return btrfs_grab_root(fs_info->chunk_root);
    1172         226 :         case BTRFS_DEV_TREE_OBJECTID:
    1173         226 :                 return btrfs_grab_root(fs_info->dev_root);
    1174       29121 :         case BTRFS_CSUM_TREE_OBJECTID:
    1175       29121 :                 return btrfs_grab_root(btrfs_global_root(fs_info, &key));
    1176         170 :         case BTRFS_QUOTA_TREE_OBJECTID:
    1177         170 :                 return btrfs_grab_root(fs_info->quota_root);
    1178        1961 :         case BTRFS_UUID_TREE_OBJECTID:
    1179        1961 :                 return btrfs_grab_root(fs_info->uuid_root);
    1180           0 :         case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
    1181           0 :                 return btrfs_grab_root(fs_info->block_group_root);
    1182         197 :         case BTRFS_FREE_SPACE_TREE_OBJECTID:
    1183         197 :                 return btrfs_grab_root(btrfs_global_root(fs_info, &key));
    1184             :         default:
                                       /* Not a global tree; callers fall back to the fs roots lookup. */
    1185             :                 return NULL;
    1186             :         }
    1187             : }
    1188             : 
                       /*
                        * Add @root to fs_info->fs_roots_radix, indexed by its root objectid.
                        *
                        * On a successful insert btrfs_grab_root() is called on @root and
                        * BTRFS_ROOT_IN_RADIX is set in its state, both while still holding
                        * fs_roots_radix_lock.
                        *
                        * @fs_info:  filesystem owning the radix tree
                        * @root:     root to insert
                        *
                        * Return: 0 on success, otherwise the error from radix_tree_preload() or
                        * radix_tree_insert().  btrfs_get_root_ref() handles -EEXIST from here as
                        * "somebody else inserted this root first".
                        */
    1189       12121 : int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
    1190             :                          struct btrfs_root *root)
    1191             : {
    1192       12121 :         int ret;
    1193             : 
                               /* Preload is done with GFP_NOFS before the spinlock is taken. */
    1194       12121 :         ret = radix_tree_preload(GFP_NOFS);
    1195       12121 :         if (ret)
    1196             :                 return ret;
    1197             : 
    1198       12121 :         spin_lock(&fs_info->fs_roots_radix_lock);
    1199       12121 :         ret = radix_tree_insert(&fs_info->fs_roots_radix,
    1200       12121 :                                 (unsigned long)root->root_key.objectid,
    1201             :                                 root);
    1202       12121 :         if (ret == 0) {
    1203       12121 :                 btrfs_grab_root(root);
    1204       12121 :                 set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
    1205             :         }
    1206       12121 :         spin_unlock(&fs_info->fs_roots_radix_lock);
                               /* Pairs with the successful radix_tree_preload() above. */
    1207       12121 :         radix_tree_preload_end();
    1208             : 
    1209       12121 :         return ret;
    1210             : }
    1211             : 
                       /*
                        * Report and release roots still linked on fs_info->allocated_roots.
                        *
                        * Only does anything when CONFIG_BTRFS_DEBUG is set; otherwise the body is
                        * empty.  For each remaining root an error naming the root and its
                        * reference count is logged, then every reference is dropped.
                        */
    1212           0 : void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
    1213             : {
    1214             : #ifdef CONFIG_BTRFS_DEBUG
    1215             :         struct btrfs_root *root;
    1216             : 
    1217             :         while (!list_empty(&fs_info->allocated_roots)) {
    1218             :                 char buf[BTRFS_ROOT_NAME_BUF_LEN];
    1219             : 
    1220             :                 root = list_first_entry(&fs_info->allocated_roots,
    1221             :                                         struct btrfs_root, leak_list);
    1222             :                 btrfs_err(fs_info, "leaked root %s refcount %d",
    1223             :                           btrfs_root_name(&root->root_key, buf),
    1224             :                           refcount_read(&root->refs));
                                       /* Drop the extra references first, then the last one. */
    1225             :                 while (refcount_read(&root->refs) > 1)
    1226             :                         btrfs_put_root(root);
                                       /*
                                        * NOTE(review): presumably the final put unlinks the root from
                                        * allocated_roots, which is what lets the outer loop end -
                                        * confirm in btrfs_put_root().
                                        */
    1227             :                 btrfs_put_root(root);
    1228             :         }
    1229             : #endif
    1230           0 : }
    1231             : 
                       /*
                        * Empty fs_info->global_root_tree.
                        *
                        * Repeatedly takes the node returned by rb_first_postorder(), erases it
                        * from the tree and drops a reference on the root containing it, until the
                        * tree has no nodes left.
                        */
    1232        3472 : static void free_global_roots(struct btrfs_fs_info *fs_info)
    1233             : {
    1234        3472 :         struct btrfs_root *root;
    1235        3472 :         struct rb_node *node;
    1236             : 
    1237       13108 :         while ((node = rb_first_postorder(&fs_info->global_root_tree)) != NULL) {
    1238        9636 :                 root = rb_entry(node, struct btrfs_root, rb_node);
                                       /* Unlink before the put so the tree never points at a released root. */
    1239        9636 :                 rb_erase(&root->rb_node, &fs_info->global_root_tree);
    1240        9636 :                 btrfs_put_root(root);
    1241             :         }
    1242        3472 : }
    1243             : 
                       /*
                        * Release everything owned by @fs_info and then @fs_info itself.
                        *
                        * Destroys the per-cpu counters, frees the auxiliary structures, drops the
                        * references on the global roots and the individually tracked roots, runs
                        * the root and extent buffer leak checks, and finally frees the super block
                        * copies, the subpage info and the fs_info allocation (with kvfree()).
                        */
    1244        3472 : void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
    1245             : {
    1246        3472 :         percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
    1247        3472 :         percpu_counter_destroy(&fs_info->delalloc_bytes);
    1248        3472 :         percpu_counter_destroy(&fs_info->ordered_bytes);
    1249        3472 :         percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
    1250        3472 :         btrfs_free_csum_hash(fs_info);
    1251        3472 :         btrfs_free_stripe_hash_table(fs_info);
    1252        3472 :         btrfs_free_ref_cache(fs_info);
    1253        3472 :         kfree(fs_info->balance_ctl);
    1254        3472 :         kfree(fs_info->delayed_root);
                               /* Roots held in the rb-tree first, then the ones with their own pointer. */
    1255        3472 :         free_global_roots(fs_info);
    1256        3472 :         btrfs_put_root(fs_info->tree_root);
    1257        3472 :         btrfs_put_root(fs_info->chunk_root);
    1258        3472 :         btrfs_put_root(fs_info->dev_root);
    1259        3472 :         btrfs_put_root(fs_info->quota_root);
    1260        3472 :         btrfs_put_root(fs_info->uuid_root);
    1261        3472 :         btrfs_put_root(fs_info->fs_root);
    1262        3472 :         btrfs_put_root(fs_info->data_reloc_root);
    1263        3472 :         btrfs_put_root(fs_info->block_group_root);
                               /* The leak checks run only after all of the puts above. */
    1264        3472 :         btrfs_check_leaked_roots(fs_info);
    1265        3472 :         btrfs_extent_buffer_leak_debug_check(fs_info);
    1266        3472 :         kfree(fs_info->super_copy);
    1267        3472 :         kfree(fs_info->super_for_commit);
    1268        3472 :         kfree(fs_info->subpage_info);
    1269        3472 :         kvfree(fs_info);
    1270        3472 : }
    1271             : 
    1272             : 
    1273             : /*
    1274             :  * Get an in-memory reference of a root structure.
    1275             :  *
    1276             :  * For essential trees like root/extent tree, we grab it from fs_info directly.
    1277             :  * For subvolume trees, we check the cached filesystem roots first. If not
    1278             :  * found, then read it from disk and add it to cached fs roots.
    1279             :  *
    1280             :  * Caller should release the root by calling btrfs_put_root() after the usage.
    1281             :  *
    1282             :  * NOTE: Reloc and log trees can't be read by this function as they share the
    1283             :  *       same root objectid.
    1284             :  *
    1285             :  * @objectid:   root id
    1286             :  * @anon_dev:   preallocated anonymous block device number for new roots,
    1287             :  *              pass 0 for new allocation.
    1288             :  * @check_ref:  whether to check root item references, If true, return -ENOENT
    1289             :  *              for orphan roots
                        *
                        * Return: the root on success.  On failure an ERR_PTR: -ENOENT when
                        * @check_ref is set and the root item has zero references, -ENOMEM when no
                        * path can be allocated, or the error from btrfs_read_tree_root(),
                        * btrfs_init_fs_root(), btrfs_search_slot() or btrfs_insert_fs_root().
    1290             :  */
    1291     5839719 : static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
    1292             :                                              u64 objectid, dev_t anon_dev,
    1293             :                                              bool check_ref)
    1294             : {
    1295     5839719 :         struct btrfs_root *root;
    1296     5839719 :         struct btrfs_path *path;
    1297     5839719 :         struct btrfs_key key;
    1298     5839719 :         int ret;
    1299             : 
                               /* Step 1: well-known trees come straight from fs_info. */
    1300     5839719 :         root = btrfs_get_global_root(fs_info, objectid);
    1301     5839719 :         if (root)
    1302             :                 return root;
    1303     5771886 : again:
                               /* Step 2: roots already cached in the radix tree. */
    1304     5771886 :         root = btrfs_lookup_fs_root(fs_info, objectid);
    1305     5771888 :         if (root) {
    1306             :                 /* Shouldn't get preallocated anon_dev for cached roots */
    1307     5759767 :                 ASSERT(!anon_dev);
    1308     5759767 :                 if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
    1309           0 :                         btrfs_put_root(root);
    1310           0 :                         return ERR_PTR(-ENOENT);
    1311             :                 }
    1312             :                 return root;
    1313             :         }
    1314             : 
                               /* Step 3: not cached, read the root through the tree root. */
    1315       12121 :         key.objectid = objectid;
    1316       12121 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1317       12121 :         key.offset = (u64)-1;
    1318       12121 :         root = btrfs_read_tree_root(fs_info->tree_root, &key);
    1319       12121 :         if (IS_ERR(root))
    1320           0 :                 return root;
    1321             : 
    1322       12121 :         if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
    1323           0 :                 ret = -ENOENT;
    1324           0 :                 goto fail;
    1325             :         }
    1326             : 
    1327       12121 :         ret = btrfs_init_fs_root(root, anon_dev);
    1328       12121 :         if (ret)
    1329           0 :                 goto fail;
    1330             : 
    1331       12121 :         path = btrfs_alloc_path();
    1332       12121 :         if (!path) {
    1333           0 :                 ret = -ENOMEM;
    1334           0 :                 goto fail;
    1335             :         }
                               /*
                                * Search the tree root for an orphan item keyed by this root's id; an
                                * exact match (ret == 0) is recorded in the root's state bits.
                                */
    1336       12121 :         key.objectid = BTRFS_ORPHAN_OBJECTID;
    1337       12121 :         key.type = BTRFS_ORPHAN_ITEM_KEY;
    1338       12121 :         key.offset = objectid;
    1339             : 
    1340       12121 :         ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
    1341       12121 :         btrfs_free_path(path);
    1342       12121 :         if (ret < 0)
    1343           0 :                 goto fail;
    1344       12121 :         if (ret == 0)
    1345         132 :                 set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
    1346             : 
                               /* Step 4: publish the new root in the radix tree cache. */
    1347       12121 :         ret = btrfs_insert_fs_root(fs_info, root);
    1348       12121 :         if (ret) {
                                       /*
                                        * -EEXIST: an entry for this objectid is already cached, so drop
                                        * our copy and retry the cached lookup.
                                        *
                                        * NOTE(review): unlike the fail label below, this path does not
                                        * clear root->anon_dev before btrfs_put_root(), and the retry
                                        * then hits ASSERT(!anon_dev) - confirm a caller-provided
                                        * anon_dev can never reach here.
                                        */
    1349           0 :                 if (ret == -EEXIST) {
    1350           0 :                         btrfs_put_root(root);
    1351           0 :                         goto again;
    1352             :                 }
    1353           0 :                 goto fail;
    1354             :         }
    1355             :         return root;
    1356           0 : fail:
    1357             :         /*
    1358             :          * If our caller provided us an anonymous device, then it's his
    1359             :          * responsibility to free it in case we fail. So we have to set our
    1360             :          * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root()
    1361             :          * and once again by our caller.
    1362             :          */
    1363           0 :         if (anon_dev)
    1364           0 :                 root->anon_dev = 0;
    1365           0 :         btrfs_put_root(root);
    1366           0 :         return ERR_PTR(ret);
    1367             : }
    1368             : 
    1369             : /*
    1370             :  * Get in-memory reference of a root structure
    1371             :  *
    1372             :  * @objectid:   tree objectid
    1373             :  * @check_ref:  if set, verify that the tree exists and the item has at least
    1374             :  *              one reference
                        *
                        * Thin wrapper around btrfs_get_root_ref() that always passes 0 as the
                        * anonymous device, and returns its result unchanged.
    1375             :  */
    1376     5832012 : struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
    1377             :                                      u64 objectid, bool check_ref)
    1378             : {
    1379     5832012 :         return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
    1380             : }
    1381             : 
    1382             : /*
    1383             :  * Get in-memory reference of a root structure, created as new, optionally pass
    1384             :  * the anonymous block device id
    1385             :  *
    1386             :  * @objectid:   tree objectid
    1387             :  * @anon_dev:   if zero, allocate a new anonymous block device or use the
    1388             :  *              parameter value
                        *
                        * Thin wrapper around btrfs_get_root_ref() that always enables the
                        * reference check (check_ref == true), and returns its result unchanged.
    1389             :  */
    1390        1277 : struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
    1391             :                                          u64 objectid, dev_t anon_dev)
    1392             : {
    1393        1277 :         return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
    1394             : }
    1395             : 
    1396             : /*
    1397             :  * btrfs_get_fs_root_commit_root - return a root for the given objectid
    1398             :  * @fs_info:    the fs_info
                        * @path:       caller's path; it must have search_commit_root and
                        *              skip_locking set, and is released again before returning
    1399             :  * @objectid:   the objectid we need to lookup
    1400             :  *
    1401             :  * This is exclusively used for backref walking, and exists specifically because
    1402             :  * of how qgroups does lookups.  Qgroups will do a backref lookup at delayed ref
    1403             :  * creation time, which means we may have to read the tree_root in order to look
    1404             :  * up a fs root that is not in memory.  If the root is not in memory we will
    1405             :  * read the tree root commit root and look up the fs root from there.  This is a
    1406             :  * temporary root, it will not be inserted into the radix tree as it doesn't
    1407             :  * have the most uptodate information, it'll simply be discarded once the
    1408             :  * backref code is finished using the root.
                        *
                        * Return: a global or cached root when one exists, otherwise whatever
                        * read_tree_root_path() returns (a root or an ERR_PTR).
    1409             :  */
    1410     6433149 : struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info,
    1411             :                                                  struct btrfs_path *path,
    1412             :                                                  u64 objectid)
    1413             : {
    1414     6433149 :         struct btrfs_root *root;
    1415     6433149 :         struct btrfs_key key;
    1416             : 
    1417     6433149 :         ASSERT(path->search_commit_root && path->skip_locking);
    1418             : 
    1419             :         /*
    1420             :          * This can return -ENOENT if we ask for a root that doesn't exist, but
    1421             :          * since this is called via the backref walking code we won't be looking
    1422             :          * up a root that doesn't exist, unless there's corruption.  So if root
    1423             :          * != NULL just return it.
    1424             :          */
    1425     6433149 :         root = btrfs_get_global_root(fs_info, objectid);
    1426     6432909 :         if (root)
    1427             :                 return root;
    1428             : 
    1429     6425683 :         root = btrfs_lookup_fs_root(fs_info, objectid);
    1430     6426794 :         if (root)
    1431             :                 return root;
    1432             : 
                               /* Not in memory: read it using the caller's path. */
    1433          46 :         key.objectid = objectid;
    1434          46 :         key.type = BTRFS_ROOT_ITEM_KEY;
    1435          46 :         key.offset = (u64)-1;
    1436          46 :         root = read_tree_root_path(fs_info->tree_root, path, &key);
                               /* The path belongs to the caller, so release it rather than free it. */
    1437          46 :         btrfs_release_path(path);
    1438             : 
    1439          46 :         return root;
    1440             : }
    1441             : 
                       /*
                        * Main loop of the cleaner kernel thread.
                        *
                        * @arg:  the struct btrfs_fs_info this thread works on
                        *
                        * Each pass sets BTRFS_FS_CLEANER_RUNNING, does the cleanup work unless one
                        * of the early-out checks sends it straight to the sleep label, then clears
                        * the bit (waking any waiters on it), handles park/stop requests and goes
                        * to sleep.  The sleep is skipped when btrfs_clean_one_deleted_snapshot()
                        * returned non-zero in this pass.
                        *
                        * Return: 0, once kthread_should_stop() reports true.
                        */
    1442        3215 : static int cleaner_kthread(void *arg)
    1443             : {
    1444        3215 :         struct btrfs_fs_info *fs_info = arg;
    1445       51833 :         int again;
    1446             : 
    1447       51833 :         while (1) {
                                       /* Reset per pass so an early goto sleep really sleeps. */
    1448       51833 :                 again = 0;
    1449             : 
    1450       51833 :                 set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
    1451             : 
    1452             :                 /* Make the cleaner go to sleep early. */
    1453       51833 :                 if (btrfs_need_cleaner_sleep(fs_info))
    1454        3298 :                         goto sleep;
    1455             : 
    1456             :                 /*
    1457             :                  * Do not do anything if we might cause open_ctree() to block
    1458             :                  * before we have finished mounting the filesystem.
    1459             :                  */
    1460       48535 :                 if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
    1461        6350 :                         goto sleep;
    1462             : 
                                       /* Never wait for cleaner_mutex; skip this pass if it is taken. */
    1463       42185 :                 if (!mutex_trylock(&fs_info->cleaner_mutex))
    1464          76 :                         goto sleep;
    1465             : 
    1466             :                 /*
    1467             :                  * Avoid the problem that we change the status of the fs
    1468             :                  * during the above check and trylock.
    1469             :                  */
    1470       42109 :                 if (btrfs_need_cleaner_sleep(fs_info)) {
    1471           0 :                         mutex_unlock(&fs_info->cleaner_mutex);
    1472           0 :                         goto sleep;
    1473             :                 }
    1474             : 
    1475       42109 :                 if (test_and_clear_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags))
    1476          32 :                         btrfs_sysfs_feature_update(fs_info);
    1477             : 
    1478       42109 :                 btrfs_run_delayed_iputs(fs_info);
    1479             : 
                                       /* Non-zero here makes the loop run again without sleeping. */
    1480       42109 :                 again = btrfs_clean_one_deleted_snapshot(fs_info);
    1481       42109 :                 mutex_unlock(&fs_info->cleaner_mutex);
    1482             : 
    1483             :                 /*
    1484             :                  * The defragger has dealt with the R/O remount and umount,
    1485             :                  * needn't do anything special here.
    1486             :                  */
    1487       42109 :                 btrfs_run_defrag_inodes(fs_info);
    1488             : 
    1489             :                 /*
    1490             :                  * Acquires fs_info->reclaim_bgs_lock to avoid racing
    1491             :                  * with relocation (btrfs_relocate_chunk) and relocation
    1492             :                  * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
    1493             :                  * after acquiring fs_info->reclaim_bgs_lock. So we
    1494             :                  * can't hold, nor need to, fs_info->cleaner_mutex when deleting
    1495             :                  * unused block groups.
    1496             :                  */
    1497       42109 :                 btrfs_delete_unused_bgs(fs_info);
    1498             : 
    1499             :                 /*
    1500             :                  * Reclaim block groups in the reclaim_bgs list after we deleted
    1501             :                  * all unused block_groups. This possibly gives us some more free
    1502             :                  * space.
    1503             :                  */
    1504       42109 :                 btrfs_reclaim_bgs(fs_info);
    1505       51833 : sleep:
    1506       51833 :                 clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
    1507       51833 :                 if (kthread_should_park())
    1508        3215 :                         kthread_parkme();
    1509       51833 :                 if (kthread_should_stop())
    1510        3215 :                         return 0;
                                       /*
                                        * Sleep until woken; transaction_kthread() wakes this thread
                                        * with wake_up_process() on every one of its passes.
                                        */
    1511       48618 :                 if (!again) {
    1512       48500 :                         set_current_state(TASK_INTERRUPTIBLE);
    1513       48500 :                         schedule();
    1514       48500 :                         __set_current_state(TASK_RUNNING);
    1515             :                 }
    1516             :         }
    1517             : }
    1518             : 
                       /*
                        * Main loop of the periodic transaction commit kernel thread.
                        *
                        * @arg:  a struct btrfs_root; its fs_info is the filesystem being served
                        *        and the root itself is used to attach to the running transaction
                        *
                        * Each pass looks at fs_info->running_transaction and commits it when a
                        * commit was requested through BTRFS_FS_COMMIT_TRANS, when it has already
                        * reached TRANS_STATE_COMMIT_START, or when it has been running for at
                        * least commit_interval seconds.  Afterwards the cleaner thread is woken
                        * and this thread sleeps for the computed delay.
                        *
                        * Return: 0, once kthread_should_stop() reports true.
                        */
    1519        3215 : static int transaction_kthread(void *arg)
    1520             : {
    1521        3215 :         struct btrfs_root *root = arg;
    1522        3215 :         struct btrfs_fs_info *fs_info = root->fs_info;
    1523        4484 :         struct btrfs_trans_handle *trans;
    1524        4484 :         struct btrfs_transaction *cur;
    1525        4484 :         u64 transid;
    1526        4484 :         time64_t delta;
    1527        4484 :         unsigned long delay;
    1528        4484 :         bool cannot_commit;
    1529             : 
    1530        4484 :         do {
    1531        4484 :                 cannot_commit = false;
                                       /* Default sleep: one full commit interval (seconds -> ms -> jiffies). */
    1532        4484 :                 delay = msecs_to_jiffies(fs_info->commit_interval * 1000);
    1533        4483 :                 mutex_lock(&fs_info->transaction_kthread_mutex);
    1534             : 
    1535        4484 :                 spin_lock(&fs_info->trans_lock);
    1536        4484 :                 cur = fs_info->running_transaction;
    1537        4484 :                 if (!cur) {
    1538        4083 :                         spin_unlock(&fs_info->trans_lock);
    1539        4083 :                         goto sleep;
    1540             :                 }
    1541             : 
                                       /* Age of the running transaction in seconds. */
    1542         401 :                 delta = ktime_get_seconds() - cur->start_time;
                                       /*
                                        * Too young, nobody asked for a commit and no commit started
                                        * yet: only sleep for roughly the time left in the interval.
                                        */
    1543         401 :                 if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
    1544         397 :                     cur->state < TRANS_STATE_COMMIT_START &&
    1545         393 :                     delta < fs_info->commit_interval) {
    1546         190 :                         spin_unlock(&fs_info->trans_lock);
    1547         190 :                         delay -= msecs_to_jiffies((delta - 1) * 1000);
                                               /*
                                                * Cap the result at one full interval.
                                                * NOTE(review): presumably this covers delta == 0, where
                                                * (delta - 1) * 1000 is negative - confirm.
                                                */
    1548         190 :                         delay = min(delay,
    1549             :                                     msecs_to_jiffies(fs_info->commit_interval * 1000));
    1550         190 :                         goto sleep;
    1551             :                 }
                                       /* Remember which transaction was picked while trans_lock is held. */
    1552         211 :                 transid = cur->transid;
    1553         211 :                 spin_unlock(&fs_info->trans_lock);
    1554             : 
    1555             :                 /* If the file system is aborted, this will always fail. */
    1556         211 :                 trans = btrfs_attach_transaction(root);
    1557         211 :                 if (IS_ERR(trans)) {
                                               /* -ENOENT is not treated as an inability to commit. */
    1558           4 :                         if (PTR_ERR(trans) != -ENOENT)
    1559           0 :                                 cannot_commit = true;
    1560           4 :                         goto sleep;
    1561             :                 }
                                       /*
                                        * Commit only if we attached to the transaction sampled above;
                                        * otherwise just drop the handle again.
                                        */
    1562         207 :                 if (transid == trans->transid) {
    1563         207 :                         btrfs_commit_transaction(trans);
    1564             :                 } else {
    1565           0 :                         btrfs_end_transaction(trans);
    1566             :                 }
    1567        4484 : sleep:
    1568        4484 :                 wake_up_process(fs_info->cleaner_kthread);
    1569        4484 :                 mutex_unlock(&fs_info->transaction_kthread_mutex);
    1570             : 
    1571        4484 :                 if (BTRFS_FS_ERROR(fs_info))
    1572           3 :                         btrfs_cleanup_transaction(fs_info);
                                       /*
                                        * Sleep unless asked to stop.  While a transaction is blocked
                                        * the sleep is skipped, except when the attach above failed
                                        * with something other than -ENOENT.
                                        */
    1573        8968 :                 if (!kthread_should_stop() &&
    1574        4485 :                                 (!btrfs_transaction_blocked(fs_info) ||
    1575             :                                  cannot_commit))
    1576        4483 :                         schedule_timeout_interruptible(delay);
    1577        4484 :         } while (!kthread_should_stop());
    1578        3215 :         return 0;
    1579             : }
    1580             : 
    1581             : /*
    1582             :  * This will find the highest generation in the array of root backups.  The
    1583             :  * index of the highest array is returned, or -EINVAL if we can't find
    1584             :  * anything.
    1585             :  *
    1586             :  * We check to make sure the array is valid by comparing the
    1587             :  * generation of the latest  root in the array with the generation
    1588             :  * in the super block.  If they don't match we pitch it.
                        *
                        * Concretely: the slots of info->super_copy->super_roots are scanned in
                        * order and the index of the first slot whose backup tree root generation
                        * equals the super block generation is returned.
                        *
                        * Return: that slot index (0 .. BTRFS_NUM_BACKUP_ROOTS - 1), or -EINVAL if
                        * no slot matches.
    1589             :  */
    1590        3216 : static int find_newest_super_backup(struct btrfs_fs_info *info)
    1591             : {
    1592        3216 :         const u64 newest_gen = btrfs_super_generation(info->super_copy);
    1593        3216 :         u64 cur;
    1594        3216 :         struct btrfs_root_backup *root_backup;
    1595        3216 :         int i;
    1596             : 
    1597        8076 :         for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
    1598        8076 :                 root_backup = info->super_copy->super_roots + i;
    1599        8076 :                 cur = btrfs_backup_tree_root_gen(root_backup);
    1600        8076 :                 if (cur == newest_gen)
    1601        3216 :                         return i;
    1602             :         }
    1603             : 
    1604             :         return -EINVAL;
    1605             : }
    1606             : 
    1607             : /*
    1608             :  * copy all the root pointers into the super backup array.
    1609             :  * this will bump the backup pointer by one when it is
    1610             :  * done
    1611             :  */
    1612      203010 : static void backup_super_roots(struct btrfs_fs_info *info)
    1613             : {
    1614      203010 :         const int next_backup = info->backup_root_index;
    1615      203010 :         struct btrfs_root_backup *root_backup;
    1616             : 
    1617      203010 :         root_backup = info->super_for_commit->super_roots + next_backup;
    1618             : 
    1619             :         /*
    1620             :          * make sure all of our padding and empty slots get zero filled
    1621             :          * regardless of which ones we use today
    1622             :          */
    1623      203010 :         memset(root_backup, 0, sizeof(*root_backup));
    1624             : 
    1625      203010 :         info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
    1626             : 
    1627      203010 :         btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
    1628      203010 :         btrfs_set_backup_tree_root_gen(root_backup,
    1629      203010 :                                btrfs_header_generation(info->tree_root->node));
    1630             : 
    1631      203010 :         btrfs_set_backup_tree_root_level(root_backup,
    1632      203010 :                                btrfs_header_level(info->tree_root->node));
    1633             : 
    1634      203010 :         btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
    1635      203010 :         btrfs_set_backup_chunk_root_gen(root_backup,
    1636      203010 :                                btrfs_header_generation(info->chunk_root->node));
    1637      203010 :         btrfs_set_backup_chunk_root_level(root_backup,
    1638      203010 :                                btrfs_header_level(info->chunk_root->node));
    1639             : 
    1640      203010 :         if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
    1641      203010 :                 struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
    1642      203010 :                 struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
    1643             : 
    1644      203010 :                 btrfs_set_backup_extent_root(root_backup,
    1645      203010 :                                              extent_root->node->start);
    1646      203010 :                 btrfs_set_backup_extent_root_gen(root_backup,
    1647      203010 :                                 btrfs_header_generation(extent_root->node));
    1648      203010 :                 btrfs_set_backup_extent_root_level(root_backup,
    1649      203010 :                                         btrfs_header_level(extent_root->node));
    1650             : 
    1651      203010 :                 btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
    1652      203010 :                 btrfs_set_backup_csum_root_gen(root_backup,
    1653      203010 :                                                btrfs_header_generation(csum_root->node));
    1654      203010 :                 btrfs_set_backup_csum_root_level(root_backup,
    1655      203010 :                                                  btrfs_header_level(csum_root->node));
    1656             :         }
    1657             : 
    1658             :         /*
    1659             :          * we might commit during log recovery, which happens before we set
    1660             :          * the fs_root.  Make sure it is valid before we fill it in.
    1661             :          */
    1662      203010 :         if (info->fs_root && info->fs_root->node) {
    1663      202727 :                 btrfs_set_backup_fs_root(root_backup,
    1664             :                                          info->fs_root->node->start);
    1665      202727 :                 btrfs_set_backup_fs_root_gen(root_backup,
    1666      202727 :                                btrfs_header_generation(info->fs_root->node));
    1667      202727 :                 btrfs_set_backup_fs_root_level(root_backup,
    1668      202727 :                                btrfs_header_level(info->fs_root->node));
    1669             :         }
    1670             : 
    1671      203010 :         btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
    1672      203010 :         btrfs_set_backup_dev_root_gen(root_backup,
    1673      203010 :                                btrfs_header_generation(info->dev_root->node));
    1674      203010 :         btrfs_set_backup_dev_root_level(root_backup,
    1675      203010 :                                        btrfs_header_level(info->dev_root->node));
    1676             : 
    1677      203010 :         btrfs_set_backup_total_bytes(root_backup,
    1678      203010 :                              btrfs_super_total_bytes(info->super_copy));
    1679      203010 :         btrfs_set_backup_bytes_used(root_backup,
    1680      203010 :                              btrfs_super_bytes_used(info->super_copy));
    1681      203010 :         btrfs_set_backup_num_devices(root_backup,
    1682      203010 :                              btrfs_super_num_devices(info->super_copy));
    1683             : 
    1684             :         /*
    1685             :          * if we don't copy this out to the super_copy, it won't get remembered
    1686             :          * for the next commit
    1687             :          */
    1688      406020 :         memcpy(&info->super_copy->super_roots,
    1689             :                &info->super_for_commit->super_roots,
    1690             :                sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS);
    1691      203010 : }
    1692             : 
    1693             : /*
    1694             :  * read_backup_root - Reads a backup root based on the passed priority. Prio 0
    1695             :  * is the newest, prio 1/2/3 are 2nd newest/3rd newest/4th (oldest) backup roots
    1696             :  *
                     :  * For priority 0 the index of the newest backup is returned and the super
                     :  * block is left untouched. For any other valid priority the tree root
                     :  * bytenr, generation, level, bytes_used, total_bytes and num_devices stored
                     :  * in the selected backup slot are copied into fs_info->super_copy.
                     :  *
    1697             :  * fs_info - filesystem whose backup roots need to be read
    1698             :  * priority - priority of backup root required
    1699             :  *
    1700             :  * Returns backup root index on success and -EINVAL otherwise, i.e. when
                     :  * priority is not below BTRFS_NUM_BACKUP_ROOTS or find_newest_super_backup()
                     :  * returned a negative index.
    1701             :  */
    1702           0 : static int read_backup_root(struct btrfs_fs_info *fs_info, u8 priority)
    1703             : {
    1704           0 :         int backup_index = find_newest_super_backup(fs_info);
    1705           0 :         struct btrfs_super_block *super = fs_info->super_copy;
    1706           0 :         struct btrfs_root_backup *root_backup;
    1707             : 
    1708           0 :         if (priority < BTRFS_NUM_BACKUP_ROOTS && backup_index >= 0) {
    1709           0 :                 if (priority == 0)
    1710             :                         return backup_index;
    1711             :                 /* Step back 'priority' slots from the newest one, wrapping around. */
    1712           0 :                 backup_index = backup_index + BTRFS_NUM_BACKUP_ROOTS - priority;
    1713           0 :                 backup_index %= BTRFS_NUM_BACKUP_ROOTS;
    1714             :         } else {
    1715             :                 return -EINVAL;
    1716             :         }
    1717             : 
    1718           0 :         root_backup = super->super_roots + backup_index;
    1719             :         /* Point the in-memory super block at the tree root recorded in the backup. */
    1720           0 :         btrfs_set_super_generation(super,
    1721             :                                    btrfs_backup_tree_root_gen(root_backup));
    1722           0 :         btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup));
    1723           0 :         btrfs_set_super_root_level(super,
    1724             :                                    btrfs_backup_tree_root_level(root_backup));
    1725           0 :         btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
    1726             : 
    1727             :         /*
    1728             :          * FIXME: the total bytes and num_devices need to match, otherwise we
    1729             :          * should require a fsck
    1730             :          */
    1731           0 :         btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
    1732           0 :         btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
    1733             : 
    1734           0 :         return backup_index;
    1735             : }
    1736             : 
    1737             : /*
                     :  * Helper to cleanup workers: destroys every work queue referenced from
                     :  * @fs_info.
                     :  *
                     :  * Queues allocated through btrfs_alloc_workqueue() or
                     :  * btrfs_alloc_ordered_workqueue() are handed to btrfs_destroy_workqueue()
                     :  * unconditionally (presumably it tolerates NULL - TODO confirm). Queues
                     :  * that are plain kernel workqueues are only passed to destroy_workqueue()
                     :  * when the pointer is set.
                     :  */
    1738        3216 : static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
    1739             : {
    1740        3216 :         btrfs_destroy_workqueue(fs_info->fixup_workers);
    1741        3216 :         btrfs_destroy_workqueue(fs_info->delalloc_workers);
    1742        3216 :         btrfs_destroy_workqueue(fs_info->workers);
    1743        3216 :         if (fs_info->endio_workers)
    1744        3216 :                 destroy_workqueue(fs_info->endio_workers);
    1745        3216 :         if (fs_info->rmw_workers)
    1746        3216 :                 destroy_workqueue(fs_info->rmw_workers);
    1747        3216 :         if (fs_info->compressed_write_workers)
    1748        3216 :                 destroy_workqueue(fs_info->compressed_write_workers);
    1749        3216 :         btrfs_destroy_workqueue(fs_info->endio_write_workers);
    1750        3216 :         btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
    1751        3216 :         btrfs_destroy_workqueue(fs_info->delayed_workers);
    1752        3216 :         btrfs_destroy_workqueue(fs_info->caching_workers);
    1753        3216 :         btrfs_destroy_workqueue(fs_info->flush_workers);
    1754        3216 :         btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
    1755        3216 :         if (fs_info->discard_ctl.discard_workers)
    1756        3216 :                 destroy_workqueue(fs_info->discard_ctl.discard_workers);
    1757             :         /*
    1758             :          * Now that all other work queues are destroyed, we can safely destroy
    1759             :          * the queues used for metadata I/O, since tasks from those other work
    1760             :          * queues can do metadata I/O operations.
    1761             :          */
    1762        3216 :         if (fs_info->endio_meta_workers)
    1763        3216 :                 destroy_workqueue(fs_info->endio_meta_workers);
    1764        3216 : }
    1765             : /*
                     :  * Release the extent buffers held in root->node and root->commit_root via
                     :  * free_extent_buffer() and reset both pointers to NULL. A NULL @root is
                     :  * accepted and ignored. The root structure itself is not freed here.
                     :  */
    1766      102638 : static void free_root_extent_buffers(struct btrfs_root *root)
    1767             : {
    1768      102638 :         if (root) {
    1769       96256 :                 free_extent_buffer(root->node);
    1770       96256 :                 free_extent_buffer(root->commit_root);
    1771       96256 :                 root->node = NULL;
    1772       96256 :                 root->commit_root = NULL;
    1773             :         }
    1774      102638 : }
    1775             : /*
                     :  * Call free_root_extent_buffers() on every root linked into
                     :  * fs_info->global_root_tree. The tree is walked in post-order; the roots
                     :  * are neither unlinked from the tree nor freed by this function.
                     :  */
    1776        3216 : static void free_global_root_pointers(struct btrfs_fs_info *fs_info)
    1777             : {
    1778        3216 :         struct btrfs_root *root, *tmp;
    1779             : 
    1780       16068 :         rbtree_postorder_for_each_entry_safe(root, tmp,
    1781             :                                              &fs_info->global_root_tree,
    1782             :                                              rb_node)
    1783        9636 :                 free_root_extent_buffers(root);
    1784        3216 : }
    1785             : 
    1786             : /*
                     :  * Helper to cleanup tree roots: releases the node and commit_root extent
                     :  * buffers of the tree root, all global roots, and the dev, quota, uuid, fs,
                     :  * data reloc and block group roots.
                     :  *
                     :  * info - filesystem whose root extent buffers are released
                     :  * free_chunk_root - when true, the chunk root's buffers are released too
                     :  */
    1787        3216 : static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
    1788             : {
    1789        3216 :         free_root_extent_buffers(info->tree_root);
    1790             : 
    1791        3216 :         free_global_root_pointers(info);
    1792        3216 :         free_root_extent_buffers(info->dev_root);
    1793        3216 :         free_root_extent_buffers(info->quota_root);
    1794        3216 :         free_root_extent_buffers(info->uuid_root);
    1795        3216 :         free_root_extent_buffers(info->fs_root);
    1796        3216 :         free_root_extent_buffers(info->data_reloc_root);
    1797        3216 :         free_root_extent_buffers(info->block_group_root);
    1798        3216 :         if (free_chunk_root)
    1799        3216 :                 free_root_extent_buffers(info->chunk_root);
    1800        3216 : }
    1801             : /*
                     :  * Drop one reference on @root; a NULL @root is ignored.
                     :  *
                     :  * When the reference count reaches zero the root is torn down: a warning
                     :  * is emitted if its inode tree is not empty or if the
                     :  * BTRFS_ROOT_DEAD_RELOC_TREE bit is still set, a non-zero anon_dev is
                     :  * released with free_anon_bdev(), the extent buffers are released and the
                     :  * structure is freed with kfree(). With CONFIG_BTRFS_DEBUG the root is also
                     :  * unlinked from its leak_list under fs_roots_radix_lock.
                     :  */
    1802    18289918 : void btrfs_put_root(struct btrfs_root *root)
    1803             : {
    1804    18289918 :         if (!root)
    1805             :                 return;
    1806             : 
    1807    16273033 :         if (refcount_dec_and_test(&root->refs)) {
    1808       67274 :                 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
    1809       67274 :                 WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
    1810       67274 :                 if (root->anon_dev)
    1811        8606 :                         free_anon_bdev(root->anon_dev);
    1812       67274 :                 free_root_extent_buffers(root);
    1813             : #ifdef CONFIG_BTRFS_DEBUG
    1814             :                 spin_lock(&root->fs_info->fs_roots_radix_lock);
    1815             :                 list_del_init(&root->leak_list);
    1816             :                 spin_unlock(&root->fs_info->fs_roots_radix_lock);
    1817             : #endif
    1818       67274 :                 kfree(root);
    1819             :         }
    1820             : }
    1821             : /*
                     :  * Release all fs roots still tracked by @fs_info.
                     :  *
                     :  * First the dead_roots list is drained: each entry is unlinked, dropped
                     :  * with btrfs_drop_and_free_fs_root() if it has BTRFS_ROOT_IN_RADIX set, and
                     :  * then has one reference put. Afterwards fs_roots_radix is emptied in
                     :  * batches of up to ARRAY_SIZE(gang) roots. Every lookup restarts at index
                     :  * 0, so the loop relies on btrfs_drop_and_free_fs_root() removing the root
                     :  * from the radix tree (presumably it does - verify against its definition).
                     :  */
    1822        3215 : void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
    1823             : {
    1824        3215 :         int ret;
    1825        3215 :         struct btrfs_root *gang[8];
    1826        3215 :         int i;
    1827             : 
    1828        3397 :         while (!list_empty(&fs_info->dead_roots)) {
    1829         182 :                 gang[0] = list_entry(fs_info->dead_roots.next,
    1830             :                                      struct btrfs_root, root_list);
    1831         182 :                 list_del(&gang[0]->root_list);
    1832             : 
    1833         364 :                 if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
    1834         182 :                         btrfs_drop_and_free_fs_root(fs_info, gang[0]);
    1835         182 :                 btrfs_put_root(gang[0]);
    1836             :         }
    1837             :         /* Drop whatever is still present in the radix tree, one batch at a time. */
    1838        7035 :         while (1) {
    1839        7035 :                 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
    1840             :                                              (void **)gang, 0,
    1841             :                                              ARRAY_SIZE(gang));
    1842        7035 :                 if (!ret)
    1843             :                         break;
    1844       15641 :                 for (i = 0; i < ret; i++)
    1845       11821 :                         btrfs_drop_and_free_fs_root(fs_info, gang[i]);
    1846             :         }
    1847        3215 : }
    1848             : /*
                     :  * Initialize the scrub bookkeeping in @fs_info: the scrub mutex, the
                     :  * running/paused/pause-request/cancel-request counters (all set to 0), the
                     :  * pause wait queue and the scrub worker reference count (set to 0).
                     :  */
    1849        3472 : static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
    1850             : {
    1851        3472 :         mutex_init(&fs_info->scrub_lock);
    1852        3472 :         atomic_set(&fs_info->scrubs_running, 0);
    1853        3472 :         atomic_set(&fs_info->scrub_pause_req, 0);
    1854        3472 :         atomic_set(&fs_info->scrubs_paused, 0);
    1855        3472 :         atomic_set(&fs_info->scrub_cancel_req, 0);
    1856        3472 :         init_waitqueue_head(&fs_info->scrub_pause_wait);
    1857        3472 :         refcount_set(&fs_info->scrub_workers_refcnt, 0);
    1858        3472 : }
    1859             : /*
                     :  * Initialize the balance state in @fs_info: the balance spinlock and mutex,
                     :  * the pause/cancel request counters and the relocation cancel counter (all
                     :  * set to 0), the balance wait queue, and balance_ctl (set to NULL).
                     :  */
    1860        3472 : static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
    1861             : {
    1862        3472 :         spin_lock_init(&fs_info->balance_lock);
    1863        3472 :         mutex_init(&fs_info->balance_mutex);
    1864        3472 :         atomic_set(&fs_info->balance_pause_req, 0);
    1865        3472 :         atomic_set(&fs_info->balance_cancel_req, 0);
    1866        3472 :         fs_info->balance_ctl = NULL;
    1867        3472 :         init_waitqueue_head(&fs_info->balance_wait_q);
    1868        3472 :         atomic_set(&fs_info->reloc_cancel_req, 0);
    1869        3472 : }
    1870             : /*
                     :  * Allocate and set up the btree inode for @sb and store it in
                     :  * fs_info->btree_inode.
                     :  *
                     :  * The inode gets the objectid BTRFS_BTREE_INODE_OBJECTID, uses btree_aops
                     :  * for its mapping with a GFP_NOFS allocation mask, takes a reference on
                     :  * fs_info->tree_root, is flagged BTRFS_INODE_DUMMY and is inserted into the
                     :  * inode hash.
                     :  *
                     :  * Returns 0 on success and -ENOMEM if new_inode() fails.
                     :  */
    1871        3242 : static int btrfs_init_btree_inode(struct super_block *sb)
    1872             : {
    1873        3242 :         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
    1874        3242 :         unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID,
    1875        3242 :                                               fs_info->tree_root);
    1876        3242 :         struct inode *inode;
    1877             : 
    1878        3242 :         inode = new_inode(sb);
    1879        3242 :         if (!inode)
    1880             :                 return -ENOMEM;
    1881             : 
    1882        3242 :         inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
    1883        3242 :         set_nlink(inode, 1);
    1884             :         /*
    1885             :          * We set the i_size on the btree inode to OFFSET_MAX, the largest
    1886             :          * value it can take. The real end of the address space is determined
    1887             :          * by all of the devices in the system.
    1888             :          */
    1889        3242 :         inode->i_size = OFFSET_MAX;
    1890        3242 :         inode->i_mapping->a_ops = &btree_aops;
    1891        3242 :         mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
    1892             : 
    1893        3242 :         RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
    1894        3242 :         extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
    1895             :                             IO_TREE_BTREE_INODE_IO);
    1896        3242 :         extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
    1897             : 
    1898        3242 :         BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
    1899        3242 :         BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
    1900        3242 :         BTRFS_I(inode)->location.type = 0;
    1901        3242 :         BTRFS_I(inode)->location.offset = 0;
    1902        3242 :         set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
    1903        3242 :         __insert_inode_hash(inode, hash);
    1904        3242 :         fs_info->btree_inode = inode;
    1905             : 
    1906        3242 :         return 0;
    1907             : }
    1908             : /*
                     :  * Initialize the synchronization primitives embedded in
                     :  * fs_info->dev_replace: the lock_finishing_cancel_unmount mutex, the rwsem
                     :  * and the replace_wait wait queue.
                     :  */
    1909        3472 : static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
    1910             : {
    1911        3472 :         mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
    1912        3472 :         init_rwsem(&fs_info->dev_replace.rwsem);
    1913        3472 :         init_waitqueue_head(&fs_info->dev_replace.replace_wait);
    1914        3472 : }
    1915             : /*
                     :  * Initialize the qgroup state in @fs_info: its locks, an empty qgroup
                     :  * rb-tree and dirty list, a sequence number starting at 1, no ulist, no
                     :  * rescan running, and a drop-subtree threshold of BTRFS_MAX_LEVEL.
                     :  */
    1916        3472 : static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
    1917             : {
    1918        3472 :         spin_lock_init(&fs_info->qgroup_lock);
    1919        3472 :         mutex_init(&fs_info->qgroup_ioctl_lock);
    1920        3472 :         fs_info->qgroup_tree = RB_ROOT;
    1921        3472 :         INIT_LIST_HEAD(&fs_info->dirty_qgroups);
    1922        3472 :         fs_info->qgroup_seq = 1;
    1923        3472 :         fs_info->qgroup_ulist = NULL;
    1924        3472 :         fs_info->qgroup_rescan_running = false;
    1925        3472 :         fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
    1926        3472 :         mutex_init(&fs_info->qgroup_rescan_lock);
    1927        3472 : }
    1928             : /*
                     :  * Allocate all work queues referenced from @fs_info.
                     :  *
                     :  * Regular queues are created with WQ_MEM_RECLAIM | WQ_FREEZABLE |
                     :  * WQ_UNBOUND and fs_info->thread_pool_size as max_active; the ordered
                     :  * fixup and qgroup-rescan queues omit WQ_UNBOUND, and the discard queue is
                     :  * an ordered queue with only WQ_FREEZABLE.
                     :  *
                     :  * Returns 0 if every allocation succeeded and -ENOMEM if any of them
                     :  * failed. Nothing is released here on failure; queues that were allocated
                     :  * stay in @fs_info (presumably for the caller to destroy - TODO confirm).
                     :  */
    1929        3216 : static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
    1930             : {
    1931        3216 :         u32 max_active = fs_info->thread_pool_size;
    1932        3216 :         unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
    1933        3216 :         unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
    1934             : 
    1935        6432 :         fs_info->workers =
    1936        3216 :                 btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
    1937             : 
    1938        6432 :         fs_info->delalloc_workers =
    1939        3216 :                 btrfs_alloc_workqueue(fs_info, "delalloc",
    1940             :                                       flags, max_active, 2);
    1941             : 
    1942        6432 :         fs_info->flush_workers =
    1943        3216 :                 btrfs_alloc_workqueue(fs_info, "flush_delalloc",
    1944             :                                       flags, max_active, 0);
    1945             : 
    1946        6432 :         fs_info->caching_workers =
    1947        3216 :                 btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
    1948             : 
    1949        6432 :         fs_info->fixup_workers =
    1950        3216 :                 btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags);
    1951             : 
    1952        6432 :         fs_info->endio_workers =
    1953        3216 :                 alloc_workqueue("btrfs-endio", flags, max_active);
    1954        6432 :         fs_info->endio_meta_workers =
    1955        3216 :                 alloc_workqueue("btrfs-endio-meta", flags, max_active);
    1956        3216 :         fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active);
    1957        6432 :         fs_info->endio_write_workers =
    1958        3216 :                 btrfs_alloc_workqueue(fs_info, "endio-write", flags,
    1959             :                                       max_active, 2);
    1960        6432 :         fs_info->compressed_write_workers =
    1961        3216 :                 alloc_workqueue("btrfs-compressed-write", flags, max_active);
    1962        6432 :         fs_info->endio_freespace_worker =
    1963        3216 :                 btrfs_alloc_workqueue(fs_info, "freespace-write", flags,
    1964             :                                       max_active, 0);
    1965        6432 :         fs_info->delayed_workers =
    1966        3216 :                 btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
    1967             :                                       max_active, 0);
    1968        6432 :         fs_info->qgroup_rescan_workers =
    1969        3216 :                 btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
    1970             :                                               ordered_flags);
    1971        6432 :         fs_info->discard_ctl.discard_workers =
    1972        3216 :                 alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
    1973             :         /* All allocations are attempted first; a single check covers every queue. */
    1974        6432 :         if (!(fs_info->workers &&
    1975        3216 :               fs_info->delalloc_workers && fs_info->flush_workers &&
    1976        3216 :               fs_info->endio_workers && fs_info->endio_meta_workers &&
    1977        3216 :               fs_info->compressed_write_workers &&
    1978        3216 :               fs_info->endio_write_workers &&
    1979        3216 :               fs_info->endio_freespace_worker && fs_info->rmw_workers &&
    1980        3216 :               fs_info->caching_workers && fs_info->fixup_workers &&
    1981        3216 :               fs_info->delayed_workers && fs_info->qgroup_rescan_workers &&
    1982             :               fs_info->discard_ctl.discard_workers)) {
    1983           0 :                 return -ENOMEM;
    1984             :         }
    1985             : 
    1986             :         return 0;
    1987             : }
    1988             : /*
                     :  * Allocate the shash transform used for checksumming and store it in
                     :  * fs_info->csum_shash.
                     :  *
                     :  * fs_info - filesystem being set up
                     :  * csum_type - checksum type; selects the driver name via
                     :  *             btrfs_super_csum_driver()
                     :  *
                     :  * BTRFS_FS_CSUM_IMPL_FAST is set in fs_info->flags for xxhash always, and
                     :  * for crc32 when the selected driver name does not contain "generic".
                     :  *
                     :  * Returns 0 on success or the error from crypto_alloc_shash().
                     :  */
    1989        3242 : static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
    1990             : {
    1991        3242 :         struct crypto_shash *csum_shash;
    1992        3242 :         const char *csum_driver = btrfs_super_csum_driver(csum_type);
    1993             : 
    1994        3242 :         csum_shash = crypto_alloc_shash(csum_driver, 0, 0);
    1995             : 
    1996        3242 :         if (IS_ERR(csum_shash)) {
    1997           0 :                 btrfs_err(fs_info, "error allocating %s hash for checksum",
    1998             :                           csum_driver);
    1999           0 :                 return PTR_ERR(csum_shash);
    2000             :         }
    2001             : 
    2002        3242 :         fs_info->csum_shash = csum_shash;
    2003             : 
    2004             :         /*
    2005             :          * Check if the checksum implementation is a fast accelerated one.
    2006             :          * As-is this is a bit of a hack and should be replaced once the csum
    2007             :          * implementations provide that information themselves.
    2008             :          */
    2009        3242 :         switch (csum_type) {
    2010             :         case BTRFS_CSUM_TYPE_CRC32:
    2011        3242 :                 if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
    2012        3242 :                         set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
    2013             :                 break;
    2014           0 :         case BTRFS_CSUM_TYPE_XXHASH:
    2015           0 :                 set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
    2016             :                 break;
    2017             :         default:
    2018             :                 break;
    2019             :         }
    2020             : 
    2021        3242 :         btrfs_info(fs_info, "using %s (%s) checksum algorithm",
    2022             :                         btrfs_super_csum_name(csum_type),
    2023             :                         crypto_shash_driver_name(csum_shash));
    2024        3242 :         return 0;
    2025             : }
    2026             : /*
                     :  * Read the log tree root recorded in the super block and replay it.
                     :  *
                     :  * fs_info - filesystem whose log is replayed
                     :  * fs_devices - device set; replay is refused when it has no rw devices
                     :  *
                     :  * The log root block is read at the bytenr and level stored in
                     :  * fs_info->super_copy and checked against transid fs_info->generation + 1.
                     :  * After a successful recovery on a read-only super block,
                     :  * btrfs_commit_super() is called as well.
                     :  *
                     :  * Returns 0 on success, -EIO if there are no rw devices or the log root
                     :  * block is not uptodate, -ENOMEM if the root cannot be allocated, or the
                     :  * error from read_tree_block(), btrfs_recover_log_trees() or
                     :  * btrfs_commit_super().
                     :  */
    2027         283 : static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
    2028             :                             struct btrfs_fs_devices *fs_devices)
    2029             : {
    2030         283 :         int ret;
    2031         283 :         struct btrfs_tree_parent_check check = { 0 };
    2032         283 :         struct btrfs_root *log_tree_root;
    2033         283 :         struct btrfs_super_block *disk_super = fs_info->super_copy;
    2034         283 :         u64 bytenr = btrfs_super_log_root(disk_super);
    2035         283 :         int level = btrfs_super_log_root_level(disk_super);
    2036             : 
    2037         283 :         if (fs_devices->rw_devices == 0) {
    2038           0 :                 btrfs_warn(fs_info, "log replay required on RO media");
    2039           0 :                 return -EIO;
    2040             :         }
    2041             : 
    2042         283 :         log_tree_root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID,
    2043             :                                          GFP_KERNEL);
    2044         283 :         if (!log_tree_root)
    2045             :                 return -ENOMEM;
    2046             : 
    2047         283 :         check.level = level;
    2048         283 :         check.transid = fs_info->generation + 1;
    2049         283 :         check.owner_root = BTRFS_TREE_LOG_OBJECTID;
    2050         283 :         log_tree_root->node = read_tree_block(fs_info, bytenr, &check);
    2051         283 :         if (IS_ERR(log_tree_root->node)) {
    2052           0 :                 btrfs_warn(fs_info, "failed to read log tree");
    2053           0 :                 ret = PTR_ERR(log_tree_root->node);
    2054           0 :                 log_tree_root->node = NULL;
    2055           0 :                 btrfs_put_root(log_tree_root);
    2056           0 :                 return ret;
    2057             :         }
    2058         566 :         if (!extent_buffer_uptodate(log_tree_root->node)) {
    2059           0 :                 btrfs_err(fs_info, "failed to read log tree");
    2060           0 :                 btrfs_put_root(log_tree_root);
    2061           0 :                 return -EIO;
    2062             :         }
    2063             : 
    2064             :         /* returns with log_tree_root freed on success */
    2065         283 :         ret = btrfs_recover_log_trees(log_tree_root);
    2066         283 :         if (ret) {
    2067           0 :                 btrfs_handle_fs_error(fs_info, ret,
    2068             :                                       "Failed to recover log tree");
    2069           0 :                 btrfs_put_root(log_tree_root);
    2070           0 :                 return ret;
    2071             :         }
    2072             : 
    2073         283 :         if (sb_rdonly(fs_info->sb)) {
    2074           0 :                 ret = btrfs_commit_super(fs_info);
    2075           0 :                 if (ret)
    2076           0 :                         return ret;
    2077             :         }
    2078             : 
    2079             :         return 0;
    2080             : }
    2081             : /*
                     :  * Load every root item with the given objectid from the tree root and
                     :  * insert the resulting roots via btrfs_global_root_insert().
                     :  *
                     :  * tree_root - root of the tree of tree roots to search
                     :  * path - caller-provided path; released again before returning
                     :  * objectid - objectid of the global roots to load
                     :  * name - human readable name, used only in the error message
                     :  *
                     :  * The search starts at (objectid, BTRFS_ROOT_ITEM_KEY, 0) and, after each
                     :  * root that was loaded, restarts with the key offset incremented by one.
                     :  * It stops at the first item whose objectid differs, at the end of the
                     :  * tree, or on error. For the extent tree objectid, fs_info->nr_global_roots
                     :  * is set to the highest key offset seen plus one.
                     :  *
                     :  * With the IGNOREDATACSUMS mount option the csum roots are not loaded at
                     :  * all and BTRFS_FS_STATE_NO_CSUMS is set instead.
                     :  *
                     :  * Returns 0 on success. If no root was found or an error occurred, an
                     :  * error is logged and the error code (-ENOENT when nothing was found) is
                     :  * returned, unless the IGNOREBADROOTS mount option is set, in which case 0
                     :  * is returned.
                     :  */
    2082        9639 : static int load_global_roots_objectid(struct btrfs_root *tree_root,
    2083             :                                       struct btrfs_path *path, u64 objectid,
    2084             :                                       const char *name)
    2085             : {
    2086        9639 :         struct btrfs_fs_info *fs_info = tree_root->fs_info;
    2087        9639 :         struct btrfs_root *root;
    2088        9639 :         u64 max_global_id = 0;
    2089        9639 :         int ret;
    2090        9639 :         struct btrfs_key key = {
    2091             :                 .objectid = objectid,
    2092             :                 .type = BTRFS_ROOT_ITEM_KEY,
    2093             :                 .offset = 0,
    2094             :         };
    2095        9639 :         bool found = false;
    2096             : 
    2097             :         /* If we have IGNOREDATACSUMS skip loading these roots. */
    2098        9639 :         if (objectid == BTRFS_CSUM_TREE_OBJECTID &&
    2099        3215 :             btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
    2100           0 :                 set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
    2101           0 :                 return 0;
    2102             :         }
    2103             : 
    2104       28917 :         while (1) {
    2105       19278 :                 ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
    2106       19278 :                 if (ret < 0)
    2107             :                         break;
    2108             :                 /* Slot is past the last item of this leaf: move on to the next leaf. */
    2109       19278 :                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
    2110           0 :                         ret = btrfs_next_leaf(tree_root, path);
    2111           0 :                         if (ret) {
    2112           0 :                                 if (ret > 0)
    2113             :                                         ret = 0;
    2114             :                                 break;
    2115             :                         }
    2116             :                 }
    2117       19278 :                 ret = 0;
    2118             : 
    2119       19278 :                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
    2120       19278 :                 if (key.objectid != objectid)
    2121             :                         break;
    2122        9639 :                 btrfs_release_path(path);
    2123             : 
    2124             :                 /*
    2125             :                  * Just worry about this for extent tree, it'll be the same for
    2126             :                  * everybody.
    2127             :                  */
    2128        9639 :                 if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
    2129        3215 :                         max_global_id = max(max_global_id, key.offset);
    2130             : 
    2131        9639 :                 found = true;
    2132        9639 :                 root = read_tree_root_path(tree_root, path, &key);
    2133        9639 :                 if (IS_ERR(root)) {
    2134           0 :                         if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
    2135           0 :                                 ret = PTR_ERR(root);
    2136             :                         break;
    2137             :                 }
    2138        9639 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2139        9639 :                 ret = btrfs_global_root_insert(root);
    2140        9639 :                 if (ret) {
    2141           0 :                         btrfs_put_root(root);
    2142           0 :                         break;
    2143             :                 }
    2144        9639 :                 key.offset++;
    2145             :         }
    2146        9639 :         btrfs_release_path(path);
    2147             : 
    2148        9639 :         if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
    2149        3215 :                 fs_info->nr_global_roots = max_global_id + 1;
    2150             : 
    2151        9639 :         if (!found || ret) {
    2152           0 :                 if (objectid == BTRFS_CSUM_TREE_OBJECTID)
    2153           0 :                         set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
    2154             : 
    2155           0 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
    2156           0 :                         ret = ret ? ret : -ENOENT;
    2157             :                 else
    2158             :                         ret = 0;
    2159           0 :                 btrfs_err(fs_info, "failed to load root %s", name);
    2160             :         }
    2161             :         return ret;
    2162             : }
    2163             : 
    2164        3215 : static int load_global_roots(struct btrfs_root *tree_root)
    2165             : {
                               /*
                                * Load the global roots for the extent tree, csum tree and free
                                * space tree objectids, reusing one btrfs_path for all lookups.
                                *
                                * Returns 0 on success, -ENOMEM when the path cannot be
                                * allocated, or the first non-zero value returned by
                                * load_global_roots_objectid().
                                */
    2166        3215 :         struct btrfs_path *path;
    2167        3215 :         int ret = 0;
    2168             : 
    2169        3215 :         path = btrfs_alloc_path();
    2170        3215 :         if (!path)
    2171             :                 return -ENOMEM;
    2172             : 
    2173        3215 :         ret = load_global_roots_objectid(tree_root, path,
    2174             :                                          BTRFS_EXTENT_TREE_OBJECTID, "extent");
    2175        3215 :         if (ret)
    2176           0 :                 goto out;
    2177        3215 :         ret = load_global_roots_objectid(tree_root, path,
    2178             :                                          BTRFS_CSUM_TREE_OBJECTID, "csum");
    2179        3215 :         if (ret)
    2180           0 :                 goto out;
                               /*
                                * Without the FREE_SPACE_TREE compat_ro bit the free space tree
                                * lookup is skipped; ret is still 0 at this point.
                                */
    2181        3215 :         if (!btrfs_fs_compat_ro(tree_root->fs_info, FREE_SPACE_TREE))
    2182           6 :                 goto out;
    2183        3209 :         ret = load_global_roots_objectid(tree_root, path,
    2184             :                                          BTRFS_FREE_SPACE_TREE_OBJECTID,
    2185             :                                          "free space");
    2186        3215 : out:
                               /* Single exit so the path is freed on every route. */
    2187        3215 :         btrfs_free_path(path);
    2188        3215 :         return ret;
    2189             : }
    2190             : 
    2191        3215 : static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
    2192             : {
                               /*
                                * Read the roots that hang off fs_info->tree_root and store them
                                * in fs_info: the global roots (via load_global_roots()), the
                                * block group root (only with the BLOCK_GROUP_TREE compat_ro bit),
                                * the dev root, the data reloc root, the quota root and the uuid
                                * root.
                                *
                                * When the IGNOREBADROOTS mount option is set, a failure to read
                                * the block group, dev, data reloc or uuid root is tolerated and
                                * the matching fs_info pointer is simply not assigned here.
                                *
                                * Returns 0 on success or a negative errno.  Every "goto out"
                                * failure is reported with a warning before returning.
                                */
    2193        3215 :         struct btrfs_root *tree_root = fs_info->tree_root;
    2194        3215 :         struct btrfs_root *root;
    2195        3215 :         struct btrfs_key location;
    2196        3215 :         int ret;
    2197             : 
    2198        3215 :         BUG_ON(!fs_info->tree_root);
    2199             : 
    2200        3215 :         ret = load_global_roots(tree_root);
    2201        3215 :         if (ret)
    2202             :                 return ret;
    2203             : 
                               /*
                                * All lookups below reuse this key; only the objectid changes
                                * between them.
                                */
    2204        3215 :         location.type = BTRFS_ROOT_ITEM_KEY;
    2205        3215 :         location.offset = 0;
    2206             : 
    2207        3215 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
    2208           0 :                 location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
    2209           0 :                 root = btrfs_read_tree_root(tree_root, &location);
    2210           0 :                 if (IS_ERR(root)) {
    2211           0 :                         if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2212           0 :                                 ret = PTR_ERR(root);
    2213           0 :                                 goto out;
    2214             :                         }
    2215             :                 } else {
    2216           0 :                         set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2217           0 :                         fs_info->block_group_root = root;
    2218             :                 }
    2219             :         }
    2220             : 
    2221        3215 :         location.objectid = BTRFS_DEV_TREE_OBJECTID;
    2222        3215 :         root = btrfs_read_tree_root(tree_root, &location);
    2223        3215 :         if (IS_ERR(root)) {
    2224           0 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2225           0 :                         ret = PTR_ERR(root);
    2226           0 :                         goto out;
    2227             :                 }
    2228             :         } else {
    2229        3215 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2230        3215 :                 fs_info->dev_root = root;
    2231             :         }
    2232             :         /* Initialize fs_info for all devices in any case */
    2233        3215 :         ret = btrfs_init_devices_late(fs_info);
    2234        3215 :         if (ret)
    2235           0 :                 goto out;
    2236             : 
    2237             :         /*
    2238             :          * This tree can share blocks with some other fs tree during relocation
    2239             :          * and we need a proper setup by btrfs_get_fs_root
    2240             :          */
                               /*
                                * NOTE(review): this lookup does not go through "location", so
                                * location.objectid still holds BTRFS_DEV_TREE_OBJECTID here.  If
                                * this step (or btrfs_init_devices_late() above) fails, the
                                * warning at "out" prints the dev tree objectid - confirm whether
                                * that is the intended message.
                                */
    2241        3215 :         root = btrfs_get_fs_root(tree_root->fs_info,
    2242             :                                  BTRFS_DATA_RELOC_TREE_OBJECTID, true);
    2243        3215 :         if (IS_ERR(root)) {
    2244           0 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2245           0 :                         ret = PTR_ERR(root);
    2246           0 :                         goto out;
    2247             :                 }
    2248             :         } else {
    2249        3215 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2250        3215 :                 fs_info->data_reloc_root = root;
    2251             :         }
    2252             : 
                               /*
                                * A failure to read the quota root is ignored regardless of mount
                                * options; the quota-enabled flag is only set when the root was
                                * read successfully.
                                */
    2253        3215 :         location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
    2254        3215 :         root = btrfs_read_tree_root(tree_root, &location);
    2255        3215 :         if (!IS_ERR(root)) {
    2256          22 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2257          22 :                 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
    2258          22 :                 fs_info->quota_root = root;
    2259             :         }
    2260             : 
                               /*
                                * For the uuid root, -ENOENT is tolerated even without
                                * IGNOREBADROOTS: ret is assigned but then discarded by the
                                * explicit "return 0" below.
                                */
    2261        3215 :         location.objectid = BTRFS_UUID_TREE_OBJECTID;
    2262        3215 :         root = btrfs_read_tree_root(tree_root, &location);
    2263        3215 :         if (IS_ERR(root)) {
    2264           3 :                 if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
    2265           3 :                         ret = PTR_ERR(root);
    2266           3 :                         if (ret != -ENOENT)
    2267           0 :                                 goto out;
    2268             :                 }
    2269             :         } else {
    2270        3212 :                 set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
    2271        3212 :                 fs_info->uuid_root = root;
    2272             :         }
    2273             : 
    2274             :         return 0;
    2275           0 : out:
    2276           0 :         btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d",
    2277             :                    location.objectid, ret);
    2278           0 :         return ret;
    2279             : }
    2280             : 
    2281             : /*
    2282             :  * Real super block validation
    2283             :  * NOTE: super csum type and incompat features will not be checked here.
    2284             :  *
    2285             :  * @sb:         super block to check
    2286             :  * @mirror_num: the super block number to check its bytenr:
    2287             :  *              0       the primary (1st) sb
    2288             :  *              1, 2    2nd and 3rd backup copy
    2289             :  *             -1       skip bytenr check
                        *
                        * A failing check does not stop validation: each one logs its own message
                        * and sets the result to -EINVAL, so every detected problem is reported in
                        * one pass.  The num_devices > (1UL << 31) check and the two generation
                        * comparisons at the end only log a warning and do not affect the result.
                        *
                        * Return: 0 if every check passed, -EINVAL otherwise.
    2290             :  */
    2291      445889 : int btrfs_validate_super(struct btrfs_fs_info *fs_info,
    2292             :                          struct btrfs_super_block *sb, int mirror_num)
    2293             : {
    2294      445889 :         u64 nodesize = btrfs_super_nodesize(sb);
    2295      445889 :         u64 sectorsize = btrfs_super_sectorsize(sb);
    2296      445889 :         int ret = 0;
    2297             : 
    2298      445889 :         if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
    2299           0 :                 btrfs_err(fs_info, "no valid FS found");
    2300           0 :                 ret = -EINVAL;
    2301             :         }
    2302      445889 :         if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
    2303           0 :                 btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
    2304             :                                 btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
    2305           0 :                 ret = -EINVAL;
    2306             :         }
                               /* The three root levels stored in the super must be below BTRFS_MAX_LEVEL. */
    2307      445889 :         if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
    2308           0 :                 btrfs_err(fs_info, "tree_root level too big: %d >= %d",
    2309             :                                 btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
    2310           0 :                 ret = -EINVAL;
    2311             :         }
    2312      445889 :         if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
    2313           0 :                 btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
    2314             :                                 btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
    2315           0 :                 ret = -EINVAL;
    2316             :         }
    2317      445889 :         if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
    2318           0 :                 btrfs_err(fs_info, "log_root level too big: %d >= %d",
    2319             :                                 btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
    2320           0 :                 ret = -EINVAL;
    2321             :         }
    2322             : 
    2323             :         /*
    2324             :          * Check sectorsize and nodesize first, other check will need it.
    2325             :          * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here.
    2326             :          */
    2327      891778 :         if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
    2328             :             sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
    2329           0 :                 btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
    2330           0 :                 ret = -EINVAL;
    2331             :         }
    2332             : 
    2333             :         /*
    2334             :          * We only support at most two sectorsizes: 4K and PAGE_SIZE.
    2335             :          *
    2336             :          * We can support 16K sectorsize with 64K page size without problem,
    2337             :          * but such sectorsize/pagesize combination doesn't make much sense.
    2338             :          * 4K will be our future standard, PAGE_SIZE is supported from the very
    2339             :          * beginning.
    2340             :          */
    2341      445889 :         if (sectorsize > PAGE_SIZE || (sectorsize != SZ_4K && sectorsize != PAGE_SIZE)) {
    2342           0 :                 btrfs_err(fs_info,
    2343             :                         "sectorsize %llu not yet supported for page size %lu",
    2344             :                         sectorsize, PAGE_SIZE);
    2345           0 :                 ret = -EINVAL;
    2346             :         }
    2347             : 
                               /* nodesize: a power of two between sectorsize and the metadata block size limit. */
    2348      891778 :         if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
    2349      445889 :             nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
    2350           0 :                 btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
    2351           0 :                 ret = -EINVAL;
    2352             :         }
                               /* The __unused_leafsize field is still required to equal nodesize. */
    2353      445889 :         if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
    2354           0 :                 btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
    2355             :                           le32_to_cpu(sb->__unused_leafsize), nodesize);
    2356           0 :                 ret = -EINVAL;
    2357             :         }
    2358             : 
    2359             :         /* Root alignment check */
    2360      445889 :         if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
    2361           0 :                 btrfs_warn(fs_info, "tree_root block unaligned: %llu",
    2362             :                            btrfs_super_root(sb));
    2363           0 :                 ret = -EINVAL;
    2364             :         }
    2365      445889 :         if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
    2366           0 :                 btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
    2367             :                            btrfs_super_chunk_root(sb));
    2368           0 :                 ret = -EINVAL;
    2369             :         }
    2370      445889 :         if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
    2371           0 :                 btrfs_warn(fs_info, "log_root block unaligned: %llu",
    2372             :                            btrfs_super_log_root(sb));
    2373           0 :                 ret = -EINVAL;
    2374             :         }
    2375             : 
                               /*
                                * NOTE(review): this check and the metadata_uuid check that
                                * follows read fs_info->super_copy rather than @sb, so they do
                                * not examine @sb when a different buffer is passed in - confirm
                                * whether that is intended.
                                */
    2376      891778 :         if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
    2377             :                    BTRFS_FSID_SIZE)) {
    2378           0 :                 btrfs_err(fs_info,
    2379             :                 "superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
    2380             :                         fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
    2381           0 :                 ret = -EINVAL;
    2382             :         }
    2383             : 
    2384      445889 :         if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
    2385           0 :             memcmp(fs_info->fs_devices->metadata_uuid,
    2386           0 :                    fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
    2387           0 :                 btrfs_err(fs_info,
    2388             : "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
    2389             :                         fs_info->super_copy->metadata_uuid,
    2390             :                         fs_info->fs_devices->metadata_uuid);
    2391           0 :                 ret = -EINVAL;
    2392             :         }
    2393             : 
                               /* Unlike the two checks above, this one does read from @sb. */
    2394      891778 :         if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
    2395             :                    BTRFS_FSID_SIZE) != 0) {
    2396           0 :                 btrfs_err(fs_info,
    2397             :                         "dev_item UUID does not match metadata fsid: %pU != %pU",
    2398             :                         fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
    2399           0 :                 ret = -EINVAL;
    2400             :         }
    2401             : 
    2402             :         /*
    2403             :          * Artificial requirement for block-group-tree to force newer features
    2404             :          * (free-space-tree, no-holes) so the test matrix is smaller.
    2405             :          */
                               /*
                                * NOTE(review): the log message below spells the feature
                                * "fres-space-tree"; presumably "free-space-tree" was meant.
                                */
    2406      445889 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
    2407           0 :             (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
    2408           0 :              !btrfs_fs_incompat(fs_info, NO_HOLES))) {
    2409           0 :                 btrfs_err(fs_info,
    2410             :                 "block-group-tree feature requires fres-space-tree and no-holes");
    2411           0 :                 ret = -EINVAL;
    2412             :         }
    2413             : 
    2414             :         /*
    2415             :          * Hint to catch really bogus numbers, bitflips or so, more exact checks are
    2416             :          * done later
    2417             :          */
    2418      445889 :         if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
    2419           0 :                 btrfs_err(fs_info, "bytes_used is too small %llu",
    2420             :                           btrfs_super_bytes_used(sb));
    2421           0 :                 ret = -EINVAL;
    2422             :         }
    2423      891778 :         if (!is_power_of_2(btrfs_super_stripesize(sb))) {
    2424           0 :                 btrfs_err(fs_info, "invalid stripesize %u",
    2425             :                           btrfs_super_stripesize(sb));
    2426           0 :                 ret = -EINVAL;
    2427             :         }
                               /* A very large device count is only warned about; zero devices is an error. */
    2428      445889 :         if (btrfs_super_num_devices(sb) > (1UL << 31))
    2429           0 :                 btrfs_warn(fs_info, "suspicious number of devices: %llu",
    2430             :                            btrfs_super_num_devices(sb));
    2431      445889 :         if (btrfs_super_num_devices(sb) == 0) {
    2432           0 :                 btrfs_err(fs_info, "number of devices is 0");
    2433           0 :                 ret = -EINVAL;
    2434             :         }
    2435             : 
                               /*
                                * The bytenr check is skipped for a negative mirror_num.
                                *
                                * NOTE(review): the comparison uses btrfs_sb_offset(mirror_num)
                                * but the message prints BTRFS_SUPER_INFO_OFFSET, so for a
                                * non-zero mirror_num the printed expected value presumably
                                * differs from the one actually compared - confirm.
                                */
    2436      449177 :         if (mirror_num >= 0 &&
    2437             :             btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
    2438           0 :                 btrfs_err(fs_info, "super offset mismatch %llu != %u",
    2439             :                           btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
    2440           0 :                 ret = -EINVAL;
    2441             :         }
    2442             : 
    2443             :         /*
    2444             :          * Obvious sys_chunk_array corruptions, it must hold at least one key
    2445             :          * and one chunk
    2446             :          */
    2447      445889 :         if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
    2448           0 :                 btrfs_err(fs_info, "system chunk array too big %u > %u",
    2449             :                           btrfs_super_sys_array_size(sb),
    2450             :                           BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
    2451           0 :                 ret = -EINVAL;
    2452             :         }
    2453      445889 :         if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
    2454             :                         + sizeof(struct btrfs_chunk)) {
    2455           0 :                 btrfs_err(fs_info, "system chunk array too small %u < %zu",
    2456             :                           btrfs_super_sys_array_size(sb),
    2457             :                           sizeof(struct btrfs_disk_key)
    2458             :                           + sizeof(struct btrfs_chunk));
    2459           0 :                 ret = -EINVAL;
    2460             :         }
    2461             : 
    2462             :         /*
    2463             :          * The generation is a global counter, we'll trust it more than the others
    2464             :          * but it's still possible that it's the one that's wrong.
    2465             :          */
                               /* Both generation comparisons below only warn; ret is left untouched. */
    2466      445889 :         if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
    2467           0 :                 btrfs_warn(fs_info,
    2468             :                         "suspicious: generation < chunk_root_generation: %llu < %llu",
    2469             :                         btrfs_super_generation(sb),
    2470             :                         btrfs_super_chunk_root_generation(sb));
                               /* A cache_generation of (u64)-1 is exempt from this warning. */
    2471      445889 :         if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
    2472           3 :             && btrfs_super_cache_generation(sb) != (u64)-1)
    2473           0 :                 btrfs_warn(fs_info,
    2474             :                         "suspicious: generation < cache_generation: %llu < %llu",
    2475             :                         btrfs_super_generation(sb),
    2476             :                         btrfs_super_cache_generation(sb));
    2477             : 
    2478      445889 :         return ret;
    2479             : }
    2480             : 
    2481             : /*
    2482             :  * Validation of super block at mount time.
    2483             :  * Some checks already done early at mount time, like csum type and incompat
    2484             :  * flags will be skipped.
                        *
                        * Validates fs_info->super_copy with mirror_num 0, so the bytenr check in
                        * btrfs_validate_super() is performed.
                        *
                        * Return: the value returned by btrfs_validate_super().
    2485             :  */
    2486             : static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
    2487             : {
    2488        3242 :         return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Validation of super block at write time.
    2493             :  * Some checks like bytenr check will be skipped as their values will be
    2494             :  * overwritten soon.
    2495             :  * Extra checks like csum type and incompat flags will be done here.
                        *
                        * @sb is validated with mirror_num -1.  The extra checks are only reached
                        * when btrfs_validate_super() did not fail, and each of them stops at the
                        * first failure.
                        *
                        * Return: 0 on success, the negative value from btrfs_validate_super(), or
                        * -EUCLEAN for an unsupported csum type or unsupported incompat flags.  Any
                        * failure additionally logs the "corruption detected" message at "out".
    2496             :  */
    2497      442595 : static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
    2498             :                                       struct btrfs_super_block *sb)
    2499             : {
    2500      442595 :         int ret;
    2501             : 
    2502      442595 :         ret = btrfs_validate_super(fs_info, sb, -1);
    2503      442595 :         if (ret < 0)
    2504           0 :                 goto out;
                               /*
                                * NOTE(review): the "want" value printed below is always
                                * BTRFS_CSUM_TYPE_CRC32 although the test is
                                * btrfs_supported_super_csum(); presumably misleading if other
                                * csum types are accepted - confirm.
                                */
    2505      442595 :         if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
    2506           0 :                 ret = -EUCLEAN;
    2507           0 :                 btrfs_err(fs_info, "invalid csum type, has %u want %u",
    2508             :                           btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32);
    2509           0 :                 goto out;
    2510             :         }
                               /* Any incompat bit outside BTRFS_FEATURE_INCOMPAT_SUPP is rejected. */
    2511      442595 :         if (btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
    2512           0 :                 ret = -EUCLEAN;
    2513           0 :                 btrfs_err(fs_info,
    2514             :                 "invalid incompat flags, has 0x%llx valid mask 0x%llx",
    2515             :                           btrfs_super_incompat_flags(sb),
    2516             :                           (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP);
    2517           0 :                 goto out;
    2518             :         }
    2519      442595 : out:
    2520      442595 :         if (ret < 0)
    2521           0 :                 btrfs_err(fs_info,
    2522             :                 "super block corruption detected before writing it to disk");
    2523      442595 :         return ret;
    2524             : }
    2525             : 
    2526        6432 : static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
    2527             : {
                               /*
                                * Read the tree block at @bytenr and install it as root->node.
                                *
                                * The read is checked against @level, @gen (as transid) and
                                * root->root_key.objectid (as owner root).
                                *
                                * Returns 0 on success.  On failure root->node is reset to NULL
                                * and the read error, or -EIO when the buffer is not uptodate, is
                                * returned.
                                */
    2528        6432 :         struct btrfs_tree_parent_check check = {
    2529             :                 .level = level,
    2530             :                 .transid = gen,
    2531        6432 :                 .owner_root = root->root_key.objectid
    2532             :         };
    2533        6432 :         int ret = 0;
    2534             : 
    2535        6432 :         root->node = read_tree_block(root->fs_info, bytenr, &check);
    2536        6432 :         if (IS_ERR(root->node)) {
                                       /* Do not leave an error pointer behind in root->node. */
    2537           1 :                 ret = PTR_ERR(root->node);
    2538           1 :                 root->node = NULL;
    2539           1 :                 return ret;
    2540             :         }
    2541       12862 :         if (!extent_buffer_uptodate(root->node)) {
    2542           0 :                 free_extent_buffer(root->node);
    2543           0 :                 root->node = NULL;
    2544           0 :                 return -EIO;
    2545             :         }
    2546             : 
                               /*
                                * Success: update root->root_item from the node, set commit_root
                                * from btrfs_root_node() and set the root item's refs to 1.
                                */
    2547        6431 :         btrfs_set_root_node(&root->root_item, root->node);
    2548        6431 :         root->commit_root = btrfs_root_node(root);
    2549        6431 :         btrfs_set_root_refs(&root->root_item, 1);
    2550        6431 :         return ret;
    2551             : }
    2552             : 
    2553        3216 : static int load_important_roots(struct btrfs_fs_info *fs_info)
    2554             : {
                               /*
                                * Load fs_info->tree_root using the root bytenr, generation and
                                * root level stored in fs_info->super_copy.
                                *
                                * Returns 0 on success; on failure logs a warning and returns the
                                * error from load_super_root().
                                */
    2555        3216 :         struct btrfs_super_block *sb = fs_info->super_copy;
    2556        3216 :         u64 gen, bytenr;
    2557        3216 :         int level, ret;
    2558             : 
    2559        3216 :         bytenr = btrfs_super_root(sb);
    2560        3216 :         gen = btrfs_super_generation(sb);
    2561        3216 :         level = btrfs_super_root_level(sb);
    2562        3216 :         ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
    2563        3216 :         if (ret) {
    2564           1 :                 btrfs_warn(fs_info, "couldn't read tree root");
    2565           1 :                 return ret;
    2566             :         }
    2567             :         return 0;
    2568             : }
    2569             : 
    2570        3216 : static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
    2571             : {
                               /*
                                * Load the tree root and the roots that depend on it, retrying
                                * with backup roots when the USEBACKUPROOT mount option is set.
                                *
                                * The first pass uses the super block as it is.  If any step
                                * fails, the next loop iteration drops the tree root node and,
                                * only with USEBACKUPROOT, calls read_backup_root() before trying
                                * again; without that option the loop stops.
                                *
                                * Returns 0 on success.  Otherwise returns the negative value
                                * from read_backup_root(), or the error of the last failed step
                                * when the loop stops or runs out of iterations.
                                */
    2572        3216 :         int backup_index = find_newest_super_backup(fs_info);
    2573        3216 :         struct btrfs_super_block *sb = fs_info->super_copy;
    2574        3216 :         struct btrfs_root *tree_root = fs_info->tree_root;
    2575        3216 :         bool handle_error = false;
    2576        3216 :         int ret = 0;
    2577        3216 :         int i;
    2578             : 
    2579        3217 :         for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
                                       /*
                                        * handle_error is set by a failure in the previous
                                        * iteration, so read_backup_root() is first called with
                                        * i == 1.
                                        */
    2580        3217 :                 if (handle_error) {
    2581           1 :                         if (!IS_ERR(tree_root->node))
    2582           1 :                                 free_extent_buffer(tree_root->node);
    2583           1 :                         tree_root->node = NULL;
    2584             : 
                                               /* ret still holds the error of the failed step. */
    2585           1 :                         if (!btrfs_test_opt(fs_info, USEBACKUPROOT))
    2586             :                                 break;
    2587             : 
    2588           0 :                         free_root_pointers(fs_info, 0);
    2589             : 
    2590             :                         /*
    2591             :                          * Don't use the log in recovery mode, it won't be
    2592             :                          * valid
    2593             :                          */
    2594           0 :                         btrfs_set_super_log_root(sb, 0);
    2595             : 
    2596             :                         /* We can't trust the free space cache either */
    2597           0 :                         btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
    2598             : 
    2599           0 :                         btrfs_warn(fs_info, "try to load backup roots slot %d", i);
    2600           0 :                         ret = read_backup_root(fs_info, i);
                                               /*
                                                * A non-negative return value is kept as the
                                                * backup index used for backup_root_index below.
                                                */
    2601           0 :                         backup_index = ret;
    2602           0 :                         if (ret < 0)
    2603           0 :                                 return ret;
    2604             :                 }
    2605             : 
    2606        3216 :                 ret = load_important_roots(fs_info);
    2607        3216 :                 if (ret) {
    2608           1 :                         handle_error = true;
    2609           1 :                         continue;
    2610             :                 }
    2611             : 
    2612             :                 /*
    2613             :                  * No need to hold btrfs_root::objectid_mutex since the fs
    2614             :                  * hasn't been fully initialised and we are the only user
    2615             :                  */
    2616        3215 :                 ret = btrfs_init_root_free_objectid(tree_root);
    2617        3215 :                 if (ret < 0) {
    2618           0 :                         handle_error = true;
    2619           0 :                         continue;
    2620             :                 }
    2621             : 
    2622        3215 :                 ASSERT(tree_root->free_objectid <= BTRFS_LAST_FREE_OBJECTID);
    2623             : 
    2624        3215 :                 ret = btrfs_read_roots(fs_info);
    2625        3215 :                 if (ret < 0) {
    2626           0 :                         handle_error = true;
    2627           0 :                         continue;
    2628             :                 }
    2629             : 
    2630             :                 /* All successful */
    2631        3215 :                 fs_info->generation = btrfs_header_generation(tree_root->node);
    2632        3215 :                 fs_info->last_trans_committed = fs_info->generation;
    2633        3215 :                 fs_info->last_reloc_trans = 0;
    2634             : 
    2635             :                 /* Always begin writing backup roots after the one being used */
    2636        3215 :                 if (backup_index < 0) {
    2637           0 :                         fs_info->backup_root_index = 0;
    2638             :                 } else {
    2639        3215 :                         fs_info->backup_root_index = backup_index + 1;
    2640        3215 :                         fs_info->backup_root_index %= BTRFS_NUM_BACKUP_ROOTS;
    2641             :                 }
    2642             :                 break;
    2643             :         }
    2644             : 
    2645             :         return ret;
    2646             : }
    2647             : 
    2648        3472 : void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
    2649             : {
    2650        3472 :         INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
    2651        3472 :         INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
    2652        3472 :         INIT_LIST_HEAD(&fs_info->trans_list);
    2653        3472 :         INIT_LIST_HEAD(&fs_info->dead_roots);
    2654        3472 :         INIT_LIST_HEAD(&fs_info->delayed_iputs);
    2655        3472 :         INIT_LIST_HEAD(&fs_info->delalloc_roots);
    2656        3472 :         INIT_LIST_HEAD(&fs_info->caching_block_groups);
    2657        3472 :         spin_lock_init(&fs_info->delalloc_root_lock);
    2658        3472 :         spin_lock_init(&fs_info->trans_lock);
    2659        3472 :         spin_lock_init(&fs_info->fs_roots_radix_lock);
    2660        3472 :         spin_lock_init(&fs_info->delayed_iput_lock);
    2661        3472 :         spin_lock_init(&fs_info->defrag_inodes_lock);
    2662        3472 :         spin_lock_init(&fs_info->super_lock);
    2663        3472 :         spin_lock_init(&fs_info->buffer_lock);
    2664        3472 :         spin_lock_init(&fs_info->unused_bgs_lock);
    2665        3472 :         spin_lock_init(&fs_info->treelog_bg_lock);
    2666        3472 :         spin_lock_init(&fs_info->zone_active_bgs_lock);
    2667        3472 :         spin_lock_init(&fs_info->relocation_bg_lock);
    2668        3472 :         rwlock_init(&fs_info->tree_mod_log_lock);
    2669        3472 :         rwlock_init(&fs_info->global_root_lock);
    2670        3472 :         mutex_init(&fs_info->unused_bg_unpin_mutex);
    2671        3472 :         mutex_init(&fs_info->reclaim_bgs_lock);
    2672        3472 :         mutex_init(&fs_info->reloc_mutex);
    2673        3472 :         mutex_init(&fs_info->delalloc_root_mutex);
    2674        3472 :         mutex_init(&fs_info->zoned_meta_io_lock);
    2675        3472 :         mutex_init(&fs_info->zoned_data_reloc_io_lock);
    2676        3472 :         seqlock_init(&fs_info->profiles_lock);
    2677             : 
    2678        3472 :         btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers);
    2679        3472 :         btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
    2680        3472 :         btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
    2681        3472 :         btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
    2682        3472 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
    2683             :                                      BTRFS_LOCKDEP_TRANS_COMMIT_START);
    2684        3472 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
    2685             :                                      BTRFS_LOCKDEP_TRANS_UNBLOCKED);
    2686        3472 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
    2687             :                                      BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
    2688        3472 :         btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed,
    2689             :                                      BTRFS_LOCKDEP_TRANS_COMPLETED);
    2690             : 
    2691        3472 :         INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
    2692        3472 :         INIT_LIST_HEAD(&fs_info->space_info);
    2693        3472 :         INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
    2694        3472 :         INIT_LIST_HEAD(&fs_info->unused_bgs);
    2695        3472 :         INIT_LIST_HEAD(&fs_info->reclaim_bgs);
    2696        3472 :         INIT_LIST_HEAD(&fs_info->zone_active_bgs);
    2697             : #ifdef CONFIG_BTRFS_DEBUG
    2698             :         INIT_LIST_HEAD(&fs_info->allocated_roots);
    2699             :         INIT_LIST_HEAD(&fs_info->allocated_ebs);
    2700             :         spin_lock_init(&fs_info->eb_leak_lock);
    2701             : #endif
    2702        3472 :         extent_map_tree_init(&fs_info->mapping_tree);
    2703        3472 :         btrfs_init_block_rsv(&fs_info->global_block_rsv,
    2704             :                              BTRFS_BLOCK_RSV_GLOBAL);
    2705        3472 :         btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
    2706        3472 :         btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK);
    2707        3472 :         btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY);
    2708        3472 :         btrfs_init_block_rsv(&fs_info->delayed_block_rsv,
    2709             :                              BTRFS_BLOCK_RSV_DELOPS);
    2710        3472 :         btrfs_init_block_rsv(&fs_info->delayed_refs_rsv,
    2711             :                              BTRFS_BLOCK_RSV_DELREFS);
    2712             : 
    2713        3472 :         atomic_set(&fs_info->async_delalloc_pages, 0);
    2714        3472 :         atomic_set(&fs_info->defrag_running, 0);
    2715        3472 :         atomic_set(&fs_info->nr_delayed_iputs, 0);
    2716        3472 :         atomic64_set(&fs_info->tree_mod_seq, 0);
    2717        3472 :         fs_info->global_root_tree = RB_ROOT;
    2718        3472 :         fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
    2719        3472 :         fs_info->metadata_ratio = 0;
    2720        3472 :         fs_info->defrag_inodes = RB_ROOT;
    2721        3472 :         atomic64_set(&fs_info->free_chunk_space, 0);
    2722        3472 :         fs_info->tree_mod_log = RB_ROOT;
    2723        3472 :         fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
    2724        3472 :         btrfs_init_ref_verify(fs_info);
    2725             : 
    2726        3472 :         fs_info->thread_pool_size = min_t(unsigned long,
    2727             :                                           num_online_cpus() + 2, 8);
    2728             : 
    2729        3472 :         INIT_LIST_HEAD(&fs_info->ordered_roots);
    2730        3472 :         spin_lock_init(&fs_info->ordered_root_lock);
    2731             : 
    2732        3472 :         btrfs_init_scrub(fs_info);
    2733             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    2734             :         fs_info->check_integrity_print_mask = 0;
    2735             : #endif
    2736        3472 :         btrfs_init_balance(fs_info);
    2737        3472 :         btrfs_init_async_reclaim_work(fs_info);
    2738             : 
    2739        3472 :         rwlock_init(&fs_info->block_group_cache_lock);
    2740        3472 :         fs_info->block_group_cache_tree = RB_ROOT_CACHED;
    2741             : 
    2742        3472 :         extent_io_tree_init(fs_info, &fs_info->excluded_extents,
    2743             :                             IO_TREE_FS_EXCLUDED_EXTENTS);
    2744             : 
    2745        3472 :         mutex_init(&fs_info->ordered_operations_mutex);
    2746        3472 :         mutex_init(&fs_info->tree_log_mutex);
    2747        3472 :         mutex_init(&fs_info->chunk_mutex);
    2748        3472 :         mutex_init(&fs_info->transaction_kthread_mutex);
    2749        3472 :         mutex_init(&fs_info->cleaner_mutex);
    2750        3472 :         mutex_init(&fs_info->ro_block_group_mutex);
    2751        3472 :         init_rwsem(&fs_info->commit_root_sem);
    2752        3472 :         init_rwsem(&fs_info->cleanup_work_sem);
    2753        3472 :         init_rwsem(&fs_info->subvol_sem);
    2754        3472 :         sema_init(&fs_info->uuid_tree_rescan_sem, 1);
    2755             : 
    2756        3472 :         btrfs_init_dev_replace_locks(fs_info);
    2757        3472 :         btrfs_init_qgroup(fs_info);
    2758        3472 :         btrfs_discard_init(fs_info);
    2759             : 
    2760        3472 :         btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
    2761        3472 :         btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
    2762             : 
    2763        3472 :         init_waitqueue_head(&fs_info->transaction_throttle);
    2764        3472 :         init_waitqueue_head(&fs_info->transaction_wait);
    2765        3472 :         init_waitqueue_head(&fs_info->transaction_blocked_wait);
    2766        3472 :         init_waitqueue_head(&fs_info->async_submit_wait);
    2767        3472 :         init_waitqueue_head(&fs_info->delayed_iputs_wait);
    2768             : 
    2769             :         /* Usable values until the real ones are cached from the superblock */
    2770        3472 :         fs_info->nodesize = 4096;
    2771        3472 :         fs_info->sectorsize = 4096;
    2772        3472 :         fs_info->sectorsize_bits = ilog2(4096);
    2773        3472 :         fs_info->stripesize = 4096;
    2774             : 
    2775        3472 :         fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
    2776             : 
    2777        3472 :         spin_lock_init(&fs_info->swapfile_pins_lock);
    2778        3472 :         fs_info->swapfile_pins = RB_ROOT;
    2779             : 
    2780        3472 :         fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
    2781        3472 :         INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
    2782        3472 : }
    2783             : 
    2784        3242 : static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
    2785             : {
    2786        3242 :         int ret;
    2787             : 
    2788        3242 :         fs_info->sb = sb;
    2789        3242 :         sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE;
    2790        3242 :         sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE);
    2791             : 
    2792        3242 :         ret = percpu_counter_init(&fs_info->ordered_bytes, 0, GFP_KERNEL);
    2793        3242 :         if (ret)
    2794             :                 return ret;
    2795             : 
    2796        3242 :         ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
    2797        3242 :         if (ret)
    2798             :                 return ret;
    2799             : 
    2800           0 :         fs_info->dirty_metadata_batch = PAGE_SIZE *
    2801        3242 :                                         (1 + ilog2(nr_cpu_ids));
    2802             : 
    2803        3242 :         ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
    2804        3242 :         if (ret)
    2805             :                 return ret;
    2806             : 
    2807        3242 :         ret = percpu_counter_init(&fs_info->dev_replace.bio_counter, 0,
    2808             :                         GFP_KERNEL);
    2809        3242 :         if (ret)
    2810             :                 return ret;
    2811             : 
    2812        3242 :         fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
    2813             :                                         GFP_KERNEL);
    2814        3242 :         if (!fs_info->delayed_root)
    2815             :                 return -ENOMEM;
    2816        3242 :         btrfs_init_delayed_root(fs_info->delayed_root);
    2817             : 
    2818        3242 :         if (sb_rdonly(sb))
    2819          38 :                 set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
    2820             : 
    2821        3242 :         return btrfs_alloc_stripe_hash_table(fs_info);
    2822             : }
    2823             : 
    2824         991 : static int btrfs_uuid_rescan_kthread(void *data)
    2825             : {
    2826         991 :         struct btrfs_fs_info *fs_info = data;
    2827         991 :         int ret;
    2828             : 
    2829             :         /*
    2830             :          * 1st step is to iterate through the existing UUID tree and
    2831             :          * to delete all entries that contain outdated data.
    2832             :          * 2nd step is to add all missing entries to the UUID tree.
    2833             :          */
    2834         991 :         ret = btrfs_uuid_tree_iterate(fs_info);
    2835         991 :         if (ret < 0) {
    2836           0 :                 if (ret != -EINTR)
    2837           0 :                         btrfs_warn(fs_info, "iterating uuid_tree failed %d",
    2838             :                                    ret);
    2839           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    2840           0 :                 return ret;
    2841             :         }
    2842         991 :         return btrfs_uuid_scan_kthread(data);
    2843             : }
    2844             : 
    2845         991 : static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
    2846             : {
    2847         991 :         struct task_struct *task;
    2848             : 
    2849         991 :         down(&fs_info->uuid_tree_rescan_sem);
    2850         991 :         task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
    2851         991 :         if (IS_ERR(task)) {
    2852             :                 /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
    2853           0 :                 btrfs_warn(fs_info, "failed to start uuid_rescan task");
    2854           0 :                 up(&fs_info->uuid_tree_rescan_sem);
    2855           0 :                 return PTR_ERR(task);
    2856             :         }
    2857             : 
    2858             :         return 0;
    2859             : }
    2860             : 
    2861             : /*
    2862             :  * Some options only have meaning at mount time and shouldn't persist across
    2863             :  * remounts, or be displayed. Clear these at the end of mount and remount
    2864             :  * code paths.
    2865             :  */
    2866         118 : void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info)
    2867             : {
    2868        3333 :         btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
    2869        3333 :         btrfs_clear_opt(fs_info->mount_opt, CLEAR_CACHE);
    2870         118 : }
    2871             : 
    2872             : /*
    2873             :  * Mounting logic specific to read-write file systems. Shared by open_ctree
    2874             :  * and btrfs_remount when remounting from read-only to read-write.
    2875             :  */
    2876        3179 : int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
    2877             : {
    2878        3179 :         int ret;
    2879        3179 :         const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
    2880        3179 :         bool rebuild_free_space_tree = false;
    2881             : 
    2882        3179 :         if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
    2883          10 :             btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
    2884             :                 rebuild_free_space_tree = true;
    2885        3170 :         } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
    2886        3164 :                    !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
    2887           0 :                 btrfs_warn(fs_info, "free space tree is invalid");
    2888           0 :                 rebuild_free_space_tree = true;
    2889             :         }
    2890             : 
    2891           0 :         if (rebuild_free_space_tree) {
    2892           9 :                 btrfs_info(fs_info, "rebuilding free space tree");
    2893           9 :                 ret = btrfs_rebuild_free_space_tree(fs_info);
    2894           9 :                 if (ret) {
    2895           0 :                         btrfs_warn(fs_info,
    2896             :                                    "failed to rebuild free space tree: %d", ret);
    2897           0 :                         goto out;
    2898             :                 }
    2899             :         }
    2900             : 
    2901        3179 :         if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
    2902        3173 :             !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) {
    2903           5 :                 btrfs_info(fs_info, "disabling free space tree");
    2904           5 :                 ret = btrfs_delete_free_space_tree(fs_info);
    2905           5 :                 if (ret) {
    2906           0 :                         btrfs_warn(fs_info,
    2907             :                                    "failed to disable free space tree: %d", ret);
    2908           0 :                         goto out;
    2909             :                 }
    2910             :         }
    2911             : 
    2912             :         /*
    2913             :          * btrfs_find_orphan_roots() is responsible for finding all the dead
    2914             :          * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
    2915             :          * them into the fs_info->fs_roots_radix tree. This must be done before
    2916             :          * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
    2917             :          * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
    2918             :          * item before the root's tree is deleted - this means that if we unmount
    2919             :          * or crash before the deletion completes, on the next mount we will not
    2920             :          * delete what remains of the tree because the orphan item does not
    2921             :          * exist anymore, which is what tells us we have a pending deletion.
    2922             :          */
    2923        3179 :         ret = btrfs_find_orphan_roots(fs_info);
    2924        3179 :         if (ret)
    2925           0 :                 goto out;
    2926             : 
    2927        3179 :         ret = btrfs_cleanup_fs_roots(fs_info);
    2928        3179 :         if (ret)
    2929           0 :                 goto out;
    2930             : 
    2931        3179 :         down_read(&fs_info->cleanup_work_sem);
    2932        6358 :         if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
    2933        3179 :             (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
    2934           0 :                 up_read(&fs_info->cleanup_work_sem);
    2935           0 :                 goto out;
    2936             :         }
    2937        3179 :         up_read(&fs_info->cleanup_work_sem);
    2938             : 
    2939        3179 :         mutex_lock(&fs_info->cleaner_mutex);
    2940        3179 :         ret = btrfs_recover_relocation(fs_info);
    2941        3179 :         mutex_unlock(&fs_info->cleaner_mutex);
    2942        3179 :         if (ret < 0) {
    2943           0 :                 btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
    2944           0 :                 goto out;
    2945             :         }
    2946             : 
    2947        3179 :         if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) &&
    2948        3170 :             !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
    2949           2 :                 btrfs_info(fs_info, "creating free space tree");
    2950           2 :                 ret = btrfs_create_free_space_tree(fs_info);
    2951           2 :                 if (ret) {
    2952           0 :                         btrfs_warn(fs_info,
    2953             :                                 "failed to create free space tree: %d", ret);
    2954           0 :                         goto out;
    2955             :                 }
    2956             :         }
    2957             : 
    2958        3179 :         if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) {
    2959           2 :                 ret = btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
    2960           2 :                 if (ret)
    2961           0 :                         goto out;
    2962             :         }
    2963             : 
    2964        3179 :         ret = btrfs_resume_balance_async(fs_info);
    2965        3179 :         if (ret)
    2966           0 :                 goto out;
    2967             : 
    2968        3179 :         ret = btrfs_resume_dev_replace_async(fs_info);
    2969        3179 :         if (ret) {
    2970           0 :                 btrfs_warn(fs_info, "failed to resume dev_replace");
    2971           0 :                 goto out;
    2972             :         }
    2973             : 
    2974        3179 :         btrfs_qgroup_rescan_resume(fs_info);
    2975             : 
    2976        3179 :         if (!fs_info->uuid_root) {
    2977           3 :                 btrfs_info(fs_info, "creating UUID tree");
    2978           3 :                 ret = btrfs_create_uuid_tree(fs_info);
    2979           3 :                 if (ret) {
    2980           0 :                         btrfs_warn(fs_info,
    2981             :                                    "failed to create the UUID tree %d", ret);
    2982           0 :                         goto out;
    2983             :                 }
    2984             :         }
    2985             : 
    2986        3179 : out:
    2987        3179 :         return ret;
    2988             : }
    2989             : 
    2990             : /*
    2991             :  * Do various sanity and dependency checks of different features.
    2992             :  *
    2993             :  * @is_rw_mount:        If the mount is read-write.
    2994             :  *
    2995             :  * This is the place for less strict checks (like for subpage or artificial
    2996             :  * feature dependencies).
    2997             :  *
    2998             :  * For strict checks or possible corruption detection, see
    2999             :  * btrfs_validate_super().
    3000             :  *
    3001             :  * This should be called after btrfs_parse_options(), as some mount options
    3002             :  * (space cache related) can modify on-disk format like free space tree and
    3003             :  * screw up certain feature dependencies.
    3004             :  */
    3005        3334 : int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
    3006             : {
    3007        3334 :         struct btrfs_super_block *disk_super = fs_info->super_copy;
    3008        3334 :         u64 incompat = btrfs_super_incompat_flags(disk_super);
    3009        3334 :         const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
    3010        3334 :         const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);
    3011             : 
    3012        3334 :         if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
    3013           0 :                 btrfs_err(fs_info,
    3014             :                 "cannot mount because of unknown incompat features (0x%llx)",
    3015             :                     incompat);
    3016           0 :                 return -EINVAL;
    3017             :         }
    3018             : 
    3019             :         /* Runtime limitation for mixed block groups. */
    3020        3334 :         if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
    3021          30 :             (fs_info->sectorsize != fs_info->nodesize)) {
    3022           0 :                 btrfs_err(fs_info,
    3023             : "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
    3024             :                         fs_info->nodesize, fs_info->sectorsize);
    3025           0 :                 return -EINVAL;
    3026             :         }
    3027             : 
    3028             :         /* Mixed backref is an always-enabled feature. */
    3029        3334 :         incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
    3030             : 
    3031             :         /* Set compression related flags just in case. */
    3032        3334 :         if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
    3033          19 :                 incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
    3034        3315 :         else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
    3035          10 :                 incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
    3036             : 
    3037             :         /*
    3038             :          * An ancient flag, which should really be marked deprecated.
    3039             :          * Such runtime limitation doesn't really need an incompat flag.
    3040             :          */
    3041        3334 :         if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
    3042        3304 :                 incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
    3043             : 
    3044        3334 :         if (compat_ro_unsupp && is_rw_mount) {
    3045           0 :                 btrfs_err(fs_info,
    3046             :         "cannot mount read-write because of unknown compat_ro features (0x%llx)",
    3047             :                        compat_ro);
    3048           0 :                 return -EINVAL;
    3049             :         }
    3050             : 
    3051             :         /*
    3052             :          * We have unsupported RO compat features, although RO mounted, we
    3053             :          * should not cause any metadata writes, including log replay.
    3054             :          * Or we could screw up whatever the new feature requires.
    3055             :          */
    3056        3334 :         if (compat_ro_unsupp && btrfs_super_log_root(disk_super) &&
    3057           0 :             !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
    3058           0 :                 btrfs_err(fs_info,
    3059             : "cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
    3060             :                           compat_ro);
    3061           0 :                 return -EINVAL;
    3062             :         }
    3063             : 
    3064             :         /*
    3065             :          * Artificial limitations for block group tree, to force
    3066             :          * block-group-tree to rely on no-holes and free-space-tree.
    3067             :          */
    3068        3334 :         if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
    3069           0 :             (!btrfs_fs_incompat(fs_info, NO_HOLES) ||
    3070           0 :              !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) {
    3071           0 :                 btrfs_err(fs_info,
    3072             : "block-group-tree feature requires no-holes and free-space-tree features");
    3073           0 :                 return -EINVAL;
    3074             :         }
    3075             : 
    3076             :         /*
    3077             :          * Subpage runtime limitation on v1 cache.
    3078             :          *
    3079             :          * V1 space cache still has some hard-coded PAGE_SIZE usage, while
    3080             :          * we're already defaulting to v2 cache, no need to bother v1 as it's
    3081             :          * going to be deprecated anyway.
    3082             :          */
    3083        3334 :         if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
    3084           0 :                 btrfs_warn(fs_info,
    3085             :         "v1 space cache is not supported for page size %lu with sectorsize %u",
    3086             :                            PAGE_SIZE, fs_info->sectorsize);
    3087           0 :                 return -EINVAL;
    3088             :         }
    3089             : 
    3090             :         /* This can be called by remount, we need to protect the super block. */
    3091        3334 :         spin_lock(&fs_info->super_lock);
    3092        3334 :         btrfs_set_super_incompat_flags(disk_super, incompat);
    3093        3334 :         spin_unlock(&fs_info->super_lock);
    3094             : 
    3095        3334 :         return 0;
    3096             : }
    3097             : 
    3098        3242 : int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
    3099             :                       char *options)
    3100             : {
    3101        3242 :         u32 sectorsize;
    3102        3242 :         u32 nodesize;
    3103        3242 :         u32 stripesize;
    3104        3242 :         u64 generation;
    3105        3242 :         u64 features;
    3106        3242 :         u16 csum_type;
    3107        3242 :         struct btrfs_super_block *disk_super;
    3108        3242 :         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
    3109        3242 :         struct btrfs_root *tree_root;
    3110        3242 :         struct btrfs_root *chunk_root;
    3111        3242 :         int ret;
    3112        3242 :         int level;
    3113             : 
    3114        3242 :         ret = init_mount_fs_info(fs_info, sb);
    3115        3242 :         if (ret)
    3116           0 :                 goto fail;
    3117             : 
    3118             :         /* These need to be init'ed before we start creating inodes and such. */
    3119        3242 :         tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID,
    3120             :                                      GFP_KERNEL);
    3121        3242 :         fs_info->tree_root = tree_root;
    3122        3242 :         chunk_root = btrfs_alloc_root(fs_info, BTRFS_CHUNK_TREE_OBJECTID,
    3123             :                                       GFP_KERNEL);
    3124        3242 :         fs_info->chunk_root = chunk_root;
    3125        3242 :         if (!tree_root || !chunk_root) {
    3126           0 :                 ret = -ENOMEM;
    3127           0 :                 goto fail;
    3128             :         }
    3129             : 
    3130        3242 :         ret = btrfs_init_btree_inode(sb);
    3131        3242 :         if (ret)
    3132           0 :                 goto fail;
    3133             : 
    3134        3242 :         invalidate_bdev(fs_devices->latest_dev->bdev);
    3135             : 
    3136             :         /*
    3137             :          * Read super block and check the signature bytes only
    3138             :          */
    3139        3242 :         disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev);
    3140        3242 :         if (IS_ERR(disk_super)) {
    3141           0 :                 ret = PTR_ERR(disk_super);
    3142           0 :                 goto fail_alloc;
    3143             :         }
    3144             : 
    3145             :         /*
    3146             :          * Verify the type first, if that or the checksum value are
    3147             :          * corrupted, we'll find out
    3148             :          */
    3149        3242 :         csum_type = btrfs_super_csum_type(disk_super);
    3150        3242 :         if (!btrfs_supported_super_csum(csum_type)) {
    3151           0 :                 btrfs_err(fs_info, "unsupported checksum algorithm: %u",
    3152             :                           csum_type);
    3153           0 :                 ret = -EINVAL;
    3154           0 :                 btrfs_release_disk_super(disk_super);
    3155           0 :                 goto fail_alloc;
    3156             :         }
    3157             : 
    3158        3242 :         fs_info->csum_size = btrfs_super_csum_size(disk_super);
    3159             : 
    3160        3242 :         ret = btrfs_init_csum_hash(fs_info, csum_type);
    3161        3242 :         if (ret) {
    3162           0 :                 btrfs_release_disk_super(disk_super);
    3163           0 :                 goto fail_alloc;
    3164             :         }
    3165             : 
    3166             :         /*
    3167             :          * We want to check superblock checksum, the type is stored inside.
    3168             :          * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
    3169             :          */
    3170        3242 :         if (btrfs_check_super_csum(fs_info, disk_super)) {
    3171           0 :                 btrfs_err(fs_info, "superblock checksum mismatch");
    3172           0 :                 ret = -EINVAL;
    3173           0 :                 btrfs_release_disk_super(disk_super);
    3174           0 :                 goto fail_alloc;
    3175             :         }
    3176             : 
    3177             :         /*
    3178             :          * super_copy is zeroed at allocation time and we never touch the
    3179             :          * following bytes up to INFO_SIZE, the checksum is calculated from
    3180             :          * the whole block of INFO_SIZE
    3181             :          */
    3182        6484 :         memcpy(fs_info->super_copy, disk_super, sizeof(*fs_info->super_copy));
    3183        3242 :         btrfs_release_disk_super(disk_super);
    3184             : 
    3185        3242 :         disk_super = fs_info->super_copy;
    3186             : 
    3187             : 
    3188        3242 :         features = btrfs_super_flags(disk_super);
    3189        3242 :         if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
    3190           0 :                 features &= ~BTRFS_SUPER_FLAG_CHANGING_FSID_V2;
    3191           0 :                 btrfs_set_super_flags(disk_super, features);
    3192           0 :                 btrfs_info(fs_info,
    3193             :                         "found metadata UUID change in progress flag, clearing");
    3194             :         }
    3195             : 
    3196        6484 :         memcpy(fs_info->super_for_commit, fs_info->super_copy,
    3197             :                sizeof(*fs_info->super_for_commit));
    3198             : 
    3199        3242 :         ret = btrfs_validate_mount_super(fs_info);
    3200        3242 :         if (ret) {
    3201           0 :                 btrfs_err(fs_info, "superblock contains fatal errors");
    3202           0 :                 ret = -EINVAL;
    3203           0 :                 goto fail_alloc;
    3204             :         }
    3205             : 
    3206        3242 :         if (!btrfs_super_root(disk_super)) {
    3207           0 :                 btrfs_err(fs_info, "invalid superblock tree root bytenr");
    3208           0 :                 ret = -EINVAL;
    3209           0 :                 goto fail_alloc;
    3210             :         }
    3211             : 
    3212             :         /* check FS state, whether FS is broken. */
    3213        3242 :         if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
    3214           0 :                 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
    3215             : 
    3216             :         /*
    3217             :          * In the long term, we'll store the compression type in the super
    3218             :          * block, and it'll be used for per file compression control.
    3219             :          */
    3220        3242 :         fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
    3221             : 
    3222             : 
    3223             :         /* Set up fs_info before parsing mount options */
    3224        3242 :         nodesize = btrfs_super_nodesize(disk_super);
    3225        3242 :         sectorsize = btrfs_super_sectorsize(disk_super);
    3226        3242 :         stripesize = sectorsize;
    3227        3242 :         fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
    3228        3242 :         fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
    3229             : 
    3230        3242 :         fs_info->nodesize = nodesize;
    3231        3242 :         fs_info->sectorsize = sectorsize;
    3232        3242 :         fs_info->sectorsize_bits = ilog2(sectorsize);
    3233        3242 :         fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
    3234        3242 :         fs_info->stripesize = stripesize;
    3235             : 
    3236        3242 :         ret = btrfs_parse_options(fs_info, options, sb->s_flags);
    3237        3242 :         if (ret)
    3238          26 :                 goto fail_alloc;
    3239             : 
    3240        3216 :         ret = btrfs_check_features(fs_info, !sb_rdonly(sb));
    3241        3216 :         if (ret < 0)
    3242           0 :                 goto fail_alloc;
    3243             : 
    3244        3216 :         if (sectorsize < PAGE_SIZE) {
    3245           0 :                 struct btrfs_subpage_info *subpage_info;
    3246             : 
    3247             :                 /*
    3248             :                  * V1 space cache has some hardcoded PAGE_SIZE usage, and is
    3249             :                  * going to be deprecated.
    3250             :                  *
    3251             :                  * Force to use v2 cache for subpage case.
    3252             :                  */
    3253           0 :                 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
    3254           0 :                 btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
    3255             :                         "forcing free space tree for sector size %u with page size %lu",
    3256             :                         sectorsize, PAGE_SIZE);
    3257             : 
    3258           0 :                 btrfs_warn(fs_info,
    3259             :                 "read-write for sector size %u with page size %lu is experimental",
    3260             :                            sectorsize, PAGE_SIZE);
    3261           0 :                 subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL);
    3262           0 :                 if (!subpage_info) {
    3263           0 :                         ret = -ENOMEM;
    3264           0 :                         goto fail_alloc;
    3265             :                 }
    3266           0 :                 btrfs_init_subpage_info(subpage_info, sectorsize);
    3267           0 :                 fs_info->subpage_info = subpage_info;
    3268             :         }
    3269             : 
    3270        3216 :         ret = btrfs_init_workqueues(fs_info);
    3271        3216 :         if (ret)
    3272           0 :                 goto fail_sb_buffer;
    3273             : 
    3274        3216 :         sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
    3275        3216 :         sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
    3276             : 
    3277        3216 :         sb->s_blocksize = sectorsize;
    3278        3216 :         sb->s_blocksize_bits = blksize_bits(sectorsize);
    3279        6432 :         memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE);
    3280             : 
    3281        3216 :         mutex_lock(&fs_info->chunk_mutex);
    3282        3216 :         ret = btrfs_read_sys_array(fs_info);
    3283        3216 :         mutex_unlock(&fs_info->chunk_mutex);
    3284        3216 :         if (ret) {
    3285           0 :                 btrfs_err(fs_info, "failed to read the system array: %d", ret);
    3286           0 :                 goto fail_sb_buffer;
    3287             :         }
    3288             : 
    3289        3216 :         generation = btrfs_super_chunk_root_generation(disk_super);
    3290        3216 :         level = btrfs_super_chunk_root_level(disk_super);
    3291        3216 :         ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
    3292             :                               generation, level);
    3293        3216 :         if (ret) {
    3294           0 :                 btrfs_err(fs_info, "failed to read chunk root");
    3295           0 :                 goto fail_tree_roots;
    3296             :         }
    3297             : 
    3298        3216 :         read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
    3299             :                            offsetof(struct btrfs_header, chunk_tree_uuid),
    3300             :                            BTRFS_UUID_SIZE);
    3301             : 
    3302        3216 :         ret = btrfs_read_chunk_tree(fs_info);
    3303        3216 :         if (ret) {
    3304           0 :                 btrfs_err(fs_info, "failed to read chunk tree: %d", ret);
    3305           0 :                 goto fail_tree_roots;
    3306             :         }
    3307             : 
    3308             :         /*
    3309             :          * At this point we know all the devices that make this filesystem,
    3310             :          * including the seed devices but we don't know yet if the replace
    3311             :          * target is required. So free devices that are not part of this
    3312             :          * filesystem but skip the replace target device which is checked
    3313             :          * below in btrfs_init_dev_replace().
    3314             :          */
    3315        3216 :         btrfs_free_extra_devids(fs_devices);
    3316        3216 :         if (!fs_devices->latest_dev->bdev) {
    3317           0 :                 btrfs_err(fs_info, "failed to read devices");
    3318           0 :                 ret = -EIO;
    3319           0 :                 goto fail_tree_roots;
    3320             :         }
    3321             : 
    3322        3216 :         ret = init_tree_roots(fs_info);
    3323        3216 :         if (ret)
    3324           1 :                 goto fail_tree_roots;
    3325             : 
    3326             :         /*
    3327             :          * Get zone type information of zoned block devices. This will also
    3328             :          * handle emulation of a zoned filesystem if a regular device has the
    3329             :          * zoned incompat feature flag set.
    3330             :          */
    3331        3215 :         ret = btrfs_get_dev_zone_info_all_devices(fs_info);
    3332        3215 :         if (ret) {
    3333           0 :                 btrfs_err(fs_info,
    3334             :                           "zoned: failed to read device zone info: %d", ret);
    3335           0 :                 goto fail_block_groups;
    3336             :         }
    3337             : 
    3338             :         /*
    3339             :          * If we have a uuid root and we're not being told to rescan we need to
    3340             :          * check the generation here so we can set the
    3341             :          * BTRFS_FS_UPDATE_UUID_TREE_GEN bit.  Otherwise we could commit the
    3342             :          * transaction during a balance or the log replay without updating the
    3343             :          * uuid generation, and then if we crash we would rescan the uuid tree,
    3344             :          * even though it was perfectly fine.
    3345             :          */
    3346        3215 :         if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) &&
    3347        3211 :             fs_info->generation == btrfs_super_uuid_tree_generation(disk_super))
    3348        2224 :                 set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
    3349             : 
    3350        3215 :         ret = btrfs_verify_dev_extents(fs_info);
    3351        3215 :         if (ret) {
    3352           0 :                 btrfs_err(fs_info,
    3353             :                           "failed to verify dev extents against chunks: %d",
    3354             :                           ret);
    3355           0 :                 goto fail_block_groups;
    3356             :         }
    3357        3215 :         ret = btrfs_recover_balance(fs_info);
    3358        3215 :         if (ret) {
    3359           0 :                 btrfs_err(fs_info, "failed to recover balance: %d", ret);
    3360           0 :                 goto fail_block_groups;
    3361             :         }
    3362             : 
    3363        3215 :         ret = btrfs_init_dev_stats(fs_info);
    3364        3215 :         if (ret) {
    3365           0 :                 btrfs_err(fs_info, "failed to init dev_stats: %d", ret);
    3366           0 :                 goto fail_block_groups;
    3367             :         }
    3368             : 
    3369        3215 :         ret = btrfs_init_dev_replace(fs_info);
    3370        3215 :         if (ret) {
    3371           0 :                 btrfs_err(fs_info, "failed to init dev_replace: %d", ret);
    3372           0 :                 goto fail_block_groups;
    3373             :         }
    3374             : 
    3375        3215 :         ret = btrfs_check_zoned_mode(fs_info);
    3376        3215 :         if (ret) {
    3377           0 :                 btrfs_err(fs_info, "failed to initialize zoned mode: %d",
    3378             :                           ret);
    3379           0 :                 goto fail_block_groups;
    3380             :         }
    3381             : 
    3382        3215 :         ret = btrfs_sysfs_add_fsid(fs_devices);
    3383        3215 :         if (ret) {
    3384           0 :                 btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
    3385             :                                 ret);
    3386           0 :                 goto fail_block_groups;
    3387             :         }
    3388             : 
    3389        3215 :         ret = btrfs_sysfs_add_mounted(fs_info);
    3390        3215 :         if (ret) {
    3391           0 :                 btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
    3392           0 :                 goto fail_fsdev_sysfs;
    3393             :         }
    3394             : 
    3395        3215 :         ret = btrfs_init_space_info(fs_info);
    3396        3215 :         if (ret) {
    3397           0 :                 btrfs_err(fs_info, "failed to initialize space info: %d", ret);
    3398           0 :                 goto fail_sysfs;
    3399             :         }
    3400             : 
    3401        3215 :         ret = btrfs_read_block_groups(fs_info);
    3402        3215 :         if (ret) {
    3403           0 :                 btrfs_err(fs_info, "failed to read block groups: %d", ret);
    3404           0 :                 goto fail_sysfs;
    3405             :         }
    3406             : 
    3407        3215 :         btrfs_free_zone_cache(fs_info);
    3408             : 
    3409        3215 :         if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
    3410           0 :             !btrfs_check_rw_degradable(fs_info, NULL)) {
    3411           0 :                 btrfs_warn(fs_info,
    3412             :                 "writable mount is not allowed due to too many missing devices");
    3413           0 :                 ret = -EINVAL;
    3414           0 :                 goto fail_sysfs;
    3415             :         }
    3416             : 
    3417        3215 :         fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
    3418             :                                                "btrfs-cleaner");
    3419        3215 :         if (IS_ERR(fs_info->cleaner_kthread)) {
    3420           0 :                 ret = PTR_ERR(fs_info->cleaner_kthread);
    3421           0 :                 goto fail_sysfs;
    3422             :         }
    3423             : 
    3424        3215 :         fs_info->transaction_kthread = kthread_run(transaction_kthread,
    3425             :                                                    tree_root,
    3426             :                                                    "btrfs-transaction");
    3427        3215 :         if (IS_ERR(fs_info->transaction_kthread)) {
    3428           0 :                 ret = PTR_ERR(fs_info->transaction_kthread);
    3429           0 :                 goto fail_cleaner;
    3430             :         }
    3431             : 
    3432        3215 :         if (!btrfs_test_opt(fs_info, NOSSD) &&
    3433        3213 :             !fs_info->fs_devices->rotating) {
    3434          16 :                 btrfs_set_and_info(fs_info, SSD, "enabling ssd optimizations");
    3435             :         }
    3436             : 
    3437             :         /*
    3438             :          * For devices supporting discard turn on discard=async automatically,
    3439             :          * unless it's already set or disabled. This could be turned off by
    3440             :          * nodiscard for the same mount.
    3441             :          */
    3442        3215 :         if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
    3443             :               btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
    3444        3206 :               btrfs_test_opt(fs_info, NODISCARD)) &&
    3445        3206 :             fs_info->fs_devices->discardable) {
    3446        3198 :                 btrfs_set_and_info(fs_info, DISCARD_ASYNC,
    3447             :                                    "auto enabling async discard");
    3448             :         }
    3449             : 
    3450             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    3451             :         if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) {
    3452             :                 ret = btrfsic_mount(fs_info, fs_devices,
    3453             :                                     btrfs_test_opt(fs_info,
    3454             :                                         CHECK_INTEGRITY_DATA) ? 1 : 0,
    3455             :                                     fs_info->check_integrity_print_mask);
    3456             :                 if (ret)
    3457             :                         btrfs_warn(fs_info,
    3458             :                                 "failed to initialize integrity check module: %d",
    3459             :                                 ret);
    3460             :         }
    3461             : #endif
    3462        3215 :         ret = btrfs_read_qgroup_config(fs_info);
    3463        3215 :         if (ret)
    3464           0 :                 goto fail_trans_kthread;
    3465             : 
    3466        3215 :         if (btrfs_build_ref_tree(fs_info))
    3467             :                 btrfs_err(fs_info, "couldn't build ref tree");
    3468             : 
    3469             :         /* do not make disk changes in broken FS or nologreplay is given */
    3470        3215 :         if (btrfs_super_log_root(disk_super) != 0 &&
    3471         283 :             !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
    3472         283 :                 btrfs_info(fs_info, "start tree-log replay");
    3473         283 :                 ret = btrfs_replay_log(fs_info, fs_devices);
    3474         283 :                 if (ret)
    3475           0 :                         goto fail_qgroup;
    3476             :         }
    3477             : 
    3478        3215 :         fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
    3479        3215 :         if (IS_ERR(fs_info->fs_root)) {
    3480           0 :                 ret = PTR_ERR(fs_info->fs_root);
    3481           0 :                 btrfs_warn(fs_info, "failed to read fs tree: %d", ret);
    3482           0 :                 fs_info->fs_root = NULL;
    3483           0 :                 goto fail_qgroup;
    3484             :         }
    3485             : 
    3486        3215 :         if (sb_rdonly(sb))
    3487          38 :                 goto clear_oneshot;
    3488             : 
    3489        3177 :         ret = btrfs_start_pre_rw_mount(fs_info);
    3490        3177 :         if (ret) {
    3491           0 :                 close_ctree(fs_info);
    3492           0 :                 return ret;
    3493             :         }
    3494        3177 :         btrfs_discard_resume(fs_info);
    3495             : 
    3496        3177 :         if (fs_info->uuid_root &&
    3497        3177 :             (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) ||
    3498        3176 :              fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) {
    3499         991 :                 btrfs_info(fs_info, "checking UUID tree");
    3500         991 :                 ret = btrfs_check_uuid_tree(fs_info);
    3501         991 :                 if (ret) {
    3502           0 :                         btrfs_warn(fs_info,
    3503             :                                 "failed to check the UUID tree: %d", ret);
    3504           0 :                         close_ctree(fs_info);
    3505           0 :                         return ret;
    3506             :                 }
    3507             :         }
    3508             : 
    3509        3177 :         set_bit(BTRFS_FS_OPEN, &fs_info->flags);
    3510             : 
    3511             :         /* Kick the cleaner thread so it'll start deleting snapshots. */
    3512        6354 :         if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags))
    3513           0 :                 wake_up_process(fs_info->cleaner_kthread);
    3514             : 
    3515        3177 : clear_oneshot:
    3516        3215 :         btrfs_clear_oneshot_options(fs_info);
    3517        3215 :         return 0;
    3518             : 
    3519           0 : fail_qgroup:
    3520           0 :         btrfs_free_qgroup_config(fs_info);
    3521           0 : fail_trans_kthread:
    3522           0 :         kthread_stop(fs_info->transaction_kthread);
    3523           0 :         btrfs_cleanup_transaction(fs_info);
    3524           0 :         btrfs_free_fs_roots(fs_info);
    3525           0 : fail_cleaner:
    3526           0 :         kthread_stop(fs_info->cleaner_kthread);
    3527             : 
    3528             :         /*
    3529             :          * make sure we're done with the btree inode before we stop our
    3530             :          * kthreads
    3531             :          */
    3532           0 :         filemap_write_and_wait(fs_info->btree_inode->i_mapping);
    3533             : 
    3534           0 : fail_sysfs:
    3535           0 :         btrfs_sysfs_remove_mounted(fs_info);
    3536             : 
    3537           0 : fail_fsdev_sysfs:
    3538           0 :         btrfs_sysfs_remove_fsid(fs_info->fs_devices);
    3539             : 
    3540           0 : fail_block_groups:
    3541           0 :         btrfs_put_block_group_cache(fs_info);
    3542             : 
    3543           1 : fail_tree_roots:
    3544           1 :         if (fs_info->data_reloc_root)
    3545           0 :                 btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root);
    3546           1 :         free_root_pointers(fs_info, true);
    3547           1 :         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
    3548             : 
    3549           1 : fail_sb_buffer:
    3550           1 :         btrfs_stop_all_workers(fs_info);
    3551           1 :         btrfs_free_block_groups(fs_info);
    3552          27 : fail_alloc:
    3553          27 :         btrfs_mapping_tree_free(&fs_info->mapping_tree);
    3554             : 
    3555          27 :         iput(fs_info->btree_inode);
    3556          27 : fail:
    3557          27 :         btrfs_close_devices(fs_info->fs_devices);
    3558          27 :         ASSERT(ret < 0);
    3559          27 :         return ret;
    3560             : }
    3561             : ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
    3562             : 
    3563      645607 : static void btrfs_end_super_write(struct bio *bio)
    3564             : {
    3565      645607 :         struct btrfs_device *device = bio->bi_private;
    3566      645607 :         struct bio_vec *bvec;
    3567      645607 :         struct bvec_iter_all iter_all;
    3568      645607 :         struct page *page;
    3569             : 
    3570     1291214 :         bio_for_each_segment_all(bvec, bio, iter_all) {
    3571      645607 :                 page = bvec->bv_page;
    3572             : 
    3573      645607 :                 if (bio->bi_status) {
    3574           0 :                         btrfs_warn_rl_in_rcu(device->fs_info,
    3575             :                                 "lost page write due to IO error on %s (%d)",
    3576             :                                 btrfs_dev_name(device),
    3577             :                                 blk_status_to_errno(bio->bi_status));
    3578           0 :                         ClearPageUptodate(page);
    3579           0 :                         SetPageError(page);
    3580           0 :                         btrfs_dev_stat_inc_and_print(device,
    3581             :                                                      BTRFS_DEV_STAT_WRITE_ERRS);
    3582             :                 } else {
    3583      645607 :                         SetPageUptodate(page);
    3584             :                 }
    3585             : 
    3586      645607 :                 put_page(page);
    3587      645607 :                 unlock_page(page);
    3588             :         }
    3589             : 
    3590      645607 :         bio_put(bio);
    3591      645607 : }
    3592             : 
    3593        6530 : struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
    3594             :                                                    int copy_num, bool drop_cache)
    3595             : {
    3596        6530 :         struct btrfs_super_block *super;
    3597        6530 :         struct page *page;
    3598        6530 :         u64 bytenr, bytenr_orig;
    3599        6530 :         struct address_space *mapping = bdev->bd_inode->i_mapping;
    3600        6530 :         int ret;
    3601             : 
    3602        6530 :         bytenr_orig = btrfs_sb_offset(copy_num);
    3603        6530 :         ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
    3604        6530 :         if (ret == -ENOENT)
    3605             :                 return ERR_PTR(-EINVAL);
    3606        6530 :         else if (ret)
    3607           0 :                 return ERR_PTR(ret);
    3608             : 
    3609        6530 :         if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
    3610             :                 return ERR_PTR(-EINVAL);
    3611             : 
    3612        6530 :         if (drop_cache) {
    3613             :                 /* This should only be called with the primary sb. */
    3614          46 :                 ASSERT(copy_num == 0);
    3615             : 
    3616             :                 /*
    3617             :                  * Drop the page of the primary superblock, so later read will
    3618             :                  * always read from the device.
    3619             :                  */
    3620          46 :                 invalidate_inode_pages2_range(mapping,
    3621          46 :                                 bytenr >> PAGE_SHIFT,
    3622          46 :                                 (bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
    3623             :         }
    3624             : 
    3625        6530 :         page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
    3626        6530 :         if (IS_ERR(page))
    3627             :                 return ERR_CAST(page);
    3628             : 
    3629        6530 :         super = page_address(page);
    3630        6530 :         if (btrfs_super_magic(super) != BTRFS_MAGIC) {
    3631           0 :                 btrfs_release_disk_super(super);
    3632           0 :                 return ERR_PTR(-ENODATA);
    3633             :         }
    3634             : 
    3635        6530 :         if (btrfs_super_bytenr(super) != bytenr_orig) {
    3636           0 :                 btrfs_release_disk_super(super);
    3637           0 :                 return ERR_PTR(-EINVAL);
    3638             :         }
    3639             : 
    3640             :         return super;
    3641             : }
    3642             : 
    3643             : 
    3644        6484 : struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
    3645             : {
    3646        6484 :         struct btrfs_super_block *super, *latest = NULL;
    3647        6484 :         int i;
    3648        6484 :         u64 transid = 0;
    3649             : 
    3650             :         /* we would like to check all the supers, but that would make
    3651             :          * a btrfs mount succeed after a mkfs from a different FS.
    3652             :          * So, we need to add a special mount option to scan for
    3653             :          * later supers, using BTRFS_SUPER_MIRROR_MAX instead
    3654             :          */
    3655       12968 :         for (i = 0; i < 1; i++) {
    3656        6484 :                 super = btrfs_read_dev_one_super(bdev, i, false);
    3657        6484 :                 if (IS_ERR(super))
    3658           0 :                         continue;
    3659             : 
    3660        6484 :                 if (!latest || btrfs_super_generation(super) > transid) {
    3661        6484 :                         if (latest)
    3662           0 :                                 btrfs_release_disk_super(super);
    3663             : 
    3664        6484 :                         latest = super;
    3665        6484 :                         transid = btrfs_super_generation(super);
    3666             :                 }
    3667             :         }
    3668             : 
    3669        6484 :         return super;
    3670             : }
    3671             : 
    3672             : /*
    3673             :  * Write superblock @sb to the @device. Do not wait for completion, all the
    3674             :  * pages we use for writing are locked.
    3675             :  *
    3676             :  * Write @max_mirrors copies of the superblock, where 0 means default that fit
    3677             :  * the expected device size at commit time. Note that max_mirrors must be
    3678             :  * same for write and wait phases.
    3679             :  *
    3680             :  * Return number of errors when page is not found or submission fails.
    3681             :  */
    3682      442595 : static int write_dev_supers(struct btrfs_device *device,
    3683             :                             struct btrfs_super_block *sb, int max_mirrors)
    3684             : {
    3685      442595 :         struct btrfs_fs_info *fs_info = device->fs_info;
    3686      442595 :         struct address_space *mapping = device->bdev->bd_inode->i_mapping;
    3687      442595 :         SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
    3688      442595 :         int i;
    3689      442595 :         int errors = 0;
    3690      442595 :         int ret;
    3691      442595 :         u64 bytenr, bytenr_orig;
    3692             : 
    3693      442595 :         if (max_mirrors == 0)
    3694      203010 :                 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
    3695             : 
    3696      442595 :         shash->tfm = fs_info->csum_shash;
    3697             : 
    3698     1088202 :         for (i = 0; i < max_mirrors; i++) {
    3699      848615 :                 struct page *page;
    3700      848615 :                 struct bio *bio;
    3701      848615 :                 struct btrfs_super_block *disk_super;
    3702             : 
    3703      848615 :                 bytenr_orig = btrfs_sb_offset(i);
    3704      848615 :                 ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
    3705      848615 :                 if (ret == -ENOENT) {
    3706           0 :                         continue;
    3707      848615 :                 } else if (ret < 0) {
    3708           0 :                         btrfs_err(device->fs_info,
    3709             :                                 "couldn't get super block location for mirror %d",
    3710             :                                 i);
    3711           0 :                         errors++;
    3712           0 :                         continue;
    3713             :                 }
    3714      848615 :                 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
    3715      848615 :                     device->commit_total_bytes)
    3716             :                         break;
    3717             : 
    3718      645607 :                 btrfs_set_super_bytenr(sb, bytenr_orig);
    3719             : 
    3720      645607 :                 crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
    3721             :                                     BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
    3722      645607 :                                     sb->csum);
    3723             : 
    3724      645607 :                 page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
    3725             :                                            GFP_NOFS);
    3726      645607 :                 if (!page) {
    3727           0 :                         btrfs_err(device->fs_info,
    3728             :                             "couldn't get super block page for bytenr %llu",
    3729             :                             bytenr);
    3730           0 :                         errors++;
    3731           0 :                         continue;
    3732             :                 }
    3733             : 
    3734             :                 /* Bump the refcount for wait_dev_supers() */
    3735      645607 :                 get_page(page);
    3736             : 
    3737      645607 :                 disk_super = page_address(page);
    3738     1291214 :                 memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
    3739             : 
    3740             :                 /*
    3741             :                  * Directly use bios here instead of relying on the page cache
    3742             :                  * to do I/O, so we don't lose the ability to do integrity
    3743             :                  * checking.
    3744             :                  */
    3745      645607 :                 bio = bio_alloc(device->bdev, 1,
    3746             :                                 REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO,
    3747             :                                 GFP_NOFS);
    3748      645607 :                 bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
    3749      645607 :                 bio->bi_private = device;
    3750      645607 :                 bio->bi_end_io = btrfs_end_super_write;
    3751      645607 :                 __bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
    3752             :                                offset_in_page(bytenr));
    3753             : 
    3754             :                 /*
    3755             :                  * We FUA only the first super block.  The others we allow to
    3756             :                  * go down lazy and there's a short window where the on-disk
    3757             :                  * copies might still contain the older version.
    3758             :                  */
    3759      645607 :                 if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
    3760      442594 :                         bio->bi_opf |= REQ_FUA;
    3761             : 
    3762      645607 :                 btrfsic_check_bio(bio);
    3763      645607 :                 submit_bio(bio);
    3764             : 
    3765      645607 :                 if (btrfs_advance_sb_log(device, i))
    3766           0 :                         errors++;
    3767             :         }
    3768      442595 :         return errors < i ? 0 : -1;
    3769             : }
    3770             : 
    3771             : /*
    3772             :  * Wait for write completion of superblocks done by write_dev_supers,
    3773             :  * @max_mirrors must match the write phase (0 means BTRFS_SUPER_MIRROR_MAX).
    3774             :  *
    3775             :  * Return -1 when the primary copy (mirror 0) failed or when the error count
    3776             :  * reaches the number of mirrors visited, and 0 otherwise.
    3777             :  */
    3778      442595 : static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
    3779             : {
    3780      442595 :         int i;
    3781      442595 :         int errors = 0;
    3782      442595 :         bool primary_failed = false;
    3783      442595 :         int ret;
    3784      442595 :         u64 bytenr;
    3785             : 
    3786      442595 :         if (max_mirrors == 0)
    3787      203010 :                 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
    3788             : 
    3789     1088202 :         for (i = 0; i < max_mirrors; i++) {
    3790      848615 :                 struct page *page;
    3791             : 
    3792      848615 :                 ret = btrfs_sb_log_location(device, i, READ, &bytenr);
    3793      848615 :                 if (ret == -ENOENT) {
    3794             :                         break;
    3795      848615 :                 } else if (ret < 0) {
    3796           0 :                         errors++;
    3797           0 :                         if (i == 0)
    3798           0 :                                 primary_failed = true;
    3799           0 :                         continue;
    3800             :                 }
    3801      848615 :                 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
    3802      848615 :                     device->commit_total_bytes)
    3803             :                         break; /* mirror does not end below commit_total_bytes */
    3804             : 
    3805      645607 :                 page = find_get_page(device->bdev->bd_inode->i_mapping,
    3806      645607 :                                      bytenr >> PAGE_SHIFT);
    3807      645607 :                 if (!page) {
    3808           0 :                         errors++;
    3809           0 :                         if (i == 0)
    3810           0 :                                 primary_failed = true;
    3811           0 :                         continue;
    3812             :                 }
    3813             :                 /* Page is submitted locked and unlocked once the IO completes */
    3814      645607 :                 wait_on_page_locked(page);
    3815     1291214 :                 if (PageError(page)) {
    3816           0 :                         errors++;
    3817           0 :                         if (i == 0)
    3818           0 :                                 primary_failed = true;
    3819             :                 }
    3820             : 
    3821             :                 /* Drop our reference, taken by find_get_page() above */
    3822      645607 :                 put_page(page);
    3823             : 
    3824             :                 /* Drop the reference from the writing run */
    3825      645607 :                 put_page(page);
    3826             :         }
    3827             : 
    3828             :         /* A failed primary copy is logged and always reported as an error */
    3829      442595 :         if (primary_failed) {
    3830           0 :                 btrfs_err(device->fs_info, "error writing primary super block to device %llu",
    3831             :                           device->devid);
    3832           0 :                 return -1;
    3833             :         }
    3834             : 
    3835      442595 :         return errors < i ? 0 : -1; /* i == number of mirrors visited */
    3836             : }
    3837             : 
    3838             : /*
    3839             :  * End I/O callback of the flush bio set up by write_dev_flush(): uninit the
    3840             :  * bio and complete the completion stored in its bi_private to wake the waiter.
    3841             :  */
    3842         732 : static void btrfs_end_empty_barrier(struct bio *bio)
    3843             : {
    3844         732 :         bio_uninit(bio);
    3845         732 :         complete(bio->bi_private);
    3846         732 : }
    3847             : 
    3848             : /*
    3849             :  * Submit a flush request to the device if it supports it. Error handling is
    3850             :  * done in the waiting counterpart, wait_dev_flush().
    3851             :  */
    3852      442594 : static void write_dev_flush(struct btrfs_device *device)
    3853             : {
    3854      442594 :         struct bio *bio = &device->flush_bio;
    3855             : 
    3856      442594 :         device->last_flush_error = BLK_STS_OK; /* reset on every call */
    3857             : 
    3858             : #ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    3859             :         /*
    3860             :          * When a disk has write caching disabled, we skip submission of a bio
    3861             :          * with flush and sync requests before writing the superblock, since
    3862             :          * it's not needed. However when the integrity checker is enabled, this
    3863             :          * results in reports that there are metadata blocks referred by a
    3864             :          * superblock that were not properly flushed. So don't skip the bio
    3865             :          * submission only when the integrity checker is enabled for the sake
    3866             :          * of simplicity, since this is a debug tool and not meant for use in
    3867             :          * non-debug builds.
    3868             :          */
    3869      442594 :         if (!bdev_write_cache(device->bdev))
    3870             :                 return;
    3871             : #endif
    3872             : 
    3873         732 :         bio_init(bio, device->bdev, NULL, 0,
    3874             :                  REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH);
    3875         732 :         bio->bi_end_io = btrfs_end_empty_barrier;
    3876         732 :         init_completion(&device->flush_wait);
    3877         732 :         bio->bi_private = &device->flush_wait; /* completed by the end_io */
    3878             : 
    3879         732 :         btrfsic_check_bio(bio);
    3880         732 :         submit_bio(bio);
    3881         732 :         set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); /* wait_dev_flush() tests this */
    3882             : }
    3883             : 
    3884             : /*
    3885             :  * Wait for the flush bio if write_dev_flush() submitted one for this device.
    3886             :  * Return true if the bio completed with an error, and false otherwise.
    3887             :  */
    3888      442594 : static bool wait_dev_flush(struct btrfs_device *device)
    3889             : {
    3890      442594 :         struct bio *bio = &device->flush_bio;
    3891             : 
    3892      442594 :         if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
    3893             :                 return false; /* no flush was sent, nothing to wait for */
    3894             : 
    3895         732 :         wait_for_completion_io(&device->flush_wait);
    3896             : 
    3897         732 :         if (bio->bi_status) {
    3898           0 :                 device->last_flush_error = bio->bi_status; /* keep the status on the device */
    3899           0 :                 btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS);
    3900           0 :                 return true;
    3901             :         }
    3902             : 
    3903             :         return false;
    3904             : }
    3905             : 
    3906             : /*
    3907             :  * Send an empty flush down to each eligible device in parallel, then wait
    3908             :  * for them. Return 0, or -EIO on flush errors that cannot be tolerated.
    3909             :  */
    3910      442594 : static int barrier_all_devices(struct btrfs_fs_info *info)
    3911             : {
    3912      442594 :         struct list_head *head;
    3913      442594 :         struct btrfs_device *dev;
    3914      442594 :         int errors_wait = 0;
    3915             : 
    3916      442594 :         lockdep_assert_held(&info->fs_devices->device_list_mutex);
    3917             :         /* send down all the barriers */
    3918      442594 :         head = &info->fs_devices->devices;
    3919      885188 :         list_for_each_entry(dev, head, dev_list) {
    3920      885188 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
    3921           0 :                         continue;
    3922      442594 :                 if (!dev->bdev)
    3923           0 :                         continue;
    3924      442594 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    3925           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    3926           0 :                         continue;
    3927             : 
    3928      442594 :                 write_dev_flush(dev);
    3929             :         }
    3930             : 
    3931             :         /* wait for all the barriers */
    3932      885188 :         list_for_each_entry(dev, head, dev_list) {
    3933      885188 :                 if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
    3934           0 :                         continue;
    3935      442594 :                 if (!dev->bdev) {
    3936           0 :                         errors_wait++; /* not MISSING but no bdev: counted as an error */
    3937           0 :                         continue;
    3938             :                 }
    3939      442594 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    3940           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    3941           0 :                         continue;
    3942             : 
    3943      442594 :                 if (wait_dev_flush(dev))
    3944           0 :                         errors_wait++;
    3945             :         }
    3946             : 
    3947             :         /*
    3948             :          * Errors are tolerated while btrfs_check_rw_degradable(), which checks
    3949             :          * last_flush_error of disks to determine the device state, succeeds.
    3950             :          */
    3951      442594 :         if (errors_wait && !btrfs_check_rw_degradable(info, NULL))
    3952           0 :                 return -EIO;
    3953             : 
    3954             :         return 0;
    3955             : }
    3956             : /* Return the lowest tolerated_failures of all RAID profiles set in @flags. */
    3957         396 : int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
    3958             : {
    3959         396 :         int raid_type;
    3960         396 :         int min_tolerated = INT_MAX; /* INT_MAX == no profile matched yet */
    3961             : 
    3962         396 :         if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 ||
    3963         197 :             (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE))
    3964         199 :                 min_tolerated = min_t(int, min_tolerated,
    3965             :                                     btrfs_raid_array[BTRFS_RAID_SINGLE].
    3966             :                                     tolerated_failures);
    3967             : 
    3968        3960 :         for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) {
    3969        3564 :                 if (raid_type == BTRFS_RAID_SINGLE) /* handled above */
    3970         396 :                         continue;
    3971        3168 :                 if (!(flags & btrfs_raid_array[raid_type].bg_flag))
    3972        2971 :                         continue;
    3973         197 :                 min_tolerated = min_t(int, min_tolerated,
    3974             :                                     btrfs_raid_array[raid_type].
    3975             :                                     tolerated_failures);
    3976             :         }
    3977             : 
    3978         396 :         if (min_tolerated == INT_MAX) {
    3979           0 :                 pr_warn("BTRFS: unknown raid flag: %llu", flags);
    3980           0 :                 min_tolerated = 0; /* nothing matched: tolerate no failure */
    3981             :         }
    3982             : 
    3983         396 :         return min_tolerated;
    3984             : }
    3985             : /* Flush all devices (unless NOBARRIER), then write and wait for all supers. */
    3986      442595 : int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
    3987             : {
    3988      442595 :         struct list_head *head;
    3989      442595 :         struct btrfs_device *dev;
    3990      442595 :         struct btrfs_super_block *sb;
    3991      442595 :         struct btrfs_dev_item *dev_item;
    3992      442595 :         int ret;
    3993      442595 :         int do_barriers;
    3994      442595 :         int max_errors;
    3995      442595 :         int total_errors = 0;
    3996      442595 :         u64 flags;
    3997             : 
    3998      442595 :         do_barriers = !btrfs_test_opt(fs_info, NOBARRIER);
    3999             : 
    4000             :         /*
    4001             :          * max_mirrors == 0 indicates we're from commit_transaction,
    4002             :          * not from fsync where the tree roots in fs_info have not
    4003             :          * been consistent on disk.
    4004             :          */
    4005      442595 :         if (max_mirrors == 0)
    4006      203010 :                 backup_super_roots(fs_info);
    4007             : 
    4008      442595 :         sb = fs_info->super_for_commit;
    4009      442595 :         dev_item = &sb->dev_item;
    4010             : 
    4011      442595 :         mutex_lock(&fs_info->fs_devices->device_list_mutex);
    4012      442595 :         head = &fs_info->fs_devices->devices;
    4013      442595 :         max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1; /* all but one device may fail */
    4014             : 
    4015      442595 :         if (do_barriers) {
    4016      442594 :                 ret = barrier_all_devices(fs_info);
    4017      442594 :                 if (ret) {
    4018           0 :                         mutex_unlock(
    4019           0 :                                 &fs_info->fs_devices->device_list_mutex);
    4020           0 :                         btrfs_handle_fs_error(fs_info, ret,
    4021             :                                               "errors while submitting device barriers.");
    4022           0 :                         return ret;
    4023             :                 }
    4024             :         }
    4025             : 
    4026      885190 :         list_for_each_entry(dev, head, dev_list) {
    4027      442595 :                 if (!dev->bdev) {
    4028           0 :                         total_errors++;
    4029           0 :                         continue;
    4030             :                 }
    4031      442595 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    4032           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    4033           0 :                         continue;
    4034             : 
    4035      442595 :                 btrfs_set_stack_device_generation(dev_item, 0); /* fill the super's dev_item from this device */
    4036      442595 :                 btrfs_set_stack_device_type(dev_item, dev->type);
    4037      442595 :                 btrfs_set_stack_device_id(dev_item, dev->devid);
    4038      442595 :                 btrfs_set_stack_device_total_bytes(dev_item,
    4039             :                                                    dev->commit_total_bytes);
    4040      442595 :                 btrfs_set_stack_device_bytes_used(dev_item,
    4041             :                                                   dev->commit_bytes_used);
    4042      442595 :                 btrfs_set_stack_device_io_align(dev_item, dev->io_align);
    4043      442595 :                 btrfs_set_stack_device_io_width(dev_item, dev->io_width);
    4044      442595 :                 btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
    4045      885190 :                 memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
    4046      885190 :                 memcpy(dev_item->fsid, dev->fs_devices->metadata_uuid,
    4047             :                        BTRFS_FSID_SIZE);
    4048             : 
    4049      442595 :                 flags = btrfs_super_flags(sb);
    4050      442595 :                 btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
    4051             : 
    4052      442595 :                 ret = btrfs_validate_write_super(fs_info, sb);
    4053      442595 :                 if (ret < 0) {
    4054           0 :                         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    4055           0 :                         btrfs_handle_fs_error(fs_info, -EUCLEAN,
    4056             :                                 "unexpected superblock corruption detected");
    4057           0 :                         return -EUCLEAN;
    4058             :                 }
    4059             : 
    4060      442595 :                 ret = write_dev_supers(dev, sb, max_mirrors);
    4061      442595 :                 if (ret)
    4062           0 :                         total_errors++;
    4063             :         }
    4064      442595 :         if (total_errors > max_errors) {
    4065           0 :                 btrfs_err(fs_info, "%d errors while writing supers",
    4066             :                           total_errors);
    4067           0 :                 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    4068             : 
    4069             :                 /* FUA is masked off if unsupported and can't be the reason */
    4070           0 :                 btrfs_handle_fs_error(fs_info, -EIO,
    4071             :                                       "%d errors while writing supers",
    4072             :                                       total_errors);
    4073           0 :                 return -EIO;
    4074             :         }
    4075             : 
    4076      442595 :         total_errors = 0; /* the wait phase is counted on its own */
    4077      885190 :         list_for_each_entry(dev, head, dev_list) {
    4078      442595 :                 if (!dev->bdev)
    4079           0 :                         continue;
    4080      442595 :                 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
    4081           0 :                     !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
    4082           0 :                         continue;
    4083             : 
    4084      442595 :                 ret = wait_dev_supers(dev, max_mirrors);
    4085      442595 :                 if (ret)
    4086           0 :                         total_errors++;
    4087             :         }
    4088      442595 :         mutex_unlock(&fs_info->fs_devices->device_list_mutex);
    4089      442595 :         if (total_errors > max_errors) {
    4090           0 :                 btrfs_handle_fs_error(fs_info, -EIO,
    4091             :                                       "%d errors while writing supers",
    4092             :                                       total_errors);
    4093           0 :                 return -EIO;
    4094             :         }
    4095             :         return 0;
    4096             : }
    4097             : 
    4098             : /* Remove a fs root from the radix tree, put it if it was marked IN_RADIX. */
    4099       12121 : void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
    4100             :                                   struct btrfs_root *root)
    4101             : {
    4102       12121 :         bool drop_ref = false;
    4103             : 
    4104       12121 :         spin_lock(&fs_info->fs_roots_radix_lock);
    4105       12121 :         radix_tree_delete(&fs_info->fs_roots_radix,
    4106       12121 :                           (unsigned long)root->root_key.objectid);
    4107       12121 :         if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
    4108       12121 :                 drop_ref = true; /* the put itself happens after unlocking */
    4109       12121 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    4110             : 
    4111       12121 :         if (BTRFS_FS_ERROR(fs_info)) { /* fs error: also release a leftover reloc root */
    4112          34 :                 ASSERT(root->log_root == NULL);
    4113          34 :                 if (root->reloc_root) {
    4114           0 :                         btrfs_put_root(root->reloc_root);
    4115           0 :                         root->reloc_root = NULL;
    4116             :                 }
    4117             :         }
    4118             : 
    4119       12121 :         if (drop_ref)
    4120       12121 :                 btrfs_put_root(root);
    4121       12121 : }
    4122             : /* Run orphan cleanup on every fs root in the radix tree that has refs. */
    4123        3179 : int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
    4124             : {
    4125        3179 :         u64 root_objectid = 0;
    4126        3179 :         struct btrfs_root *gang[8];
    4127        3179 :         int i = 0;
    4128        3179 :         int err = 0;
    4129        3179 :         unsigned int ret = 0;
    4130             : 
    4131       10583 :         while (1) {
    4132        6881 :                 spin_lock(&fs_info->fs_roots_radix_lock);
    4133        6881 :                 ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
    4134             :                                              (void **)gang, root_objectid,
    4135             :                                              ARRAY_SIZE(gang));
    4136        6881 :                 if (!ret) {
    4137        3179 :                         spin_unlock(&fs_info->fs_roots_radix_lock);
    4138             :                         break;
    4139             :                 }
    4140        3702 :                 root_objectid = gang[ret - 1]->root_key.objectid + 1;
    4141             : 
    4142       14463 :                 for (i = 0; i < ret; i++) {
    4143             :                         /* Do not grab roots with zero refs (those in dead_roots) */
    4144       10761 :                         if (btrfs_root_refs(&gang[i]->root_item) == 0) {
    4145         151 :                                 gang[i] = NULL;
    4146         151 :                                 continue;
    4147             :                         }
    4148             :                         /* Grab all other search results for use after unlocking */
    4149       10610 :                         gang[i] = btrfs_grab_root(gang[i]);
    4150             :                 }
    4151        3702 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
    4152             : 
    4153       18165 :                 for (i = 0; i < ret; i++) {
    4154       10761 :                         if (!gang[i])
    4155         151 :                                 continue;
    4156       10610 :                         root_objectid = gang[i]->root_key.objectid;
    4157       10610 :                         err = btrfs_orphan_cleanup(gang[i]);
    4158       10610 :                         if (err)
    4159           0 :                                 goto out; /* gang[i] onwards is put below */
    4160       10610 :                         btrfs_put_root(gang[i]);
    4161             :                 }
    4162        3702 :                 root_objectid++;
    4163             :         }
    4164        3179 : out:
    4165             :         /* Release the roots not yet cleaned up because of an error */
    4166        3179 :         for (; i < ret; i++) {
    4167           0 :                 if (gang[i])
    4168           0 :                         btrfs_put_root(gang[i]);
    4169             :         }
    4170        3179 :         return err;
    4171             : }
    4172             : /* Run delayed iputs, wait for cleanup work, then commit a transaction. */
    4173        3164 : int btrfs_commit_super(struct btrfs_fs_info *fs_info)
    4174             : {
    4175        3164 :         struct btrfs_root *root = fs_info->tree_root;
    4176        3164 :         struct btrfs_trans_handle *trans;
    4177             : 
    4178        3164 :         mutex_lock(&fs_info->cleaner_mutex);
    4179        3164 :         btrfs_run_delayed_iputs(fs_info);
    4180        3164 :         mutex_unlock(&fs_info->cleaner_mutex);
    4181        3164 :         wake_up_process(fs_info->cleaner_kthread);
    4182             : 
    4183             :         /* Wait until ongoing cleanup work is done */
    4184        3164 :         down_write(&fs_info->cleanup_work_sem);
    4185        3164 :         up_write(&fs_info->cleanup_work_sem);
    4186             : 
    4187        3164 :         trans = btrfs_join_transaction(root);
    4188        3164 :         if (IS_ERR(trans))
    4189           0 :                 return PTR_ERR(trans);
    4190        3164 :         return btrfs_commit_transaction(trans);
    4191             : }
    4192             : /* Warn about, clean up and release every transaction left in trans_list. */
    4193        3215 : static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
    4194             : {
    4195        3215 :         struct btrfs_transaction *trans;
    4196        3215 :         struct btrfs_transaction *tmp;
    4197        3215 :         bool found = false;
    4198             : 
    4199        3215 :         if (list_empty(&fs_info->trans_list))
    4200             :                 return;
    4201             : 
    4202             :         /*
    4203             :          * This function is only called at the very end of close_ctree(),
    4204             :          * thus no other running transaction, no need to take trans_lock.
    4205             :          */
    4206           0 :         ASSERT(test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags));
    4207           0 :         list_for_each_entry_safe(trans, tmp, &fs_info->trans_list, list) {
    4208           0 :                 struct extent_state *cached = NULL;
    4209           0 :                 u64 dirty_bytes = 0;
    4210           0 :                 u64 cur = 0;
    4211           0 :                 u64 found_start;
    4212           0 :                 u64 found_end;
    4213             : 
    4214           0 :                 found = true;
    4215           0 :                 while (!find_first_extent_bit(&trans->dirty_pages, cur,
    4216             :                         &found_start, &found_end, EXTENT_DIRTY, &cached)) {
    4217           0 :                         dirty_bytes += found_end + 1 - found_start; /* found_end is inclusive */
    4218           0 :                         cur = found_end + 1;
    4219             :                 }
    4220           0 :                 btrfs_warn(fs_info,
    4221             :         "transaction %llu (with %llu dirty metadata bytes) is not committed",
    4222             :                            trans->transid, dirty_bytes);
    4223           0 :                 btrfs_cleanup_one_transaction(trans, fs_info);
    4224             : 
    4225           0 :                 if (trans == fs_info->running_transaction)
    4226           0 :                         fs_info->running_transaction = NULL;
    4227           0 :                 list_del_init(&trans->list);
    4228             : 
    4229           0 :                 btrfs_put_transaction(trans);
    4230           0 :                 trace_btrfs_transaction_commit(fs_info);
    4231             :         }
    4232        3215 :         ASSERT(!found); /* trips if any transaction was still listed */
    4233             : }
    4234             : 
    4235        3215 : void __cold close_ctree(struct btrfs_fs_info *fs_info)
    4236             : {
    4237        3215 :         int ret;
    4238             : 
    4239        3215 :         set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
    4240             : 
    4241             :         /*
    4242             :          * If we had UNFINISHED_DROPS we could still be processing them, so
    4243             :          * clear that bit and wake up relocation so it can stop.
    4244             :          * We must do this before stopping the block group reclaim task, because
    4245             :          * at btrfs_relocate_block_group() we wait for this bit, and after the
    4246             :          * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we
    4247             :          * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will
    4248             :          * return 1.
    4249             :          */
    4250        3215 :         btrfs_wake_unfinished_drop(fs_info);
    4251             : 
    4252             :         /*
    4253             :          * We may have the reclaim task running and relocating a data block group,
    4254             :          * in which case it may create delayed iputs. So stop it before we park
    4255             :          * the cleaner kthread otherwise we can get new delayed iputs after
    4256             :          * parking the cleaner, and that can make the async reclaim task to hang
    4257             :          * if it's waiting for delayed iputs to complete, since the cleaner is
    4258             :          * parked and can not run delayed iputs - this will make us hang when
    4259             :          * trying to stop the async reclaim task.
    4260             :          */
    4261        3215 :         cancel_work_sync(&fs_info->reclaim_bgs_work);
    4262             :         /*
    4263             :          * We don't want the cleaner to start new transactions, add more delayed
    4264             :          * iputs, etc. while we're closing. We can't use kthread_stop() yet
    4265             :          * because that frees the task_struct, and the transaction kthread might
    4266             :          * still try to wake up the cleaner.
    4267             :          */
    4268        3215 :         kthread_park(fs_info->cleaner_kthread);
    4269             : 
    4270             :         /* wait for the qgroup rescan worker to stop */
    4271        3215 :         btrfs_qgroup_wait_for_completion(fs_info, false);
    4272             : 
    4273             :         /* wait for the uuid_scan task to finish */
    4274        3215 :         down(&fs_info->uuid_tree_rescan_sem);
    4275             :         /* avoid complaints from lockdep et al., set sem back to initial state */
    4276        3215 :         up(&fs_info->uuid_tree_rescan_sem);
    4277             : 
    4278             :         /* pause restriper - we want to resume on mount */
    4279        3215 :         btrfs_pause_balance(fs_info);
    4280             : 
    4281        3215 :         btrfs_dev_replace_suspend_for_unmount(fs_info);
    4282             : 
    4283        3215 :         btrfs_scrub_cancel(fs_info);
    4284             : 
    4285             :         /* wait for any defraggers to finish */
    4286        3215 :         wait_event(fs_info->transaction_wait,
    4287             :                    (atomic_read(&fs_info->defrag_running) == 0));
    4288             : 
    4289             :         /* clear out the rbtree of defraggable inodes */
    4290        3215 :         btrfs_cleanup_defrag_inodes(fs_info);
    4291             : 
    4292             :         /*
    4293             :          * After we parked the cleaner kthread, ordered extents may have
    4294             :          * completed and created new delayed iputs. If one of the async reclaim
    4295             :          * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
    4296             :          * can hang forever trying to stop it, because if a delayed iput is
    4297             :          * added after it ran btrfs_run_delayed_iputs() and before it called
    4298             :          * btrfs_wait_on_delayed_iputs(), it will hang forever since there is
    4299             :          * no one else to run iputs.
    4300             :          *
    4301             :          * So wait for all ongoing ordered extents to complete and then run
    4302             :          * delayed iputs. This works because once we reach this point no one
    4303             :          * can either create new ordered extents nor create delayed iputs
    4304             :          * through some other means.
    4305             :          *
    4306             :          * Also note that btrfs_wait_ordered_roots() is not safe here, because
    4307             :          * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
    4308             :          * but the delayed iput for the respective inode is made only when doing
    4309             :          * the final btrfs_put_ordered_extent() (which must happen at
    4310             :          * btrfs_finish_ordered_io() when we are unmounting).
    4311             :          */
    4312        3215 :         btrfs_flush_workqueue(fs_info->endio_write_workers);
    4313             :         /* Ordered extents for free space inodes. */
    4314        3215 :         btrfs_flush_workqueue(fs_info->endio_freespace_worker);
    4315        3215 :         btrfs_run_delayed_iputs(fs_info);
    4316             : 
    4317        3215 :         cancel_work_sync(&fs_info->async_reclaim_work);
    4318        3215 :         cancel_work_sync(&fs_info->async_data_reclaim_work);
    4319        3215 :         cancel_work_sync(&fs_info->preempt_reclaim_work);
    4320             : 
    4321             :         /* Cancel or finish ongoing discard work */
    4322        3215 :         btrfs_discard_cleanup(fs_info);
    4323             : 
    4324        3215 :         if (!sb_rdonly(fs_info->sb)) {
    4325             :                 /*
    4326             :                  * The cleaner kthread is stopped, so do one final pass over
    4327             :                  * unused block groups.
    4328             :                  */
    4329        3160 :                 btrfs_delete_unused_bgs(fs_info);
    4330             : 
    4331             :                 /*
    4332             :                  * There might be existing delayed inode workers still running
    4333             :                  * and holding an empty delayed inode item. We must wait for
    4334             :                  * them to complete first because they can create a transaction.
    4335             :                  * This happens when someone calls btrfs_balance_delayed_items()
    4336             :                  * and then a transaction commit runs the same delayed nodes
    4337             :                  * before any delayed worker has done something with the nodes.
    4338             :                  * We must wait for any worker here and not at transaction
    4339             :                  * commit time since that could cause a deadlock.
    4340             :                  * This is a very rare case.
    4341             :                  */
    4342        3160 :                 btrfs_flush_workqueue(fs_info->delayed_workers);
    4343             : 
    4344        3160 :                 ret = btrfs_commit_super(fs_info);
    4345        3160 :                 if (ret)
    4346           2 :                         btrfs_err(fs_info, "commit super ret %d", ret);
    4347             :         }
    4348             : 
    4349        3215 :         if (BTRFS_FS_ERROR(fs_info))
    4350          17 :                 btrfs_error_commit_super(fs_info);
    4351             : 
    4352        3215 :         kthread_stop(fs_info->transaction_kthread);
    4353        3215 :         kthread_stop(fs_info->cleaner_kthread);
    4354             : 
    4355        3215 :         ASSERT(list_empty(&fs_info->delayed_iputs));
    4356        3215 :         set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
    4357             : 
    4358        3215 :         if (btrfs_check_quota_leak(fs_info)) {
    4359           0 :                 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
    4360           0 :                 btrfs_err(fs_info, "qgroup reserved space leaked");
    4361             :         }
    4362             : 
    4363        3215 :         btrfs_free_qgroup_config(fs_info);
    4364        3215 :         ASSERT(list_empty(&fs_info->delalloc_roots));
    4365             : 
    4366        3215 :         if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
    4367           0 :                 btrfs_info(fs_info, "at unmount delalloc count %lld",
    4368             :                        percpu_counter_sum(&fs_info->delalloc_bytes));
    4369             :         }
    4370             : 
    4371        3215 :         if (percpu_counter_sum(&fs_info->ordered_bytes))
    4372           0 :                 btrfs_info(fs_info, "at unmount dio bytes count %lld",
    4373             :                            percpu_counter_sum(&fs_info->ordered_bytes));
    4374             : 
    4375        3215 :         btrfs_sysfs_remove_mounted(fs_info);
    4376        3215 :         btrfs_sysfs_remove_fsid(fs_info->fs_devices);
    4377             : 
    4378        3215 :         btrfs_put_block_group_cache(fs_info);
    4379             : 
    4380             :         /*
    4381             :          * We must make sure there are no read requests left to
    4382             :          * submit after we stop all workers.
    4383             :          */
    4384        3215 :         invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
    4385        3215 :         btrfs_stop_all_workers(fs_info);
    4386             : 
    4387             :         /* We shouldn't have any transaction open at this point */
    4388        3215 :         warn_about_uncommitted_trans(fs_info);
    4389             : 
    4390        3215 :         clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
    4391        3215 :         free_root_pointers(fs_info, true);
    4392        3215 :         btrfs_free_fs_roots(fs_info);
    4393             : 
    4394             :         /*
    4395             :          * We must free the block groups after dropping the fs_roots as we could
    4396             :          * have had an IO error and have left over tree log blocks that aren't
    4397             :          * cleaned up until the fs roots are freed.  This makes the block group
    4398             :          * accounting appear to be wrong because there's pending reserved bytes,
    4399             :          * so make sure we do the block group cleanup afterwards.
    4400             :          */
    4401        3215 :         btrfs_free_block_groups(fs_info);
    4402             : 
    4403        3215 :         iput(fs_info->btree_inode);
    4404             : 
    4405             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    4406             :         if (btrfs_test_opt(fs_info, CHECK_INTEGRITY))
    4407             :                 btrfsic_unmount(fs_info->fs_devices);
    4408             : #endif
    4409             : 
    4410        3215 :         btrfs_mapping_tree_free(&fs_info->mapping_tree);
    4411        3215 :         btrfs_close_devices(fs_info->fs_devices);
    4412        3215 : }
    4413             : 
    4414   303649136 : void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
    4415             : {
                               /*
                                * Mark the extent buffer @buf dirty via set_extent_buffer_dirty().
                                * The buffer must be tree write locked (asserted below).  A header
                                * generation that differs from fs_info->generation only triggers a
                                * WARN; the buffer is still marked dirty afterwards.
                                */
    4416   303649136 :         struct btrfs_fs_info *fs_info = buf->fs_info;
    4417   303649136 :         u64 transid = btrfs_header_generation(buf);
    4418             :
    4419             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
    4420             :         /*
    4421             :          * This is a fast path so only do this check if we have sanity tests
    4422             :          * enabled.  Normal people shouldn't be using unmapped buffers as dirty
    4423             :          * outside of the sanity tests.
    4424             :          */
    4425             :         if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags)))
    4426             :                 return;
    4427             : #endif
    4428   303649136 :         btrfs_assert_tree_write_locked(buf);
                               /* Diagnostic only: a mismatch does not prevent dirtying the buffer. */
    4429   303649136 :         if (transid != fs_info->generation)
    4430           0 :                 WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
    4431             :                         buf->start, transid, fs_info->generation);
    4432   303649136 :         set_extent_buffer_dirty(buf);
    4433             : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    4434             :         /*
    4435             :          * btrfs_check_leaf() won't check item data if we don't have WRITTEN
    4436             :          * set, so this will only validate the basic structure of the items.
    4437             :          */
    4438             :         if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(buf)) {
    4439             :                 btrfs_print_leaf(buf);
    4440             :                 ASSERT(0);
    4441             :         }
    4442             : #endif
    4443   303706414 : }
    4444             : 
    4445    19206145 : static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info,
    4446             :                                         int flush_delayed)
    4447             : {
                               /*
                                * Throttle the caller once the amount of dirty metadata exceeds
                                * BTRFS_DIRTY_METADATA_THRESH.
                                *
                                * @fs_info:       filesystem whose dirty metadata counter is checked
                                * @flush_delayed: non-zero to also call btrfs_balance_delayed_items()
                                *                 before checking the counter
                                */
    4448             :         /*
    4449             :          * looks as though older kernels can get into trouble with
    4450             :          * this code, they end up stuck in balance_dirty_pages forever
    4451             :          */
    4452    19206145 :         int ret;
    4453             :
                               /*
                                * NOTE(review): presumably avoids throttling a task that is itself
                                * running in a memory reclaim context - confirm.
                                */
    4454    19206145 :         if (current->flags & PF_MEMALLOC)
    4455             :                 return;
    4456             :
    4457    19206145 :         if (flush_delayed)
    4458    12512624 :                 btrfs_balance_delayed_items(fs_info);
    4459             :
                               /*
                                * Compare the per-cpu dirty metadata counter against the threshold,
                                * using dirty_metadata_batch as the batch argument; only a result
                                * greater than zero leads to balance_dirty_pages_ratelimited().
                                */
    4460    19205934 :         ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes,
    4461             :                                      BTRFS_DIRTY_METADATA_THRESH,
    4462             :                                      fs_info->dirty_metadata_batch);
    4463    19206420 :         if (ret > 0) {
    4464     3694690 :                 balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping);
    4465             :         }
    4466             : }
    4467             : 
    4468    12513603 : void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info)
    4469             : {
                               /*
                                * Balance dirty metadata with flush_delayed = 1, i.e. delayed items
                                * are balanced as well before the dirty metadata check.
                                */
    4470    12513603 :         __btrfs_btree_balance_dirty(fs_info, 1);
    4471    12512241 : }
    4472             : 
    4473     6696061 : void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info)
    4474             : {
                               /*
                                * Balance dirty metadata with flush_delayed = 0, i.e. without
                                * calling btrfs_balance_delayed_items() first.
                                */
    4475     6696061 :         __btrfs_btree_balance_dirty(fs_info, 0);
    4476     6693708 : }
    4477             : 
    4478          17 : static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
    4479             : {
                               /*
                                * Error-path teardown: clean up transaction state with
                                * btrfs_cleanup_transaction(), run the delayed iputs under
                                * cleaner_mutex, then take and release cleanup_work_sem for writing.
                                */
    4480             :         /* cleanup FS via transaction */
    4481          17 :         btrfs_cleanup_transaction(fs_info);
    4482             :
    4483          17 :         mutex_lock(&fs_info->cleaner_mutex);
    4484          17 :         btrfs_run_delayed_iputs(fs_info);
    4485          17 :         mutex_unlock(&fs_info->cleaner_mutex);
    4486             :
                               /*
                                * Nothing is done while the semaphore is held.
                                * NOTE(review): presumably this only waits for the current holders
                                * of cleanup_work_sem to finish - confirm.
                                */
    4487          17 :         down_write(&fs_info->cleanup_work_sem);
    4488          17 :         up_write(&fs_info->cleanup_work_sem);
    4489          17 : }
    4490             : 
    4491          20 : static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
    4492             : {
                               /*
                                * Free the log of every root found in fs_info->fs_roots_radix and
                                * then free the log root tree.  Roots are looked up in batches of at
                                * most ARRAY_SIZE(gang) entries; btrfs_free_log() and
                                * btrfs_free_log_root_tree() are both called with a NULL first
                                * argument.
                                */
    4493          20 :         struct btrfs_root *gang[8];
    4494          20 :         u64 root_objectid = 0;
    4495          20 :         int ret;
    4496             :
    4497          20 :         spin_lock(&fs_info->fs_roots_radix_lock);
    4498          80 :         while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
    4499             :                                              (void **)gang, root_objectid,
    4500          40 :                                              ARRAY_SIZE(gang))) != 0) {
    4501             :                 int i;
    4502             :
                                       /*
                                        * Grab each root while the radix lock is still held, so the
                                        * lock can be dropped for the work below.  btrfs_grab_root()
                                        * may leave a NULL entry, which is skipped.
                                        */
    4503          60 :                 for (i = 0; i < ret; i++)
    4504          40 :                         gang[i] = btrfs_grab_root(gang[i]);
    4505          20 :                 spin_unlock(&fs_info->fs_roots_radix_lock);
    4506             :
    4507          80 :                 for (i = 0; i < ret; i++) {
    4508          40 :                         if (!gang[i])
    4509           0 :                                 continue;
    4510          40 :                         root_objectid = gang[i]->root_key.objectid;
    4511          40 :                         btrfs_free_log(NULL, gang[i]);
    4512          40 :                         btrfs_put_root(gang[i]);
    4513             :                 }
                                       /* Start the next lookup just past the last root handled. */
    4514          20 :                 root_objectid++;
    4515          20 :                 spin_lock(&fs_info->fs_roots_radix_lock);
    4516             :         }
    4517          20 :         spin_unlock(&fs_info->fs_roots_radix_lock);
    4518          20 :         btrfs_free_log_root_tree(NULL, fs_info);
    4519          20 : }
    4520             : 
    4521           0 : static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
    4522             : {
                               /*
                                * Set BTRFS_ORDERED_IOERR on every ordered extent currently on
                                * root->ordered_extents, holding root->ordered_extent_lock for the
                                * whole walk.  Nothing is removed from the list here.
                                */
    4523           0 :         struct btrfs_ordered_extent *ordered;
    4524             :
    4525           0 :         spin_lock(&root->ordered_extent_lock);
    4526             :         /*
    4527             :          * This will just short circuit the ordered completion stuff which will
    4528             :          * make sure the ordered extent gets properly cleaned up.
    4529             :          */
    4530           0 :         list_for_each_entry(ordered, &root->ordered_extents,
    4531             :                             root_extent_list)
    4532           0 :                 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
    4533           0 :         spin_unlock(&root->ordered_extent_lock);
    4534           0 : }
    4535             : 
    4536          20 : static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
    4537             : {
                               /*
                                * Run btrfs_destroy_ordered_extents() on every root that is on
                                * fs_info->ordered_roots, then wait for the ordered extents of all
                                * roots with btrfs_wait_ordered_roots().
                                */
    4538          20 :         struct btrfs_root *root;
    4539          20 :         struct list_head splice;
    4540             :
    4541          20 :         INIT_LIST_HEAD(&splice);
    4542             :
    4543          20 :         spin_lock(&fs_info->ordered_root_lock);
                               /*
                                * Work from a private copy of the list.  Each root is moved back to
                                * fs_info->ordered_roots before the lock is dropped, so the private
                                * list shrinks by one entry per iteration.
                                */
    4544          20 :         list_splice_init(&fs_info->ordered_roots, &splice);
    4545          20 :         while (!list_empty(&splice)) {
    4546           0 :                 root = list_first_entry(&splice, struct btrfs_root,
    4547             :                                         ordered_root);
    4548           0 :                 list_move_tail(&root->ordered_root,
    4549             :                                &fs_info->ordered_roots);
    4550             :
    4551           0 :                 spin_unlock(&fs_info->ordered_root_lock);
    4552           0 :                 btrfs_destroy_ordered_extents(root);
    4553             :
    4554           0 :                 cond_resched();
    4555           0 :                 spin_lock(&fs_info->ordered_root_lock);
    4556             :         }
    4557          20 :         spin_unlock(&fs_info->ordered_root_lock);
    4558             :
    4559             :         /*
    4560             :          * We need this here because if we've been flipped read-only we won't
    4561             :          * get sync() from the umount, so we need to make sure any ordered
    4562             :          * extents that haven't had their dirty pages IO start writeout yet
    4563             :          * actually get run and error out properly.
    4564             :          */
    4565          20 :         btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
    4566          20 : }
    4567             : 
    4568          17 : static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
    4569             :                                        struct btrfs_fs_info *fs_info)
    4570             : {
                               /*
                                * Throw away every delayed ref head, and every delayed ref queued
                                * under it, from @trans->delayed_refs.
                                *
                                * For a head with must_insert_reserved set, head->num_bytes is moved
                                * from the reserved to the pinned counters of the owning block group
                                * and its space_info, and btrfs_error_unpin_extent_range() is then
                                * called on the head's byte range.  Once all heads are gone the
                                * qgroup extent records of @trans are destroyed too.
                                *
                                * Returns early, after a debug message, when num_entries is zero.
                                */
    4571          17 :         struct rb_node *node;
    4572          17 :         struct btrfs_delayed_ref_root *delayed_refs;
    4573          17 :         struct btrfs_delayed_ref_node *ref;
    4574             :
    4575          17 :         delayed_refs = &trans->delayed_refs;
    4576             :
    4577          17 :         spin_lock(&delayed_refs->lock);
    4578          17 :         if (atomic_read(&delayed_refs->num_entries) == 0) {
    4579          15 :                 spin_unlock(&delayed_refs->lock);
    4580          15 :                 btrfs_debug(fs_info, "delayed_refs has NO entry");
    4581          15 :                 return;
    4582             :         }
    4583             :
                               /* delayed_refs->lock is held at the top of every iteration. */
    4584          32 :         while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
    4585          30 :                 struct btrfs_delayed_ref_head *head;
    4586          30 :                 struct rb_node *n;
    4587          30 :                 bool pin_bytes = false;
    4588             :
    4589          30 :                 head = rb_entry(node, struct btrfs_delayed_ref_head,
    4590             :                                 href_node);
                                       /*
                                        * A non-zero return restarts from the first head.
                                        * NOTE(review): presumably the head went away while waiting
                                        * for its mutex - confirm against btrfs_delayed_ref_lock().
                                        */
    4591          30 :                 if (btrfs_delayed_ref_lock(delayed_refs, head))
    4592           0 :                         continue;
    4593             :
    4594          30 :                 spin_lock(&head->lock);
                                       /* Unlink and put every ref queued under this head. */
    4595          59 :                 while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
    4596          29 :                         ref = rb_entry(n, struct btrfs_delayed_ref_node,
    4597             :                                        ref_node);
    4598          29 :                         rb_erase_cached(&ref->ref_node, &head->ref_tree);
    4599          29 :                         RB_CLEAR_NODE(&ref->ref_node);
    4600          29 :                         if (!list_empty(&ref->add_list))
    4601          28 :                                 list_del(&ref->add_list);
    4602          29 :                         atomic_dec(&delayed_refs->num_entries);
    4603          29 :                         btrfs_put_delayed_ref(ref);
    4604             :                 }
    4605          30 :                 if (head->must_insert_reserved)
    4606          28 :                         pin_bytes = true;
    4607          30 :                 btrfs_free_delayed_extent_op(head->extent_op);
    4608          30 :                 btrfs_delete_ref_head(delayed_refs, head);
                                       /*
                                        * All three locks are dropped here, but head is still used
                                        * below; it is only put after the accounting cleanup.
                                        */
    4609          30 :                 spin_unlock(&head->lock);
    4610          30 :                 spin_unlock(&delayed_refs->lock);
    4611          30 :                 mutex_unlock(&head->mutex);
    4612             :
    4613          30 :                 if (pin_bytes) {
    4614          28 :                         struct btrfs_block_group *cache;
    4615             :
    4616          28 :                         cache = btrfs_lookup_block_group(fs_info, head->bytenr);
    4617          28 :                         BUG_ON(!cache);
    4618             :
                                               /*
                                                * Move the bytes from reserved to pinned in both the
                                                * block group and its space_info, with space_info->lock
                                                * taken before cache->lock.
                                                */
    4619          28 :                         spin_lock(&cache->space_info->lock);
    4620          28 :                         spin_lock(&cache->lock);
    4621          28 :                         cache->pinned += head->num_bytes;
    4622          28 :                         btrfs_space_info_update_bytes_pinned(fs_info,
    4623          28 :                                 cache->space_info, head->num_bytes);
    4624          28 :                         cache->reserved -= head->num_bytes;
    4625          28 :                         cache->space_info->bytes_reserved -= head->num_bytes;
    4626          28 :                         spin_unlock(&cache->lock);
    4627          28 :                         spin_unlock(&cache->space_info->lock);
    4628             :
    4629          28 :                         btrfs_put_block_group(cache);
    4630             :
    4631          28 :                         btrfs_error_unpin_extent_range(fs_info, head->bytenr,
    4632          28 :                                 head->bytenr + head->num_bytes - 1);
    4633             :                 }
    4634          30 :                 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
    4635          30 :                 btrfs_put_delayed_ref_head(head);
    4636          30 :                 cond_resched();
    4637          30 :                 spin_lock(&delayed_refs->lock);
    4638             :         }
                               /* Still under delayed_refs->lock here. */
    4639           2 :         btrfs_qgroup_destroy_extent_records(trans);
    4640             :
    4641           2 :         spin_unlock(&delayed_refs->lock);
    4642             : }
    4643             : 
    4644           0 : static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
    4645             : {
                               /*
                                * Remove every inode from root->delalloc_inodes and invalidate its
                                * page cache.  The list is spliced onto a private list first;
                                * root->delalloc_lock is dropped around the per-inode work and
                                * retaken before the next list access.
                                */
    4646           0 :         struct btrfs_inode *btrfs_inode;
    4647           0 :         struct list_head splice;
    4648             :
    4649           0 :         INIT_LIST_HEAD(&splice);
    4650             :
    4651           0 :         spin_lock(&root->delalloc_lock);
    4652           0 :         list_splice_init(&root->delalloc_inodes, &splice);
    4653             :
    4654           0 :         while (!list_empty(&splice)) {
    4655           0 :                 struct inode *inode = NULL;
    4656           0 :                 btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
    4657             :                                                delalloc_inodes);
                                       /*
                                        * NOTE(review): the loop only ends if this call unlinks the
                                        * inode from the private list - confirm in
                                        * __btrfs_del_delalloc_inode().
                                        */
    4658           0 :                 __btrfs_del_delalloc_inode(root, btrfs_inode);
    4659           0 :                 spin_unlock(&root->delalloc_lock);
    4660             :
    4661             :                 /*
    4662             :                  * Make sure we get a live inode and that it'll not disappear
    4663             :                  * meanwhile.
    4664             :                  */
    4665           0 :                 inode = igrab(&btrfs_inode->vfs_inode);
    4666           0 :                 if (inode) {
    4667           0 :                         unsigned int nofs_flag;
    4668             :
                                               /* Invalidate the page cache inside a NOFS allocation scope. */
    4669           0 :                         nofs_flag = memalloc_nofs_save();
    4670           0 :                         invalidate_inode_pages2(inode->i_mapping);
    4671           0 :                         memalloc_nofs_restore(nofs_flag);
    4672           0 :                         iput(inode);
    4673             :                 }
    4674           0 :                 spin_lock(&root->delalloc_lock);
    4675             :         }
    4676           0 :         spin_unlock(&root->delalloc_lock);
    4677           0 : }
    4678             : 
    4679          20 : static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
    4680             : {
                               /*
                                * Run btrfs_destroy_delalloc_inodes() on every root that is on
                                * fs_info->delalloc_roots.  The list is spliced onto a private list
                                * and each root is processed with delalloc_root_lock dropped and a
                                * root reference held.
                                */
    4681          20 :         struct btrfs_root *root;
    4682          20 :         struct list_head splice;
    4683             :
    4684          20 :         INIT_LIST_HEAD(&splice);
    4685             :
    4686          20 :         spin_lock(&fs_info->delalloc_root_lock);
    4687          20 :         list_splice_init(&fs_info->delalloc_roots, &splice);
    4688          20 :         while (!list_empty(&splice)) {
    4689           0 :                 root = list_first_entry(&splice, struct btrfs_root,
    4690             :                                          delalloc_root);
    4691           0 :                 root = btrfs_grab_root(root);
    4692           0 :                 BUG_ON(!root);
    4693           0 :                 spin_unlock(&fs_info->delalloc_root_lock);
    4694             :
                                       /*
                                        * NOTE(review): nothing in this function unlinks root from
                                        * the private list; presumably that happens inside the call
                                        * below once the root has no delalloc inodes left - confirm.
                                        */
    4695           0 :                 btrfs_destroy_delalloc_inodes(root);
    4696           0 :                 btrfs_put_root(root);
    4697             :
    4698           0 :                 spin_lock(&fs_info->delalloc_root_lock);
    4699             :         }
    4700          20 :         spin_unlock(&fs_info->delalloc_root_lock);
    4701          20 : }
    4702             : 
    4703          17 : static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
    4704             :                                         struct extent_io_tree *dirty_pages,
    4705             :                                         int mark)
    4706             : {
                               /*
                                * Clear @mark from every range of @dirty_pages that carries it and
                                * drop the dirty state of the extent buffers found in those ranges.
                                *
                                * @fs_info:     filesystem used to look up extent buffers
                                * @dirty_pages: extent io tree that is searched and cleared
                                * @mark:        extent bit(s) to search for and clear
                                *
                                * Returns the non-zero find_first_extent_bit() result that ended
                                * the search; the break below is the only way out of the loop.
                                */
    4707          17 :         int ret;
    4708          17 :         struct extent_buffer *eb;
    4709          17 :         u64 start = 0;
    4710          83 :         u64 end;
    4711             :
    4712          83 :         while (1) {
                                       /* The range found is written back into start and end. */
    4713          83 :                 ret = find_first_extent_bit(dirty_pages, start, &start, &end,
    4714             :                                             mark, NULL);
    4715          83 :                 if (ret)
    4716             :                         break;
    4717             :
    4718          66 :                 clear_extent_bits(dirty_pages, start, end, mark);
                                       /*
                                        * Walk the range in nodesize steps.  start is advanced before
                                        * the NULL check, so offsets without an extent buffer are
                                        * simply skipped.
                                        */
    4719         172 :                 while (start <= end) {
    4720         106 :                         eb = find_extent_buffer(fs_info, start);
    4721         106 :                         start += fs_info->nodesize;
    4722         106 :                         if (!eb)
    4723           0 :                                 continue;
    4724             :
    4725         106 :                         btrfs_tree_lock(eb);
    4726         106 :                         wait_on_extent_buffer_writeback(eb);
    4727         106 :                         btrfs_clear_buffer_dirty(NULL, eb);
    4728         106 :                         btrfs_tree_unlock(eb);
    4729             :
    4730         106 :                         free_extent_buffer_stale(eb);
    4731             :                 }
    4732             :         }
    4733             :
    4734          17 :         return ret;
    4735             : }
    4736             : 
    4737          17 : static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
    4738             :                                        struct extent_io_tree *unpin)
    4739             : {
                               /*
                                * Repeatedly find the first EXTENT_DIRTY range in @unpin, clear it
                                * and call btrfs_error_unpin_extent_range() on it, until no such
                                * range is left.  Each search always starts from offset 0.
                                *
                                * Always returns 0; ret is only used to end the loop.
                                */
    4740         130 :         u64 start;
    4741         130 :         u64 end;
    4742         130 :         int ret;
    4743             :
    4744         243 :         while (1) {
    4745         130 :                 struct extent_state *cached_state = NULL;
    4746             :
    4747             :                 /*
    4748             :                  * btrfs_finish_extent_commit() may get the same range as
    4749             :                  * ours between find_first_extent_bit and clear_extent_dirty.
    4750             :                  * Hence, hold the unused_bg_unpin_mutex to avoid unpinning
    4751             :                  * the same extent range twice.
    4752             :                  */
    4753         130 :                 mutex_lock(&fs_info->unused_bg_unpin_mutex);
    4754         130 :                 ret = find_first_extent_bit(unpin, 0, &start, &end,
    4755             :                                             EXTENT_DIRTY, &cached_state);
    4756         130 :                 if (ret) {
    4757          17 :                         mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    4758          17 :                         break;
    4759             :                 }
    4760             :
    4761         113 :                 clear_extent_dirty(unpin, start, end, &cached_state);
    4762         113 :                 free_extent_state(cached_state);
    4763         113 :                 btrfs_error_unpin_extent_range(fs_info, start, end);
    4764         113 :                 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
    4765         113 :                 cond_resched();
    4766             :         }
    4767             :
    4768          17 :         return 0;
    4769             : }
    4770             : 
    4771           0 : static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
    4772             : {
                               /*
                                * Release the io_ctl inode of @cache, if one is set: invalidate its
                                * page cache, reset the inode's generation to 0, clear the io_ctl
                                * pointer and drop the inode reference.  A block group reference is
                                * put at the end in either case.
                                * NOTE(review): presumably that is the reference held for being on
                                * an io list - confirm with the callers.
                                */
    4773           0 :         struct inode *inode;
    4774             :
    4775           0 :         inode = cache->io_ctl.inode;
    4776           0 :         if (inode) {
    4777           0 :                 unsigned int nofs_flag;
    4778             :
                                       /* Invalidate the page cache inside a NOFS allocation scope. */
    4779           0 :                 nofs_flag = memalloc_nofs_save();
    4780           0 :                 invalidate_inode_pages2(inode->i_mapping);
    4781           0 :                 memalloc_nofs_restore(nofs_flag);
    4782             :
    4783           0 :                 BTRFS_I(inode)->generation = 0;
    4784           0 :                 cache->io_ctl.inode = NULL;
    4785           0 :                 iput(inode);
    4786             :         }
    4787           0 :         ASSERT(cache->io_ctl.pages == NULL);
    4788           0 :         btrfs_put_block_group(cache);
    4789           0 : }
    4790             : 
    /*
     * Empty the dirty_bgs and io_bgs lists of @cur_trans, marking every block
     * group found on them with disk_cache_state = BTRFS_DC_ERROR.
     *
     * @cur_trans: transaction whose block group lists are drained
     * @fs_info:   passed through to btrfs_delayed_refs_rsv_release()
     *
     * The first loop walks dirty_bgs under cur_trans->dirty_bgs_lock.  The
     * lock is released around every call that leaves this function's own
     * list handling (btrfs_cleanup_bg_io(), btrfs_put_block_group(),
     * btrfs_delayed_refs_rsv_release()) and re-taken before the list is
     * examined again, which is why the loop always restarts from the list
     * head instead of iterating with a cursor.
     *
     * The second loop walks io_bgs with no lock held; the existing comment
     * above that loop points at the io_bgs member definition for the reason.
     */
    4791          17 : void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
    4792             :                              struct btrfs_fs_info *fs_info)
    4793             : {
    4794          17 :         struct btrfs_block_group *cache;
    4795             : 
    4796          17 :         spin_lock(&cur_trans->dirty_bgs_lock);
    4797          25 :         while (!list_empty(&cur_trans->dirty_bgs)) {
    4798           8 :                 cache = list_first_entry(&cur_trans->dirty_bgs,
    4799             :                                          struct btrfs_block_group,
    4800             :                                          dirty_list);
    4801             : 
    /* A dirty block group that is also linked on an io list gets its I/O state torn down first, with dirty_bgs_lock dropped. */
    4802           8 :                 if (!list_empty(&cache->io_list)) {
    4803           0 :                         spin_unlock(&cur_trans->dirty_bgs_lock);
    4804           0 :                         list_del_init(&cache->io_list);
    4805           0 :                         btrfs_cleanup_bg_io(cache);
    4806           0 :                         spin_lock(&cur_trans->dirty_bgs_lock);
    4807             :                 }
    4808             : 
    4809           8 :                 list_del_init(&cache->dirty_list);
    4810           8 :                 spin_lock(&cache->lock);
    4811           8 :                 cache->disk_cache_state = BTRFS_DC_ERROR;
    4812           8 :                 spin_unlock(&cache->lock);
    4813             : 
    /* Per dirty entry: one block group reference and a count of 1 of delayed refs reservation are released, outside dirty_bgs_lock. */
    4814           8 :                 spin_unlock(&cur_trans->dirty_bgs_lock);
    4815           8 :                 btrfs_put_block_group(cache);
    4816           8 :                 btrfs_delayed_refs_rsv_release(fs_info, 1);
    4817           8 :                 spin_lock(&cur_trans->dirty_bgs_lock);
    4818             :         }
    4819          17 :         spin_unlock(&cur_trans->dirty_bgs_lock);
    4820             : 
    4821             :         /*
    4822             :          * Refer to the definition of io_bgs member for details why it's safe
    4823             :          * to use it without any locking
    4824             :          */
    4825          17 :         while (!list_empty(&cur_trans->io_bgs)) {
    4826           0 :                 cache = list_first_entry(&cur_trans->io_bgs,
    4827             :                                          struct btrfs_block_group,
    4828             :                                          io_list);
    4829             : 
    4830           0 :                 list_del_init(&cache->io_list);
    4831           0 :                 spin_lock(&cache->lock);
    4832           0 :                 cache->disk_cache_state = BTRFS_DC_ERROR;
    4833           0 :                 spin_unlock(&cache->lock);
    4834           0 :                 btrfs_cleanup_bg_io(cache);
    4835             :         }
    4836          17 : }
    4837             : 
    /*
     * Discard everything a single transaction still tracks and walk it
     * through its remaining states so that waiters are released.
     *
     * @cur_trans: transaction to clean up
     * @fs_info:   filesystem the transaction belongs to
     *
     * In order, this function:
     *   1. drains the dirty and io block group lists and asserts both are
     *      empty afterwards;
     *   2. unlinks every device from cur_trans->dev_update_list;
     *   3. destroys the transaction's delayed refs;
     *   4. sets the state to TRANS_STATE_COMMIT_START and wakes
     *      fs_info->transaction_blocked_wait, then sets it to
     *      TRANS_STATE_UNBLOCKED and wakes fs_info->transaction_wait;
     *   5. destroys delayed inodes, the EXTENT_DIRTY ranges recorded in
     *      cur_trans->dirty_pages, and the pinned extents;
     *   6. sets the state to TRANS_STATE_COMPLETED and wakes
     *      cur_trans->commit_wait.
     *
     * The state field is written without taking any lock in this function.
     * NOTE(review): whether callers are expected to provide exclusion is not
     * visible here -- confirm before reordering anything.
     */
    4838          17 : void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
    4839             :                                    struct btrfs_fs_info *fs_info)
    4840             : {
    4841          17 :         struct btrfs_device *dev, *tmp;
    4842             : 
    4843          17 :         btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
    4844          17 :         ASSERT(list_empty(&cur_trans->dirty_bgs));
    4845          17 :         ASSERT(list_empty(&cur_trans->io_bgs));
    4846             : 
    /* The _safe iterator is required because each entry is unlinked while walking the list. */
    4847          18 :         list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list,
    4848             :                                  post_commit_list) {
    4849           1 :                 list_del_init(&dev->post_commit_list);
    4850             :         }
    4851             : 
    4852          17 :         btrfs_destroy_delayed_refs(cur_trans, fs_info);
    4853             : 
    4854          17 :         cur_trans->state = TRANS_STATE_COMMIT_START;
    4855          17 :         wake_up(&fs_info->transaction_blocked_wait);
    4856             : 
    4857          17 :         cur_trans->state = TRANS_STATE_UNBLOCKED;
    4858          17 :         wake_up(&fs_info->transaction_wait);
    4859             : 
    4860          17 :         btrfs_destroy_delayed_inodes(fs_info);
    4861             : 
    4862          17 :         btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
    4863             :                                      EXTENT_DIRTY);
    4864          17 :         btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
    4865             : 
    4866          17 :         cur_trans->state =TRANS_STATE_COMPLETED;
    4867          17 :         wake_up(&cur_trans->commit_wait);
    4868          17 : }
    4869             : 
    /*
     * Clean up every transaction on fs_info->trans_list, then destroy the
     * remaining filesystem-wide pending state.
     *
     * @fs_info: filesystem to clean up
     *
     * fs_info->transaction_kthread_mutex is held for the whole function.
     * fs_info->trans_lock protects the list walk and is dropped before any
     * call that waits or cleans up; the loop therefore re-reads the list
     * head on every iteration.
     *
     * For the first transaction on the list:
     *   - state >= TRANS_STATE_COMMIT_START: a use_count reference is taken,
     *     btrfs_wait_for_commit() is called for its transid, the reference
     *     is dropped and the loop restarts without cleaning it up here;
     *   - otherwise, if it is fs_info->running_transaction, its state is set
     *     to TRANS_STATE_COMMIT_DOING and the function waits until
     *     num_writers reaches 0;
     *   - in both remaining cases btrfs_cleanup_one_transaction() is called,
     *     the transaction is unlinked from the list (and
     *     running_transaction is cleared if it pointed at it), and
     *     btrfs_put_transaction() is called once.
     *
     * After the list is empty, ordered extents, delayed inodes, delalloc
     * inodes and logs are destroyed/dropped through the corresponding
     * helpers.
     *
     * Return: always 0.
     */
    4870          20 : static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
    4871             : {
    4872          20 :         struct btrfs_transaction *t;
    4873             : 
    4874          20 :         mutex_lock(&fs_info->transaction_kthread_mutex);
    4875             : 
    4876          20 :         spin_lock(&fs_info->trans_lock);
    4877          21 :         while (!list_empty(&fs_info->trans_list)) {
    4878           1 :                 t = list_first_entry(&fs_info->trans_list,
    4879             :                                      struct btrfs_transaction, list);
    4880           1 :                 if (t->state >= TRANS_STATE_COMMIT_START) {
    /* The extra use_count reference keeps t valid while trans_lock is dropped for the wait. */
    4881           0 :                         refcount_inc(&t->use_count);
    4882           0 :                         spin_unlock(&fs_info->trans_lock);
    4883           0 :                         btrfs_wait_for_commit(fs_info, t->transid);
    4884           0 :                         btrfs_put_transaction(t);
    4885           0 :                         spin_lock(&fs_info->trans_lock);
    4886           0 :                         continue;
    4887             :                 }
    4888           1 :                 if (t == fs_info->running_transaction) {
    4889           1 :                         t->state = TRANS_STATE_COMMIT_DOING;
    4890           1 :                         spin_unlock(&fs_info->trans_lock);
    4891             :                         /*
    4892             :                          * We wait for 0 num_writers since we don't hold a trans
    4893             :                          * handle open currently for this transaction.
    4894             :                          */
    4895           1 :                         wait_event(t->writer_wait,
    4896             :                                    atomic_read(&t->num_writers) == 0);
    4897             :                 } else {
    4898           0 :                         spin_unlock(&fs_info->trans_lock);
    4899             :                 }
    4900           1 :                 btrfs_cleanup_one_transaction(t, fs_info);
    4901             : 
    4902           1 :                 spin_lock(&fs_info->trans_lock);
    4903           1 :                 if (t == fs_info->running_transaction)
    4904           1 :                         fs_info->running_transaction = NULL;
    4905           1 :                 list_del_init(&t->list);
    4906           1 :                 spin_unlock(&fs_info->trans_lock);
    4907             : 
    /* NOTE(review): presumably this drops the reference that trans_list membership held -- confirm. */
    4908           1 :                 btrfs_put_transaction(t);
    4909           1 :                 trace_btrfs_transaction_commit(fs_info);
    4910           1 :                 spin_lock(&fs_info->trans_lock);
    4911             :         }
    4912          20 :         spin_unlock(&fs_info->trans_lock);
    4913          20 :         btrfs_destroy_all_ordered_extents(fs_info);
    4914          20 :         btrfs_destroy_delayed_inodes(fs_info);
    4915          20 :         btrfs_assert_delayed_root_empty(fs_info);
    4916          20 :         btrfs_destroy_all_delalloc_inodes(fs_info);
    4917          20 :         btrfs_drop_all_logs(fs_info);
    4918          20 :         mutex_unlock(&fs_info->transaction_kthread_mutex);
    4919             : 
    4920          20 :         return 0;
    4921             : }
    4922             : 
    /*
     * Initialise root->free_objectid from the keys currently in @root.
     *
     * @root: tree root whose free_objectid field is set
     *
     * Searches @root for the key (BTRFS_LAST_FREE_OBJECTID, type -1,
     * offset (u64)-1) with a NULL transaction handle and both trailing
     * btrfs_search_slot() arguments 0.  If the returned leaf slot is greater
     * than 0, the objectid of the key in the slot just before it is read
     * (presumably the largest key in the tree -- confirm) and free_objectid
     * becomes the larger of that objectid + 1 and BTRFS_FIRST_FREE_OBJECTID.
     * If the slot is 0, free_objectid is set to BTRFS_FIRST_FREE_OBJECTID.
     *
     * Return: 0 on success, -ENOMEM if the path cannot be allocated, or the
     * negative value returned by btrfs_search_slot().  The path is freed on
     * every exit taken after a successful allocation.
     *
     * NOTE(review): an exact match for the search key (ret == 0) hits
     * BUG_ON(), i.e. on-disk state that the code itself labels "Corruption"
     * stops the kernel instead of failing this call.  Returning an error
     * (for example -EUCLEAN) through the existing "error" label looks like
     * the safer behaviour -- confirm that all callers handle a negative
     * return before changing it.
     */
    4923       19784 : int btrfs_init_root_free_objectid(struct btrfs_root *root)
    4924             : {
    4925       19784 :         struct btrfs_path *path;
    4926       19784 :         int ret;
    4927       19784 :         struct extent_buffer *l;
    4928       19784 :         struct btrfs_key search_key;
    4929       19784 :         struct btrfs_key found_key;
    4930       19784 :         int slot;
    4931             : 
    4932       19784 :         path = btrfs_alloc_path();
    4933       19784 :         if (!path)
    4934             :                 return -ENOMEM;
    4935             : 
    4936       19784 :         search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
    4937       19784 :         search_key.type = -1;
    4938       19784 :         search_key.offset = (u64)-1;
    4939       19784 :         ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
    4940       19784 :         if (ret < 0)
    4941           0 :                 goto error;
    4942       19784 :         BUG_ON(ret == 0); /* Corruption */
    4943       19784 :         if (path->slots[0] > 0) {
    4944       19532 :                 slot = path->slots[0] - 1;
    4945       19532 :                 l = path->nodes[0];
    4946       19532 :                 btrfs_item_key_to_cpu(l, &found_key, slot);
    /* max_t() clamps the result so it never falls below BTRFS_FIRST_FREE_OBJECTID. */
    4947       19532 :                 root->free_objectid = max_t(u64, found_key.objectid + 1,
    4948             :                                             BTRFS_FIRST_FREE_OBJECTID);
    4949             :         } else {
    4950         252 :                 root->free_objectid = BTRFS_FIRST_FREE_OBJECTID;
    4951             :         }
    /* btrfs_search_slot() returned a positive value on this path; reset it so the caller sees success. */
    4952             :         ret = 0;
    4953       19784 : error:
    4954       19784 :         btrfs_free_path(path);
    4955       19784 :         return ret;
    4956             : }
    4957             : 
    /*
     * Hand out the next free object id of @root.
     *
     * @root:     tree root whose free_objectid counter is consumed
     * @objectid: on success, receives the value of root->free_objectid from
     *            before the increment; left untouched on failure
     *
     * The check and the post-increment both happen under
     * root->objectid_mutex.
     *
     * Return: 0 on success, or -ENOSPC (after logging a warning) when
     * root->free_objectid has already reached BTRFS_LAST_FREE_OBJECTID or
     * more.
     */
    4958     3256648 : int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
    4959             : {
    4960     3256648 :         int ret;
    4961     3256648 :         mutex_lock(&root->objectid_mutex);
    4962             : 
    4963     3256985 :         if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
    4964           0 :                 btrfs_warn(root->fs_info,
    4965             :                            "the objectid of root %llu reaches its highest value",
    4966             :                            root->root_key.objectid);
    4967           0 :                 ret = -ENOSPC;
    4968           0 :                 goto out;
    4969             :         }
    4970             : 
    4971     3256985 :         *objectid = root->free_objectid++;
    4972     3256985 :         ret = 0;
    4973     3256985 : out:
    4974     3256985 :         mutex_unlock(&root->objectid_mutex);
    4975     3256513 :         return ret;
    4976             : }

Generated by: LCOV version 1.14