LCOV - code coverage report
Current view: top level - fs/xfs/scrub - rmap_repair.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 600 675 88.9 %
Date: 2023-07-31 20:08:27 Functions: 41 41 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_btree.h"
      14             : #include "xfs_btree_staging.h"
      15             : #include "xfs_btree_mem.h"
      16             : #include "xfs_bit.h"
      17             : #include "xfs_log_format.h"
      18             : #include "xfs_trans.h"
      19             : #include "xfs_sb.h"
      20             : #include "xfs_alloc.h"
      21             : #include "xfs_alloc_btree.h"
      22             : #include "xfs_ialloc.h"
      23             : #include "xfs_ialloc_btree.h"
      24             : #include "xfs_rmap.h"
      25             : #include "xfs_rmap_btree.h"
      26             : #include "xfs_inode.h"
      27             : #include "xfs_icache.h"
      28             : #include "xfs_bmap.h"
      29             : #include "xfs_bmap_btree.h"
      30             : #include "xfs_refcount.h"
      31             : #include "xfs_refcount_btree.h"
      32             : #include "xfs_ag.h"
      33             : #include "xfs_rtrmap_btree.h"
      34             : #include "xfs_rtgroup.h"
      35             : #include "xfs_rtrefcount_btree.h"
      36             : #include "scrub/xfs_scrub.h"
      37             : #include "scrub/scrub.h"
      38             : #include "scrub/common.h"
      39             : #include "scrub/btree.h"
      40             : #include "scrub/trace.h"
      41             : #include "scrub/repair.h"
      42             : #include "scrub/bitmap.h"
      43             : #include "scrub/xfile.h"
      44             : #include "scrub/xfarray.h"
      45             : #include "scrub/iscan.h"
      46             : #include "scrub/newbt.h"
      47             : #include "scrub/reap.h"
      48             : #include "scrub/xfbtree.h"
      49             : 
      50             : /*
      51             :  * Reverse Mapping Btree Repair
      52             :  * ============================
      53             :  *
      54             :  * This is the most involved of all the AG space btree rebuilds.  Everywhere
      55             :  * else in XFS we lock inodes and then AG data structures, but generating the
      56             :  * list of rmap records requires that we be able to scan both block mapping
      57             :  * btrees of every inode in the filesystem to see if it owns any extents in
      58             :  * this AG.  We can't tolerate any inode updates while we do this, so we
      59             :  * freeze the filesystem to lock everyone else out, and grant ourselves
      60             :  * special privileges to run transactions with regular background reclamation
      61             :  * turned off.
      62             :  *
      63             :  * We also have to be very careful not to allow inode reclaim to start a
      64             :  * transaction because all transactions (other than our own) will block.
      65             :  * Deferred inode inactivation helps us out there.
      66             :  *
      67             :  * I) Reverse mappings for all non-space metadata and file data are collected
      68             :  * according to the following algorithm:
      69             :  *
      70             :  * 1. For each fork of each inode:
      71             :  * 1.1. Create a bitmap BMBIT to track bmbt blocks if necessary.
      72             :  * 1.2. If the incore extent map isn't loaded, walk the bmbt to accumulate
      73             :  *      bmaps into rmap records (see 1.4).  Set bits in BMBIT for each btree
      74             :  *      block.
      75             :  * 1.3. If the incore extent map is loaded but the fork is in btree format,
      76             :  *      just visit the bmbt blocks to set the corresponding BMBIT areas.
      77             :  * 1.4. From the incore extent map, accumulate each bmap that falls into our
      78             :  *      target AG.  Remember, multiple bmap records can map to a single rmap
      79             :  *      record, so we cannot simply emit rmap records 1:1.
      80             :  * 1.5. Emit rmap records for each extent in BMBIT and free it.
      81             :  * 2. Create bitmaps INOBIT and ICHUNKBIT.
      82             :  * 3. For each record in the inobt, set the corresponding areas in ICHUNKBIT,
      83             :  *    and set bits in INOBIT for each btree block.  If the inobt has no records
      84             :  *    at all, we must be careful to record its root in INOBIT.
      85             :  * 4. For each block in the finobt, set the corresponding INOBIT area.
      86             :  * 5. Emit rmap records for each extent in INOBIT and ICHUNKBIT and free them.
      87             :  * 6. Create bitmaps REFCBIT and COWBIT.
      88             :  * 7. For each CoW staging extent in the refcountbt, set the corresponding
      89             :  *    areas in COWBIT.
      90             :  * 8. For each block in the refcountbt, set the corresponding REFCBIT area.
      91             :  * 9. Emit rmap records for each extent in REFCBIT and COWBIT and free them.
      92             :  * A. Emit rmap for the AG headers.
      93             :  * B. Emit rmap for the log, if there is one.
      94             :  *
      95             :  * II) The rmapbt shape and space metadata rmaps are computed as follows:
      96             :  *
      97             :  * 1. Count the rmaps collected in the previous step. (= NR)
      98             :  * 2. Estimate the number of rmapbt blocks needed to store NR records. (= RMB)
      99             :  * 3. Reserve RMB blocks through the newbt using the allocator in normap mode.
     100             :  * 4. Create bitmap AGBIT.
     101             :  * 5. For each reservation in the newbt, set the corresponding areas in AGBIT.
     102             :  * 6. For each block in the AGFL, bnobt, and cntbt, set the bits in AGBIT.
     103             :  * 7. Count the extents in AGBIT. (= AGNR)
     104             :  * 8. Estimate the number of rmapbt blocks needed for NR + AGNR rmaps. (= RMB')
     105             :  * 9. If RMB' > RMB, reserve RMB' - RMB more newbt blocks, set RMB = RMB',
     106             :  *    and clear AGBIT.  Go to step 5.
     107             :  * A. Emit rmaps for each extent in AGBIT.
     108             :  *
     109             :  * III) The rmapbt is constructed and set in place as follows:
     110             :  *
     111             :  * 1. Sort the rmap records.
     112             :  * 2. Bulk load the rmaps.
     113             :  *
     114             :  * IV) Reap the old btree blocks.
     115             :  *
     116             :  * 1. Create a bitmap OLDRMBIT.
     117             :  * 2. For each gap in the new rmapbt, set the corresponding areas of OLDRMBIT.
     118             :  * 3. For each extent in the bnobt, clear the corresponding parts of OLDRMBIT.
     119             :  * 4. Reap the extents corresponding to the set areas in OLDRMBIT.  These are
     120             :  *    the parts of the AG that the rmap didn't find during its scan of the
     121             :  *    primary metadata and aren't known to be in the free space, which implies
     122             :  *    that they were the old rmapbt blocks.
     123             :  * 5. Commit.
     124             :  *
     125             :  * We use the 'xrep_rmap' prefix for all the rmap functions.
     126             :  */
     127             : 
     128             : /* Context for collecting rmaps */
     129             : struct xrep_rmap {
     130             :         /* new rmapbt information */
     131             :         struct xrep_newbt       new_btree;
     132             : 
     133             :         /* lock for the xfbtree and xfile */
     134             :         struct mutex            lock;
     135             : 
     136             :         /* rmap records generated from primary metadata */
     137             :         struct xfbtree          *rmap_btree;
     138             : 
     139             :         struct xfs_scrub        *sc;
     140             : 
     141             :         /* in-memory btree cursor for the xfs_btree_bload iteration */
     142             :         struct xfs_btree_cur    *mcur;
     143             : 
     144             :         /* Hooks into rmap update code. */
     145             :         struct xfs_rmap_hook    hooks;
     146             : 
     147             :         /* inode scan cursor */
     148             :         struct xchk_iscan       iscan;
     149             : 
     150             :         /* Number of non-freespace records found. */
     151             :         unsigned long long      nr_records;
     152             : 
     153             :         /* bnobt/cntbt contribution to btreeblks */
     154             :         xfs_agblock_t           freesp_btblocks;
     155             : 
     156             :         /* old agf_rmap_blocks counter */
     157             :         unsigned int            old_rmapbt_fsbcount;
     158             : };
     159             : 
     160             : /* Set us up to repair reverse mapping btrees. */
     161             : int
     162        9312 : xrep_setup_ag_rmapbt(
     163             :         struct xfs_scrub        *sc)
     164             : {
     165        9312 :         struct xrep_rmap        *rr;
     166        9312 :         char                    *descr;
     167        9312 :         int                     error;
     168             : 
     169        9312 :         xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
     170             : 
     171        9314 :         descr = xchk_xfile_ag_descr(sc, "reverse mapping records");
     172        9249 :         error = xrep_setup_buftarg(sc, descr);
     173        9319 :         kfree(descr);
     174        9319 :         if (error)
     175             :                 return error;
     176             : 
     177        9319 :         rr = kzalloc(sizeof(struct xrep_rmap), XCHK_GFP_FLAGS);
     178        9319 :         if (!rr)
     179             :                 return -ENOMEM;
     180             : 
     181        9319 :         rr->sc = sc;
     182        9319 :         sc->buf = rr;
     183        9319 :         return 0;
     184             : }
     185             : 
     186             : /* Make sure there's nothing funny about this mapping. */
     187             : STATIC int
     188    19163077 : xrep_rmap_check_mapping(
     189             :         struct xfs_scrub        *sc,
     190             :         const struct xfs_rmap_irec *rec)
     191             : {
     192    19163077 :         enum xbtree_recpacking  outcome;
     193    19163077 :         int                     error;
     194             : 
     195    19163077 :         if (xfs_rmap_check_perag_irec(sc->sa.pag, rec) != NULL)
     196             :                 return -EFSCORRUPTED;
     197             : 
     198             :         /* Make sure this isn't free space. */
     199    19163078 :         error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
     200    19163078 :                         rec->rm_blockcount, &outcome);
     201    19163081 :         if (error)
     202             :                 return error;
     203    19163081 :         if (outcome != XBTREE_RECPACKING_EMPTY)
     204           0 :                 return -EFSCORRUPTED;
     205             : 
     206             :         return 0;
     207             : }
     208             : 
     209             : /* Store a reverse-mapping record. */
     210             : static inline int
     211    19457132 : xrep_rmap_stash(
     212             :         struct xrep_rmap        *rr,
     213             :         xfs_agblock_t           startblock,
     214             :         xfs_extlen_t            blockcount,
     215             :         uint64_t                owner,
     216             :         uint64_t                offset,
     217             :         unsigned int            flags)
     218             : {
     219    19457132 :         struct xfs_rmap_irec    rmap = {
     220             :                 .rm_startblock  = startblock,
     221             :                 .rm_blockcount  = blockcount,
     222             :                 .rm_owner       = owner,
     223             :                 .rm_offset      = offset,
     224             :                 .rm_flags       = flags,
     225             :         };
     226    19457132 :         struct xfs_scrub        *sc = rr->sc;
     227    19457132 :         struct xfs_btree_cur    *mcur;
     228    19457132 :         struct xfs_buf          *mhead_bp;
     229    19457132 :         int                     error = 0;
     230             : 
     231    19457132 :         if (xchk_should_terminate(sc, &error))
     232           3 :                 return error;
     233             : 
     234    19457141 :         if (xchk_iscan_aborted(&rr->iscan))
     235             :                 return -EFSCORRUPTED;
     236             : 
     237    19457140 :         trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap);
     238             : 
     239    19457140 :         mutex_lock(&rr->lock);
     240    19457165 :         error = xfbtree_head_read_buf(rr->rmap_btree, sc->tp, &mhead_bp);
     241    19457180 :         if (error)
     242           0 :                 goto out_abort;
     243             : 
     244    19457180 :         mcur = xfs_rmapbt_mem_cursor(sc->sa.pag, sc->tp, mhead_bp,
     245             :                         rr->rmap_btree);
     246    19457163 :         error = xfs_rmap_map_raw(mcur, &rmap);
     247    19457113 :         xfs_btree_del_cursor(mcur, error);
     248    19457162 :         if (error)
     249           0 :                 goto out_cancel;
     250             : 
     251    19457162 :         error = xfbtree_trans_commit(rr->rmap_btree, sc->tp);
     252    19457185 :         if (error)
     253           0 :                 goto out_abort;
     254             : 
     255    19457185 :         mutex_unlock(&rr->lock);
     256    19457185 :         return 0;
     257             : 
     258             : out_cancel:
     259           0 :         xfbtree_trans_cancel(rr->rmap_btree, sc->tp);
     260           0 : out_abort:
     261           0 :         xchk_iscan_abort(&rr->iscan);
     262           0 :         mutex_unlock(&rr->lock);
     263           0 :         return error;
     264             : }
     265             : 
     266             : struct xrep_rmap_stash_run {
     267             :         struct xrep_rmap        *rr;
     268             :         uint64_t                owner;
     269             :         unsigned int            rmap_flags;
     270             : };
     271             : 
     272             : static int
     273     3353395 : xrep_rmap_stash_run(
     274             :         uint64_t                        start,
     275             :         uint64_t                        len,
     276             :         void                            *priv)
     277             : {
     278     3353395 :         struct xrep_rmap_stash_run      *rsr = priv;
     279     3353395 :         struct xrep_rmap                *rr = rsr->rr;
     280             : 
     281     3353395 :         return xrep_rmap_stash(rr, start, len, rsr->owner, 0, rsr->rmap_flags);
     282             : }
     283             : 
     284             : /*
     285             :  * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
     286             :  * that the ranges are in units of FS blocks.
     287             :  */
     288             : STATIC int
     289     9063469 : xrep_rmap_stash_bitmap(
     290             :         struct xrep_rmap                *rr,
     291             :         struct xagb_bitmap              *bitmap,
     292             :         const struct xfs_owner_info     *oinfo)
     293             : {
     294     9063469 :         struct xrep_rmap_stash_run      rsr = {
     295             :                 .rr                     = rr,
     296     9063469 :                 .owner                  = oinfo->oi_owner,
     297             :                 .rmap_flags             = 0,
     298             :         };
     299             : 
     300     9063469 :         if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
     301           0 :                 rsr.rmap_flags |= XFS_RMAP_ATTR_FORK;
     302     9063469 :         if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
     303     9017228 :                 rsr.rmap_flags |= XFS_RMAP_BMBT_BLOCK;
     304             : 
     305     9063469 :         return xagb_bitmap_walk(bitmap, xrep_rmap_stash_run, &rsr);
     306             : }
     307             : 
     308             : /* Section (I): Finding all file and bmbt extents. */
     309             : 
     310             : /* Context for accumulating rmaps for an inode fork. */
     311             : struct xrep_rmap_ifork {
     312             :         /*
     313             :          * Accumulate rmap data here to turn multiple adjacent bmaps into a
     314             :          * single rmap.
     315             :          */
     316             :         struct xfs_rmap_irec    accum;
     317             : 
     318             :         /* Bitmap of bmbt blocks in this AG. */
     319             :         struct xagb_bitmap      bmbt_blocks;
     320             : 
     321             :         struct xrep_rmap        *rr;
     322             : 
     323             :         /* Which inode fork? */
     324             :         int                     whichfork;
     325             : };
     326             : 
     327             : /* Stash an rmap that we accumulated while walking an inode fork. */
     328             : STATIC int
     329    35357424 : xrep_rmap_stash_accumulated(
     330             :         struct xrep_rmap_ifork  *rf)
     331             : {
     332    35357424 :         if (rf->accum.rm_blockcount == 0)
     333             :                 return 0;
     334             : 
     335    16092193 :         return xrep_rmap_stash(rf->rr, rf->accum.rm_startblock,
     336             :                         rf->accum.rm_blockcount, rf->accum.rm_owner,
     337             :                         rf->accum.rm_offset, rf->accum.rm_flags);
     338             : }
     339             : 
     340             : /* Accumulate a bmbt record. */
     341             : STATIC int
     342    64311808 : xrep_rmap_visit_bmbt(
     343             :         struct xfs_btree_cur    *cur,
     344             :         struct xfs_bmbt_irec    *rec,
     345             :         void                    *priv)
     346             : {
     347    64311808 :         struct xrep_rmap_ifork  *rf = priv;
     348    64311808 :         struct xfs_mount        *mp = rf->rr->sc->mp;
     349    64311808 :         struct xfs_rmap_irec    *accum = &rf->accum;
     350    64311808 :         xfs_agblock_t           agbno;
     351    64311808 :         unsigned int            rmap_flags = 0;
     352    64311808 :         int                     error;
     353             : 
     354    64311808 :         if (XFS_FSB_TO_AGNO(mp, rec->br_startblock) !=
     355    64311808 :                         rf->rr->sc->sa.pag->pag_agno)
     356             :                 return 0;
     357             : 
     358    16092201 :         agbno = XFS_FSB_TO_AGBNO(mp, rec->br_startblock);
     359    16092201 :         if (rf->whichfork == XFS_ATTR_FORK)
     360      128340 :                 rmap_flags |= XFS_RMAP_ATTR_FORK;
     361    16092201 :         if (rec->br_state == XFS_EXT_UNWRITTEN)
     362     2125410 :                 rmap_flags |= XFS_RMAP_UNWRITTEN;
     363             : 
     364             :         /* If this bmap is adjacent to the previous one, just add it. */
     365    16092201 :         if (accum->rm_blockcount > 0 &&
     366    13394703 :             rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
     367     2133565 :             agbno == accum->rm_startblock + accum->rm_blockcount &&
     368      788612 :             rmap_flags == accum->rm_flags) {
     369           0 :                 accum->rm_blockcount += rec->br_blockcount;
     370           0 :                 return 0;
     371             :         }
     372             : 
     373             :         /* Otherwise stash the old rmap and start accumulating a new one. */
     374    16092201 :         error = xrep_rmap_stash_accumulated(rf);
     375    16092189 :         if (error)
     376             :                 return error;
     377             : 
     378    16092186 :         accum->rm_startblock = agbno;
     379    16092186 :         accum->rm_blockcount = rec->br_blockcount;
     380    16092186 :         accum->rm_offset = rec->br_startoff;
     381    16092186 :         accum->rm_flags = rmap_flags;
     382    16092186 :         return 0;
     383             : }
     384             : 
     385             : /* Add a btree block to the bitmap. */
     386             : STATIC int
     387    20583656 : xrep_rmap_visit_iroot_btree_block(
     388             :         struct xfs_btree_cur    *cur,
     389             :         int                     level,
     390             :         void                    *priv)
     391             : {
     392    20583656 :         struct xrep_rmap_ifork  *rf = priv;
     393    20583656 :         struct xfs_buf          *bp;
     394    20583656 :         xfs_fsblock_t           fsbno;
     395    20583656 :         xfs_agblock_t           agbno;
     396             : 
     397    20583656 :         xfs_btree_get_block(cur, level, &bp);
     398    20583661 :         if (!bp)
     399             :                 return 0;
     400             : 
     401    11566433 :         fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
     402    11566433 :         if (XFS_FSB_TO_AGNO(cur->bc_mp, fsbno) != rf->rr->sc->sa.pag->pag_agno)
     403             :                 return 0;
     404             : 
     405     2891393 :         agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
     406     2891393 :         return xagb_bitmap_set(&rf->bmbt_blocks, agbno, 1);
     407             : }
     408             : 
     409             : /*
     410             :  * Iterate a metadata btree rooted in an inode to collect rmap records for
     411             :  * anything in this fork that matches the AG.
     412             :  */
     413             : STATIC int
     414     9017228 : xrep_rmap_scan_iroot_btree(
     415             :         struct xrep_rmap_ifork  *rf,
     416             :         struct xfs_btree_cur    *cur)
     417             : {
     418     9017228 :         struct xfs_owner_info   oinfo;
     419     9017228 :         struct xrep_rmap        *rr = rf->rr;
     420     9017228 :         int                     error;
     421             : 
     422     9017228 :         xagb_bitmap_init(&rf->bmbt_blocks);
     423             : 
     424             :         /* Record all the blocks in the btree itself. */
     425     9017228 :         error = xfs_btree_visit_blocks(cur, xrep_rmap_visit_iroot_btree_block,
     426             :                         XFS_BTREE_VISIT_ALL, rf);
     427     9017228 :         if (error)
     428           0 :                 goto out;
     429             : 
     430             :         /* Emit rmaps for the btree blocks. */
     431     9017228 :         xfs_rmap_ino_bmbt_owner(&oinfo, rf->accum.rm_owner, rf->whichfork);
     432     9017228 :         error = xrep_rmap_stash_bitmap(rr, &rf->bmbt_blocks, &oinfo);
     433     9017226 :         if (error)
     434           0 :                 goto out;
     435             : 
     436             :         /* Stash any remaining accumulated rmaps. */
     437     9017226 :         error = xrep_rmap_stash_accumulated(rf);
     438     9017225 : out:
     439     9017225 :         xagb_bitmap_destroy(&rf->bmbt_blocks);
     440     9017225 :         return error;
     441             : }
     442             : 
     443             : static inline bool
     444             : is_rt_data_fork(
     445             :         struct xfs_inode        *ip,
     446             :         int                     whichfork)
     447             : {
     448             :         return XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK;
     449             : }
     450             : 
     451             : /*
     452             :  * Iterate the block mapping btree to collect rmap records for anything in this
     453             :  * fork that matches the AG.  Sets @mappings_done to true if we've scanned the
     454             :  * block mappings in this fork.
     455             :  */
     456             : STATIC int
     457     8967991 : xrep_rmap_scan_bmbt(
     458             :         struct xrep_rmap_ifork  *rf,
     459             :         struct xfs_inode        *ip,
     460             :         bool                    *mappings_done)
     461             : {
     462     8967991 :         struct xrep_rmap        *rr = rf->rr;
     463     8967991 :         struct xfs_btree_cur    *cur;
     464     8967991 :         struct xfs_ifork        *ifp;
     465     8967991 :         int                     error;
     466             : 
     467     8967991 :         *mappings_done = false;
     468     8967991 :         ifp = xfs_ifork_ptr(ip, rf->whichfork);
     469     8967991 :         cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, rf->whichfork);
     470             : 
     471     9641228 :         if (!xfs_ifork_is_realtime(ip, rf->whichfork) &&
     472             :             xfs_need_iread_extents(ifp)) {
     473             :                 /*
     474             :                  * If the incore extent cache isn't loaded, scan the bmbt for
     475             :                  * mapping records.  This avoids loading the incore extent
     476             :                  * tree, which will increase memory pressure at a time when
     477             :                  * we're trying to run as quickly as we possibly can.  Ignore
     478             :                  * realtime extents.
     479             :                  */
     480       51248 :                 error = xfs_bmap_query_all(cur, xrep_rmap_visit_bmbt, rf);
     481       51248 :                 if (error)
     482           0 :                         goto out_cur;
     483             : 
     484       51248 :                 *mappings_done = true;
     485             :         }
     486             : 
     487             :         /* Scan for the bmbt blocks, which always live on the data device. */
     488     8967990 :         error = xrep_rmap_scan_iroot_btree(rf, cur);
     489     8967988 : out_cur:
     490     8967988 :         xfs_btree_del_cursor(cur, error);
     491     8967991 :         return error;
     492             : }
     493             : 
     494             : /*
     495             :  * Iterate the in-core extent cache to collect rmap records for anything in
     496             :  * this fork that matches the AG.
     497             :  */
     498             : STATIC int
     499    10248109 : xrep_rmap_scan_iext(
     500             :         struct xrep_rmap_ifork  *rf,
     501             :         struct xfs_ifork        *ifp)
     502             : {
     503    10248109 :         struct xfs_bmbt_irec    rec;
     504    10248109 :         struct xfs_iext_cursor  icur;
     505    10248109 :         int                     error;
     506             : 
     507    73629276 :         for_each_xfs_iext(ifp, &icur, &rec) {
     508    63381143 :                 if (isnullstartblock(rec.br_startblock))
     509        9884 :                         continue;
     510    63371259 :                 error = xrep_rmap_visit_bmbt(NULL, &rec, rf);
     511    63371286 :                 if (error)
     512           3 :                         return error;
     513             :         }
     514             : 
     515    10248105 :         return xrep_rmap_stash_accumulated(rf);
     516             : }
     517             : 
     518             : static int
     519       24620 : xrep_rmap_scan_rtrmapbt(
     520             :         struct xrep_rmap_ifork  *rf,
     521             :         struct xfs_inode        *ip)
     522             : {
     523       24620 :         struct xfs_scrub        *sc = rf->rr->sc;
     524       24620 :         struct xfs_btree_cur    *cur;
     525       24620 :         struct xfs_rtgroup      *rtg;
     526       24620 :         xfs_rgnumber_t          rgno;
     527       24620 :         int                     error;
     528             : 
     529       24620 :         if (rf->whichfork != XFS_DATA_FORK)
     530             :                 return -EFSCORRUPTED;
     531             : 
     532       73860 :         for_each_rtgroup(sc->mp, rgno, rtg) {
     533       73860 :                 if (ip == rtg->rtg_rmapip) {
     534       24620 :                         cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, rtg, ip);
     535       24620 :                         error = xrep_rmap_scan_iroot_btree(rf, cur);
     536       24620 :                         xfs_btree_del_cursor(cur, error);
     537       24620 :                         xfs_rtgroup_rele(rtg);
     538       24620 :                         return error;
     539             :                 }
     540             :         }
     541             : 
     542             :         /*
     543             :          * We shouldn't find an rmap format inode that isn't associated with
     544             :          * an rtgroup!
     545             :          */
     546           0 :         ASSERT(0);
     547           0 :         return -EFSCORRUPTED;
     548             : }
     549             : /*
                     :  * Scan the realtime refcount btree whose root lives in inode @ip.
                     :  *
                     :  * Only the data fork is accepted; any other fork yields -EFSCORRUPTED.
                     :  * The rtgroups are walked to find the one whose rtg_refcountip is @ip.  A
                     :  * cursor is opened on that group's rtrefcount btree, handed to
                     :  * xrep_rmap_scan_iroot_btree(), and torn down again.  The helper's return
                     :  * value is passed back to the caller.
                     :  *
                     :  * NOTE(review): xfs_rtgroup_rele() on the early-return path presumably
                     :  * drops a reference that for_each_rtgroup holds on @rtg -- confirm against
                     :  * the iterator definition.
                     :  */
     550             : static int
     551       24620 : xrep_rmap_scan_rtrefcountbt(
     552             :         struct xrep_rmap_ifork  *rf,
     553             :         struct xfs_inode        *ip)
     554             : {
     555       24620 :         struct xfs_scrub        *sc = rf->rr->sc;
     556       24620 :         struct xfs_btree_cur    *cur;
     557       24620 :         struct xfs_rtgroup      *rtg;
     558       24620 :         xfs_rgnumber_t          rgno;
     559       24620 :         int                     error;
     560             : 
     561       24620 :         if (rf->whichfork != XFS_DATA_FORK)
     562             :                 return -EFSCORRUPTED;
     563             : 
     564       73859 :         for_each_rtgroup(sc->mp, rgno, rtg) {
     565       73858 :                 if (ip == rtg->rtg_refcountip) {
     566       24619 :                         cur = xfs_rtrefcountbt_init_cursor(sc->mp, sc->tp, rtg,
     567             :                                         ip);
     568       24619 :                         error = xrep_rmap_scan_iroot_btree(rf, cur);
     569       24620 :                         xfs_btree_del_cursor(cur, error);
     570       24620 :                         xfs_rtgroup_rele(rtg);
     571       24620 :                         return error;
     572             :                 }
     573             :         }
     574             : 
     575             :         /*
     576             :          * We shouldn't find a refcount format inode that isn't associated with
     577             :          * an rtgroup!
                     :          * Reaching this point trips the assertion and reports the inode
                     :          * as corrupt.
     578             :          */
     579           0 :         ASSERT(0);
     580           0 :         return -EFSCORRUPTED;
     581             : }
     582             : 
     583             : /*
                     :  * Find all the extents from a given AG in an inode fork.
                     :  *
                     :  * @rr is the repair context, @ip the inode being scanned and @whichfork
                     :  * the fork selector.  A scan context is built whose accumulator is owned by
                     :  * ip->i_ino, then the fork format picks the scanner:
                     :  *
                     :  *  - no such fork: nothing to do, return 0;
                     :  *  - BTREE: xrep_rmap_scan_bmbt(); return if it failed or reported that the
                     :  *    mappings are already done, otherwise continue to the incore scan;
                     :  *  - RMAP / REFCOUNT: delegate to the rt rmap / rt refcount btree scanners;
                     :  *  - anything other than EXTENTS: return 0.
                     :  *
                     :  * Forks that reach the end are scanned through the incore extent cache,
                     :  * unless the fork is a realtime one, in which case 0 is returned.
                     :  * Returns 0 or the error from whichever scanner ran.
                     :  */
     584             : STATIC int
     585   188374328 : xrep_rmap_scan_ifork(
     586             :         struct xrep_rmap        *rr,
     587             :         struct xfs_inode        *ip,
     588             :         int                     whichfork)
     589             : {
     590   188374328 :         struct xrep_rmap_ifork  rf = {
     591   188374328 :                 .accum          = { .rm_owner = ip->i_ino, },
     592             :                 .rr             = rr,
     593             :                 .whichfork      = whichfork,
     594             :         };
     595   188374328 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
     596   188374629 :         int                     error = 0;
     597             : 
     598   188374629 :         if (!ifp)
     599             :                 return 0;
     600             : 
     601   188361341 :         if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
     602     8967991 :                 bool            mappings_done;
     603             : 
     604             :                 /*
     605             :                  * Scan the bmap btree for data device mappings.  This includes
     606             :                  * the btree blocks themselves, even if this is a realtime
     607             :                  * file.
     608             :                  */
     609     8967991 :                 error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
     610     8967991 :                 if (error || mappings_done)
     611       51248 :                         return error;
     612   179393350 :         } else if (ifp->if_format == XFS_DINODE_FMT_RMAP) {
     613       24620 :                 return xrep_rmap_scan_rtrmapbt(&rf, ip);
     614   179368730 :         } else if (ifp->if_format == XFS_DINODE_FMT_REFCOUNT) {
     615       24620 :                 return xrep_rmap_scan_rtrefcountbt(&rf, ip);
     616   179344110 :         } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
     617             :                 return 0;
     618             :         }
     619             : 
     620             :         /* Scan incore extent cache if this isn't a realtime file. */
     621    40531527 :         if (xfs_ifork_is_realtime(ip, whichfork))
     622             :                 return 0;
     623             : 
     624    10248110 :         return xrep_rmap_scan_iext(&rf, ifp);
     625             : }
     626             : 
     627             : /*
     628             :  * Take ILOCK on a file that we want to scan.
     629             :  *
     630             :  * Select ILOCK_EXCL if the file has an unloaded data bmbt or has an unloaded
     631             :  * attr bmbt.  Otherwise, take ILOCK_SHARED.
                     :  *
                     :  * "Unloaded" means xfs_need_iread_extents() returns true for that fork.
                     :  * The attr fork is only examined when the inode actually has one.
                     :  *
                     :  * Returns the lock mode that was taken so that the caller can hand the
                     :  * same mode to xfs_iunlock().
                     :  *
                     :  * NOTE(review): the exclusive lock is presumably needed because the scan
                     :  * will have to load the extent list into the incore fork -- confirm.
     632             :  */
     633             : static inline unsigned int
     634    94187071 : xrep_rmap_scan_ilock(
     635             :         struct xfs_inode        *ip)
     636             : {
     637    94187071 :         uint                    lock_mode = XFS_ILOCK_SHARED;
     638             : 
     639    94187071 :         if (xfs_need_iread_extents(&ip->i_df)) {
     640      520037 :                 lock_mode = XFS_ILOCK_EXCL;
     641      520037 :                 goto lock;
     642             :         }
     643             : 
     644   187320775 :         if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
     645           0 :                 lock_mode = XFS_ILOCK_EXCL;
     646             : 
     647    93667099 : lock:
     648    94187136 :         xfs_ilock(ip, lock_mode);
     649    94187314 :         return lock_mode;
     650             : }
     651             : 
     652             : /*
                     :  * Record reverse mappings for a file.
                     :  *
                     :  * The inode is locked with the mode chosen by xrep_rmap_scan_ilock(), then
                     :  * the data fork and the attr fork are scanned in that order.  The inode is
                     :  * marked as visited in rr->iscan only when both scans succeed; on failure
                     :  * that step is skipped.  The ILOCK is dropped on every path.
                     :  *
                     :  * Returns 0 or the first error reported by a fork scan.
                     :  */
     653             : STATIC int
     654    94187101 : xrep_rmap_scan_inode(
     655             :         struct xrep_rmap        *rr,
     656             :         struct xfs_inode        *ip)
     657             : {
     658    94187101 :         unsigned int            lock_mode = xrep_rmap_scan_ilock(ip);
     659    94187375 :         int                     error;
     660             : 
     661             :         /* Check the data fork. */
     662    94187375 :         error = xrep_rmap_scan_ifork(rr, ip, XFS_DATA_FORK);
     663    94187318 :         if (error)
     664           3 :                 goto out_unlock;
     665             : 
     666             :         /* Check the attr fork. */
     667    94187315 :         error = xrep_rmap_scan_ifork(rr, ip, XFS_ATTR_FORK);
     668    94187430 :         if (error)
     669           0 :                 goto out_unlock;
     670             : 
     671             :         /* COW fork extents are "owned" by the refcount btree. */
     672             : 
     673    94187430 :         xchk_iscan_mark_visited(&rr->iscan, ip);
     674    94187146 : out_unlock:
     675    94187146 :         xfs_iunlock(ip, lock_mode);
     676    94187664 :         return error;
     677             : }
     678             : 
     679             : /* Section (I): Find all AG metadata extents except for free space metadata. */
     680             : 
     681             : struct xrep_rmap_inodes {       /* private data for xrep_rmap_walk_inobt() */
     682             :         struct xrep_rmap        *rr;            /* owning repair context */
     683             :         struct xagb_bitmap      inobt_blocks;   /* INOBIT: inode btree blocks, stashed as XFS_RMAP_OINFO_INOBT */
     684             :         struct xagb_bitmap      ichunk_blocks;  /* ICHUNKBIT: inode chunk blocks, stashed as XFS_RMAP_OINFO_INODES */
     685             : };
     686             : 
     687             : /*
                     :  * Record inode btree rmaps.
                     :  *
                     :  * Record callback passed to xfs_btree_query_all(); @priv is a struct
                     :  * xrep_rmap_inodes.  For each inobt record this:
                     :  *
                     :  *  1. adds the btree blocks on the cursor's path to inobt_blocks;
                     :  *  2. decodes the record and rejects it with -EFSCORRUPTED if
                     :  *     xfs_inobt_check_irec() finds a problem;
                     :  *  3. adds the blocks backing the inode chunk to ichunk_blocks.  A chunk
                     :  *     with no holes is added as one extent; a sparse chunk is added piece by
                     :  *     piece, skipping the holes.
                     :  *
                     :  * Returns 0, -EFSCORRUPTED, or an error from the bitmap helpers.
                     :  */
     688             : STATIC int
     689      488061 : xrep_rmap_walk_inobt(
     690             :         struct xfs_btree_cur            *cur,
     691             :         const union xfs_btree_rec       *rec,
     692             :         void                            *priv)
     693             : {
     694      488061 :         struct xfs_inobt_rec_incore     irec;
     695      488061 :         struct xrep_rmap_inodes         *ri = priv;
     696      488061 :         struct xfs_mount                *mp = cur->bc_mp;
     697      488061 :         xfs_agblock_t                   agbno;
     698      488061 :         xfs_agino_t                     agino;
     699      488061 :         xfs_agino_t                     iperhole;
     700      488061 :         unsigned int                    i;
     701      488061 :         int                             error;
     702             : 
     703             :         /* Record the inobt blocks. */
     704      488061 :         error = xagb_bitmap_set_btcur_path(&ri->inobt_blocks, cur);
     705      488061 :         if (error)
     706             :                 return error;
     707             : 
     708      488061 :         xfs_inobt_btrec_to_irec(mp, rec, &irec);
     709      488061 :         if (xfs_inobt_check_irec(cur, &irec) != NULL)
     710             :                 return -EFSCORRUPTED;
     711             : 
     712      488061 :         agino = irec.ir_startino;
     713             : 
     714             :         /* Record a non-sparse inode chunk. */
     715      488061 :         if (!xfs_inobt_issparse(irec.ir_holemask)) {
     716      252477 :                 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
     717             : 
     718      252477 :                 return xagb_bitmap_set(&ri->ichunk_blocks, agbno,
     719      252477 :                                 XFS_INODES_PER_CHUNK / mp->m_sb.sb_inopblock);
     720             :         }
     721             : 
     722             :         /*
                     :          * Iterate each chunk.
                     :          *
                     :          * iperhole is the number of inodes handled per step: the larger
                     :          * of the inodes in one block and the inodes covered by one
                     :          * holemask bit.  Each step advances the holemask bit index by
                     :          * iperhole / XFS_INODES_PER_HOLEMASK_BIT and agino by iperhole.
                     :          */
     723      235584 :         iperhole = max_t(xfs_agino_t, mp->m_sb.sb_inopblock,
     724             :                         XFS_INODES_PER_HOLEMASK_BIT);
     725      235584 :         for (i = 0, agino = irec.ir_startino;
     726     2114028 :              i < XFS_INOBT_HOLEMASK_BITS;
     727     1878444 :              i += iperhole / XFS_INODES_PER_HOLEMASK_BIT, agino += iperhole) {
     728             :                 /* Skip holes. */
     729     1878444 :                 if (irec.ir_holemask & (1 << i))
     730      939222 :                         continue;
     731             : 
     732             :                 /* Record the inode chunk otherwise. */
     733      939222 :                 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
     734      939222 :                 error = xagb_bitmap_set(&ri->ichunk_blocks, agbno,
     735      939222 :                                 iperhole / mp->m_sb.sb_inopblock);
     736      939222 :                 if (error)
     737           0 :                         return error;
     738             :         }
     739             : 
     740             :         return 0;
     741             : }
     742             : 
     743             : /*
                     :  * Collect rmaps for the blocks containing inode btrees and the inode chunks.
                     :  *
                     :  * Two bitmaps are filled in: one for inobt (and, if the feature is enabled,
                     :  * finobt) blocks and one for inode chunk blocks.  Each bitmap is then
                     :  * stashed into the repair context with its own owner info.  Both bitmaps
                     :  * are destroyed before returning, on success and on error alike.
                     :  *
                     :  * Uses the inobt/finobt cursors and the AGI buffer held in sc->sa.
                     :  * Returns 0 or the first error encountered.
                     :  */
     744             : STATIC int
     745        9261 : xrep_rmap_find_inode_rmaps(
     746             :         struct xrep_rmap        *rr)
     747             : {
     748        9261 :         struct xrep_rmap_inodes ri = {
     749             :                 .rr             = rr,
     750             :         };
     751        9261 :         struct xfs_scrub        *sc = rr->sc;
     752        9261 :         int                     error;
     753             : 
     754        9261 :         xagb_bitmap_init(&ri.inobt_blocks);
     755        9260 :         xagb_bitmap_init(&ri.ichunk_blocks);
     756             : 
     757             :         /*
     758             :          * Iterate every record in the inobt so we can capture all the inode
     759             :          * chunks and the blocks in the inobt itself.
     760             :          */
     761        9252 :         error = xfs_btree_query_all(sc->sa.ino_cur, xrep_rmap_walk_inobt, &ri);
     762        9261 :         if (error)
     763           0 :                 goto out_bitmap;
     764             : 
     765             :         /*
     766             :          * Note that if there are zero records in the inobt then query_all does
     767             :          * nothing and we have to account the empty inobt root manually.
                     :          * An empty chunk bitmap is the signal that no record was seen;
                     :          * the root block number is read from the AGI.
     768             :          */
     769        9261 :         if (xagb_bitmap_empty(&ri.ichunk_blocks)) {
     770        4217 :                 struct xfs_agi  *agi = sc->sa.agi_bp->b_addr;
     771             : 
     772        4217 :                 error = xagb_bitmap_set(&ri.inobt_blocks,
     773        4217 :                                 be32_to_cpu(agi->agi_root), 1);
     774        4217 :                 if (error)
     775           0 :                         goto out_bitmap;
     776             :         }
     777             : 
     778             :         /* Scan the finobt too. */
     779        9261 :         if (xfs_has_finobt(sc->mp)) {
     780        9261 :                 error = xagb_bitmap_set_btblocks(&ri.inobt_blocks,
     781             :                                 sc->sa.fino_cur);
     782        9260 :                 if (error)
     783           0 :                         goto out_bitmap;
     784             :         }
     785             : 
     786             :         /* Generate rmaps for everything. */
     787        9260 :         error = xrep_rmap_stash_bitmap(rr, &ri.inobt_blocks,
     788             :                         &XFS_RMAP_OINFO_INOBT);
     789        9258 :         if (error)
     790           0 :                 goto out_bitmap;
     791        9258 :         error = xrep_rmap_stash_bitmap(rr, &ri.ichunk_blocks,
     792             :                         &XFS_RMAP_OINFO_INODES);
     793             : 
     794        9260 : out_bitmap:
     795        9260 :         xagb_bitmap_destroy(&ri.inobt_blocks);
     796        9261 :         xagb_bitmap_destroy(&ri.ichunk_blocks);
     797        9261 :         return error;
     798             : }
     799             : 
     800             : /*
                     :  * Record a CoW staging extent.
                     :  *
                     :  * Record callback passed to xfs_refcount_query_range(); @priv is the
                     :  * xagb_bitmap that collects the staging extents.  A record that fails
                     :  * xfs_refcount_check_domain() or is not in the CoW domain is rejected with
                     :  * -EFSCORRUPTED.  Otherwise the extent [rc_startblock, rc_blockcount] is set
                     :  * in the bitmap and the bitmap helper's result is returned.
                     :  */
     801             : STATIC int
     802        2680 : xrep_rmap_walk_cowblocks(
     803             :         struct xfs_btree_cur            *cur,
     804             :         const struct xfs_refcount_irec  *irec,
     805             :         void                            *priv)
     806             : {
     807        2680 :         struct xagb_bitmap              *bitmap = priv;
     808             : 
     809        2680 :         if (!xfs_refcount_check_domain(irec) ||
     810        2680 :             irec->rc_domain != XFS_REFC_DOMAIN_COW)
     811             :                 return -EFSCORRUPTED;
     812             : 
     813        2680 :         return xagb_bitmap_set(bitmap, irec->rc_startblock, irec->rc_blockcount);
     814             : }
     815             : 
     816             : /*
     817             :  * Collect rmaps for the blocks containing the refcount btree, and all CoW
     818             :  * staging extents.
                     :  *
                     :  * Returns 0 immediately when the filesystem does not have reflink.
                     :  * Otherwise the refcount btree blocks go into one bitmap and every record
                     :  * in the CoW domain (start block 0 through -1U) goes into another.  The CoW
                     :  * bitmap is stashed with XFS_RMAP_OINFO_COW first, then the btree bitmap
                     :  * with XFS_RMAP_OINFO_REFC.  Both bitmaps are destroyed before returning.
                     :  *
                     :  * Uses the refcount btree cursor held in sc->sa.  Returns 0 or the first
                     :  * error encountered.
     819             :  */
     820             : STATIC int
     821        9258 : xrep_rmap_find_refcount_rmaps(
     822             :         struct xrep_rmap        *rr)
     823             : {
     824        9258 :         struct xagb_bitmap      refcountbt_blocks;      /* REFCBIT */
     825        9258 :         struct xagb_bitmap      cow_blocks;             /* COWBIT */
     826        9258 :         struct xfs_refcount_irec low = {
     827             :                 .rc_startblock  = 0,
     828             :                 .rc_domain      = XFS_REFC_DOMAIN_COW,
     829             :         };
     830        9258 :         struct xfs_refcount_irec high = {
     831             :                 .rc_startblock  = -1U,
     832             :                 .rc_domain      = XFS_REFC_DOMAIN_COW,
     833             :         };
     834        9258 :         struct xfs_scrub        *sc = rr->sc;
     835        9258 :         int                     error;
     836             : 
     837        9258 :         if (!xfs_has_reflink(sc->mp))
     838             :                 return 0;
     839             : 
     840        9257 :         xagb_bitmap_init(&refcountbt_blocks);
     841        9258 :         xagb_bitmap_init(&cow_blocks);
     842             : 
     843             :         /* refcountbt */
     844        9259 :         error = xagb_bitmap_set_btblocks(&refcountbt_blocks, sc->sa.refc_cur);
     845        9261 :         if (error)
     846           0 :                 goto out_bitmap;
     847             : 
     848             :         /* Collect rmaps for CoW staging extents. */
     849        9261 :         error = xfs_refcount_query_range(sc->sa.refc_cur, &low, &high,
     850             :                         xrep_rmap_walk_cowblocks, &cow_blocks);
     851        9261 :         if (error)
     852           0 :                 goto out_bitmap;
     853             : 
     854             :         /* Generate rmaps for everything. */
     855        9261 :         error = xrep_rmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
     856        9261 :         if (error)
     857           0 :                 goto out_bitmap;
     858        9261 :         error = xrep_rmap_stash_bitmap(rr, &refcountbt_blocks,
     859             :                         &XFS_RMAP_OINFO_REFC);
     860             : 
     861        9261 : out_bitmap:
     862        9261 :         xagb_bitmap_destroy(&cow_blocks);
     863        9261 :         xagb_bitmap_destroy(&refcountbt_blocks);
     864        9261 :         return error;
     865             : }
     866             : 
     867             : /*
                     :  * Generate rmaps for the AG headers.
                     :  *
                     :  * A single XFS_RMAP_OWN_FS record is stashed that starts at the superblock
                     :  * block and runs through the AGFL block inclusive, hence the "+ 1" in the
                     :  * length.  Returns the result of xrep_rmap_stash().
                     :  */
     868             : STATIC int
     869        9261 : xrep_rmap_find_agheader_rmaps(
     870             :         struct xrep_rmap        *rr)
     871             : {
     872        9261 :         struct xfs_scrub        *sc = rr->sc;
     873             : 
     874             :         /* Create a record for the AG sb->agfl. */
     875       18522 :         return xrep_rmap_stash(rr, XFS_SB_BLOCK(sc->mp),
     876        9261 :                         XFS_AGFL_BLOCK(sc->mp) - XFS_SB_BLOCK(sc->mp) + 1,
     877             :                         XFS_RMAP_OWN_FS, 0, 0);
     878             : }
     879             : 
     880             : /*
                     :  * Generate rmaps for the log, if it's in this AG.
                     :  *
                     :  * Returns 0 without doing anything when xfs_ag_contains_log() says the log
                     :  * is not in the AG being repaired.  Otherwise one XFS_RMAP_OWN_LOG record is
                     :  * stashed, starting at the AG block of sb_logstart and sb_logblocks long,
                     :  * and the result of xrep_rmap_stash() is returned.
                     :  */
     881             : STATIC int
     882        9261 : xrep_rmap_find_log_rmaps(
     883             :         struct xrep_rmap        *rr)
     884             : {
     885        9261 :         struct xfs_scrub        *sc = rr->sc;
     886             : 
     887       18522 :         if (!xfs_ag_contains_log(sc->mp, sc->sa.pag->pag_agno))
     888             :                 return 0;
     889             : 
     890        2308 :         return xrep_rmap_stash(rr,
     891        2308 :                         XFS_FSB_TO_AGBNO(sc->mp, sc->mp->m_sb.sb_logstart),
     892             :                         sc->mp->m_sb.sb_logblocks, XFS_RMAP_OWN_LOG, 0, 0);
     893             : }
     894             : 
     895             : /*
                     :  * Check and count all the records that we gathered.
                     :  *
                     :  * Record callback passed to xfs_rmap_query_all(); @priv is the struct
                     :  * xrep_rmap.  Each record is validated with xrep_rmap_check_mapping(); a
                     :  * failure is returned as-is and the record is not counted.  A record that
                     :  * passes bumps rr->nr_records.
                     :  */
     896             : STATIC int
     897    19163078 : xrep_rmap_check_record(
     898             :         struct xfs_btree_cur            *cur,
     899             :         const struct xfs_rmap_irec      *rec,
     900             :         void                            *priv)
     901             : {
     902    19163078 :         struct xrep_rmap                *rr = priv;
     903    19163078 :         int                             error;
     904             : 
     905    19163078 :         error = xrep_rmap_check_mapping(rr->sc, rec);
     906    19163081 :         if (error)
     907             :                 return error;
     908             : 
     909    19163081 :         rr->nr_records++;
     910    19163081 :         return 0;
     911             : }
     912             : 
     913             : /*
     914             :  * Generate all the reverse-mappings for this AG, a list of the old rmapbt
     915             :  * blocks, and the new btreeblks count.  Figure out if we have enough free
     916             :  * space to reconstruct the inode btrees.  The caller must clean up the lists
     917             :  * if anything goes wrong.  This implements section (I) above.
                     :  *
                     :  * The function runs in four phases:
                     :  *
                     :  *  1. With the AG btree cursors set up, stash rmaps for the inode btrees
                     :  *     and chunks, the refcount btree and CoW staging extents, the AG
                     :  *     headers, and the log.  The cursors are freed again afterwards.
                     :  *  2. Swap the transaction for an empty one and scan every inode that the
                     :  *     iscan hands back, stopping on error or when the scrub is told to
                     :  *     terminate.
                     :  *  3. Swap back to a real transaction and relock the AG headers.  Give up
                     :  *     with -EFSCORRUPTED if the inode scan was aborted.
                     :  *  4. Walk the in-memory rmap btree, checking each record and counting
                     :  *     them into rr->nr_records.
                     :  *
                     :  * Returns 0 or the first error encountered.
                     :  *
                     :  * NOTE(review): the body shown here neither builds a list of old rmapbt
                     :  * blocks nor computes btreeblks or checks free space, and "inode btrees"
                     :  * reads like a leftover from a different repair function -- confirm and
                     :  * trim the first paragraph accordingly.
     918             :  */
     919             : STATIC int
     920        9254 : xrep_rmap_find_rmaps(
     921             :         struct xrep_rmap        *rr)
     922             : {
     923        9254 :         struct xfs_scrub        *sc = rr->sc;
     924        9254 :         struct xchk_ag          *sa = &sc->sa;
     925        9254 :         struct xfs_inode        *ip;
     926        9254 :         struct xfs_buf          *mhead_bp;
     927        9254 :         struct xfs_btree_cur    *mcur;
     928        9254 :         int                     error;
     929             : 
     930             :         /* Find all the per-AG metadata. */
     931        9254 :         xrep_ag_btcur_init(sc, &sc->sa);
     932             : 
     933        9250 :         error = xrep_rmap_find_inode_rmaps(rr);
     934        9259 :         if (error)
     935           0 :                 goto end_agscan;
     936             : 
     937        9259 :         error = xrep_rmap_find_refcount_rmaps(rr);
     938        9261 :         if (error)
     939           0 :                 goto end_agscan;
     940             : 
     941        9261 :         error = xrep_rmap_find_agheader_rmaps(rr);
     942        9261 :         if (error)
     943           0 :                 goto end_agscan;
     944             : 
     945        9261 :         error = xrep_rmap_find_log_rmaps(rr);
     946        9261 : end_agscan:
     947        9261 :         xchk_ag_btcur_free(&sc->sa);
     948        9261 :         if (error)
     949             :                 return error;
     950             : 
     951             :         /*
     952             :          * Set up for a potentially lengthy filesystem scan by reducing our
     953             :          * transaction resource usage for the duration.  Specifically:
     954             :          *
     955             :          * Unlock the AG header buffers and cancel the transaction to release
     956             :          * the log grant space while we scan the filesystem.
     957             :          *
     958             :          * Create a new empty transaction to eliminate the possibility of the
     959             :          * inode scan deadlocking on cyclical metadata.
     960             :          *
     961             :          * We pass the empty transaction to the file scanning function to avoid
     962             :          * repeatedly cycling empty transactions.  This can be done even though
     963             :          * we take the IOLOCK to quiesce the file because empty transactions
     964             :          * do not take sb_internal.
                     :          *
                     :          * NOTE(review): the AGF/AGI buffer pointers are cleared before the
                     :          * cancel; presumably cancelling the transaction is what releases
                     :          * the buffers themselves -- confirm.
     965             :          */
     966        9261 :         sa->agf_bp = NULL;
     967        9261 :         sa->agi_bp = NULL;
     968        9261 :         xchk_trans_cancel(sc);
     969        9261 :         error = xchk_trans_alloc_empty(sc);
     970        9261 :         if (error)
     971             :                 return error;
     972             : 
     973             :         /*
                     :          * Iterate all AGs for inodes rmaps.
                     :          *
                     :          * xchk_iscan_iter() returns 1 for each inode it hands back; any
                     :          * other value ends the loop and is treated as the scan result.
                     :          * Each inode is released as soon as it has been scanned.
                     :          */
     974    94196912 :         while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
     975    94187211 :                 error = xrep_rmap_scan_inode(rr, ip);
     976    94187664 :                 xchk_irele(sc, ip);
     977    94187655 :                 if (error)
     978             :                         break;
     979             : 
     980    94187652 :                 if (xchk_should_terminate(sc, &error))
     981             :                         break;
     982             :         }
     983        9261 :         xchk_iscan_iter_finish(&rr->iscan);
     984        9261 :         if (error)
     985             :                 return error;
     986             : 
     987             :         /*
     988             :          * Switch out for a real transaction and lock the AG headers in
     989             :          * preparation for building a new tree.
     990             :          */
     991        9257 :         xchk_trans_cancel(sc);
     992        9257 :         error = xchk_setup_fs(sc);
     993        9257 :         if (error)
     994             :                 return error;
     995        9257 :         error = xchk_perag_drain_and_lock(sc);
     996        9257 :         if (error)
     997             :                 return error;
     998             : 
     999             :         /*
    1000             :          * If a hook failed to update the in-memory btree, we lack the data to
    1001             :          * continue the repair.
    1002             :          */
    1003        9219 :         if (xchk_iscan_aborted(&rr->iscan))
    1004             :                 return -EFSCORRUPTED;
    1005             : 
    1006             :         /*
    1007             :          * Now that we have everything locked again, we need to count the
    1008             :          * number of rmap records stashed in the btree.  This should reflect
    1009             :          * all actively-owned space in the filesystem.  At the same time, check
    1010             :          * all our records before we start building a new btree, which requires
    1011             :          * a bnobt cursor.
    1012             :          */
    1013        9219 :         error = xfbtree_head_read_buf(rr->rmap_btree, NULL, &mhead_bp);
    1014        9219 :         if (error)
    1015             :                 return error;
    1016             : 
    1017        9219 :         mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, mhead_bp,
    1018             :                         rr->rmap_btree);
    1019        9219 :         sc->sa.bno_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
    1020             :                         sc->sa.pag, XFS_BTNUM_BNO);
    1021             : 
    1022        9217 :         rr->nr_records = 0;
    1023        9217 :         error = xfs_rmap_query_all(mcur, xrep_rmap_check_record, rr);
    1024             : 
    1025        9219 :         xfs_btree_del_cursor(sc->sa.bno_cur, error);
    1026        9219 :         sc->sa.bno_cur = NULL;
    1027        9219 :         xfs_btree_del_cursor(mcur, error);
    1028        9219 :         xfs_buf_relse(mhead_bp);
    1029             : 
    1030        9219 :         return error;
    1031             : }
    1032             : 
    1033             : /* Section (II): Reserving space for new rmapbt and setting free space bitmap */
    1034             : 
    1035             : struct xrep_rmap_agfl {         /* private data for xrep_rmap_walk_agfl() */
    1036             :         struct xagb_bitmap      *bitmap;        /* bitmap that receives each AGFL block */
    1037             :         xfs_agnumber_t          agno;           /* AG number; set by the caller, not read by the walk callback */
    1038             : };
    1039             : 
    1040             : /*
                     :  * Add an AGFL block to the rmap list.
                     :  *
                     :  * Callback passed to xfs_agfl_walk(); @priv is a struct xrep_rmap_agfl.
                     :  * The single block @agbno is set in the bitmap that @priv points at and
                     :  * the bitmap helper's result is returned.  @mp is not used.
                     :  */
    1041             : STATIC int
    1042       60801 : xrep_rmap_walk_agfl(
    1043             :         struct xfs_mount        *mp,
    1044             :         xfs_agblock_t           agbno,
    1045             :         void                    *priv)
    1046             : {
    1047       60801 :         struct xrep_rmap_agfl   *ra = priv;
    1048             : 
    1049       60801 :         return xagb_bitmap_set(ra->bitmap, agbno, 1);
    1050             : }
    1051             : 
    1052             : /*
    1053             :  * Run one round of reserving space for the new rmapbt and recomputing the
    1054             :  * number of blocks needed to store the previously observed rmapbt records and
    1055             :  * the ones we'll create for the free space metadata.  When we don't need more
    1056             :  * blocks, return a bitmap of OWN_AG extents in @freesp_blocks and set @done to
    1057             :  * true.
                     :  *
                     :  * @rmap_cur is the cursor used to compute the new btree geometry.
                     :  * @blocks_reserved is the number of blocks reserved by earlier rounds; it
                     :  * is updated to the current target once the extra blocks are allocated.
                     :  * @freesp_blocks is rebuilt from scratch on every call from the bnobt and
                     :  * cntbt blocks, the new btree's reservations, and the AGFL blocks.
                     :  *
                     :  * @done is written only when 0 is returned; on error its value is left
                     :  * untouched.  Returns 0 or the first error encountered.
    1058             :  */
    1059             : STATIC int
    1060        9419 : xrep_rmap_try_reserve(
    1061             :         struct xrep_rmap        *rr,
    1062             :         struct xfs_btree_cur    *rmap_cur,
    1063             :         struct xagb_bitmap      *freesp_blocks,
    1064             :         uint64_t                *blocks_reserved,
    1065             :         bool                    *done)
    1066             : {
    1067        9419 :         struct xrep_rmap_agfl   ra = {
    1068             :                 .bitmap         = freesp_blocks,
    1069        9419 :                 .agno           = rr->sc->sa.pag->pag_agno,
    1070             :         };
    1071        9419 :         struct xfs_scrub        *sc = rr->sc;
    1072        9419 :         struct xrep_newbt_resv  *resv, *n;
    1073        9419 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
    1074        9419 :         struct xfs_buf          *agfl_bp;
    1075        9419 :         uint64_t                nr_blocks;      /* RMB */
    1076        9419 :         uint64_t                freesp_records;
    1077        9419 :         int                     error;
    1078             : 
    1079             :         /*
    1080             :          * We're going to recompute new_btree.bload.nr_blocks at the end of
    1081             :          * this function to reflect however many btree blocks we need to store
    1082             :          * all the rmap records (including the ones that reflect the changes we
    1083             :          * made to support the new rmapbt blocks), so we save the old value
    1084             :          * here so we can decide if we've reserved enough blocks.
    1085             :          */
    1086        9419 :         nr_blocks = rr->new_btree.bload.nr_blocks;
    1087             : 
    1088             :         /*
    1089             :          * Make sure we've reserved enough space for the new btree.  This can
    1090             :          * change the shape of the free space btrees, which can cause secondary
    1091             :          * interactions with the rmap records because all three space btrees
    1092             :          * have the same rmap owner.  We'll account for all that below.
    1093             :          */
    1094        9419 :         error = xrep_newbt_alloc_blocks(&rr->new_btree,
    1095        9419 :                         nr_blocks - *blocks_reserved);
    1096        9418 :         if (error)
    1097             :                 return error;
    1098             : 
    1099        9418 :         *blocks_reserved = rr->new_btree.bload.nr_blocks;
    1100             : 
    1101             :         /*
                     :          * Clear everything in the bitmap.
                     :          *
                     :          * NOTE(review): the bitmap is reused right after being destroyed,
                     :          * so xagb_bitmap_destroy() presumably leaves it empty and usable
                     :          * -- confirm.
                     :          */
    1102        9418 :         xagb_bitmap_destroy(freesp_blocks);
    1103             : 
    1104             :         /* Set all the bnobt blocks in the bitmap. */
    1105        9421 :         sc->sa.bno_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
    1106             :                         sc->sa.pag, XFS_BTNUM_BNO);
    1107        9410 :         error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.bno_cur);
    1108        9418 :         xfs_btree_del_cursor(sc->sa.bno_cur, error);
    1109        9422 :         sc->sa.bno_cur = NULL;
    1110        9422 :         if (error)
    1111             :                 return error;
    1112             : 
    1113             :         /* Set all the cntbt blocks in the bitmap. */
    1114        9422 :         sc->sa.cnt_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
    1115             :                         sc->sa.pag, XFS_BTNUM_CNT);
    1116        9421 :         error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.cnt_cur);
    1117        9417 :         xfs_btree_del_cursor(sc->sa.cnt_cur, error);
    1118        9422 :         sc->sa.cnt_cur = NULL;
    1119        9422 :         if (error)
    1120             :                 return error;
    1121             : 
    1122             :         /*
                     :          * Record our new btreeblks value.
                     :          *
                     :          * At this point the bitmap holds only the bnobt and cntbt blocks.
                     :          * NOTE(review): the 2 subtracted presumably excludes the two btree
                     :          * root blocks -- confirm against the AGF btreeblks definition.
                     :          */
    1123        9422 :         rr->freesp_btblocks = xagb_bitmap_hweight(freesp_blocks) - 2;
    1124             : 
    1125             :         /* Set all the new rmapbt blocks in the bitmap. */
    1126       20579 :         for_each_xrep_newbt_reservation(&rr->new_btree, resv, n) {
    1127       11159 :                 error = xagb_bitmap_set(freesp_blocks, resv->agbno, resv->len);
    1128       11161 :                 if (error)
    1129           0 :                         return error;
    1130             :         }
    1131             : 
    1132             :         /* Set all the AGFL blocks in the bitmap. */
    1133        9420 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
    1134        9422 :         if (error)
    1135             :                 return error;
    1136             : 
    1137        9422 :         error = xfs_agfl_walk(sc->mp, agf, agfl_bp, xrep_rmap_walk_agfl, &ra);
    1138        9419 :         if (error)
    1139             :                 return error;
    1140             : 
    1141             :         /* Count the extents in the bitmap. */
    1142        9418 :         freesp_records = xagb_bitmap_count_set_regions(freesp_blocks);
    1143             : 
    1144             :         /* Compute how many blocks we'll need for all the rmaps. */
    1145        9412 :         error = xfs_btree_bload_compute_geometry(rmap_cur,
    1146        9412 :                         &rr->new_btree.bload, rr->nr_records + freesp_records);
    1147        9416 :         if (error)
    1148             :                 return error;
    1149             : 
    1150             :         /*
                     :          * We're done when we don't need more blocks, i.e. when the block
                     :          * count saved on entry already covers the recomputed geometry.
                     :          */
    1151        9416 :         *done = nr_blocks >= rr->new_btree.bload.nr_blocks;
    1152        9416 :         return 0;
    1153             : }
    1154             : 
    1155             : /*
    1156             :  * Iteratively reserve space for rmap btree while recording OWN_AG rmaps for
    1157             :  * the free space metadata.  This implements section (II) above.
    1158             :  */
    1159             : STATIC int
    1160        9219 : xrep_rmap_reserve_space(
    1161             :         struct xrep_rmap        *rr,
    1162             :         struct xfs_btree_cur    *rmap_cur)
    1163             : {
    1164        9219 :         struct xagb_bitmap      freesp_blocks;  /* AGBIT */
    1165        9219 :         uint64_t                blocks_reserved = 0;
    1166        9219 :         bool                    done = false;
    1167        9219 :         int                     error;
    1168             : 
    1169             :         /* Compute how many blocks we'll need for the rmaps collected so far. */
    1170        9219 :         error = xfs_btree_bload_compute_geometry(rmap_cur,
    1171             :                         &rr->new_btree.bload, rr->nr_records);
    1172        9219 :         if (error)
    1173             :                 return error;
    1174             : 
    1175             :         /* Last chance to abort before we start committing fixes. */
    1176        9219 :         if (xchk_should_terminate(rr->sc, &error))
    1177           0 :                 return error;
    1178             : 
    1179        9219 :         xagb_bitmap_init(&freesp_blocks);
    1180             : 
    1181             :         /*
    1182             :          * Iteratively reserve space for the new rmapbt and recompute the
    1183             :          * number of blocks needed to store the previously observed rmapbt
    1184             :          * records and the ones we'll create for the free space metadata.
    1185             :          * Finish when we don't need more blocks.
    1186             :          */
    1187        9423 :         do {
    1188        9423 :                 error = xrep_rmap_try_reserve(rr, rmap_cur, &freesp_blocks,
    1189             :                                 &blocks_reserved, &done);
    1190        9418 :                 if (error)
    1191           0 :                         goto out_bitmap;
    1192        9418 :         } while (!done);
    1193             : 
    1194             :         /* Emit rmaps for everything in the free space bitmap. */
    1195        9214 :         xrep_ag_btcur_init(rr->sc, &rr->sc->sa);
    1196        9215 :         error = xrep_rmap_stash_bitmap(rr, &freesp_blocks, &XFS_RMAP_OINFO_AG);
    1197        9219 :         xchk_ag_btcur_free(&rr->sc->sa);
    1198             : 
    1199        9219 : out_bitmap:
    1200        9219 :         xagb_bitmap_destroy(&freesp_blocks);
    1201        9219 :         return error;
    1202             : }
    1203             : 
    1204             : /* Section (III): Building the new rmap btree. */
    1205             : 
    1206             : /* Update the AGF counters. */
    1207             : STATIC int
    1208        9219 : xrep_rmap_reset_counters(
    1209             :         struct xrep_rmap        *rr)
    1210             : {
    1211        9219 :         struct xfs_scrub        *sc = rr->sc;
    1212        9219 :         struct xfs_perag        *pag = sc->sa.pag;
    1213        9219 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
    1214        9219 :         xfs_agblock_t           rmap_btblocks;
    1215             : 
    1216             :         /*
    1217             :          * The AGF header contains extra information related to the reverse
    1218             :          * mapping btree, so we must update those fields here.
    1219             :          */
    1220        9219 :         rmap_btblocks = rr->new_btree.afake.af_blocks - 1;
    1221        9219 :         agf->agf_btreeblks = cpu_to_be32(rr->freesp_btblocks + rmap_btblocks);
    1222        9219 :         xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS);
    1223             : 
    1224             :         /*
    1225             :          * After we commit the new btree to disk, it is possible that the
    1226             :          * process to reap the old btree blocks will race with the AIL trying
    1227             :          * to checkpoint the old btree blocks into the filesystem.  If the new
    1228             :          * tree is shorter than the old one, the rmapbt write verifier will
    1229             :          * fail and the AIL will shut down the filesystem.
    1230             :          *
    1231             :          * To avoid this, save the old incore btree height values as the alt
    1232             :          * height values before re-initializing the perag info from the updated
    1233             :          * AGF to capture all the new values.
    1234             :          */
    1235        9219 :         pag->pagf_alt_levels[XFS_BTNUM_RMAPi] =
    1236        9219 :                                         pag->pagf_levels[XFS_BTNUM_RMAPi];
    1237             : 
    1238             :         /* Reinitialize with the values we just logged. */
    1239        9219 :         return xrep_reinit_pagf(sc);
    1240             : }
    1241             : 
    1242             : /* Retrieve rmapbt data for bulk load. */
    1243             : STATIC int
    1244      154930 : xrep_rmap_get_records(
    1245             :         struct xfs_btree_cur    *cur,
    1246             :         unsigned int            idx,
    1247             :         struct xfs_btree_block  *block,
    1248             :         unsigned int            nr_wanted,
    1249             :         void                    *priv)
    1250             : {
    1251      154930 :         struct xrep_rmap        *rr = priv;
    1252      154930 :         union xfs_btree_rec     *block_rec;
    1253      154930 :         unsigned int            loaded;
    1254      154930 :         int                     error;
    1255             : 
    1256    19361304 :         for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
    1257    19206366 :                 int             stat = 0;
    1258             : 
    1259    19206366 :                 error = xfs_btree_increment(rr->mcur, 0, &stat);
    1260    19206361 :                 if (error)
    1261           0 :                         return error;
    1262    19206361 :                 if (!stat)
    1263             :                         return -EFSCORRUPTED;
    1264             : 
    1265    19206361 :                 error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
    1266    19206366 :                 if (error)
    1267           0 :                         return error;
    1268    19206366 :                 if (!stat)
    1269             :                         return -EFSCORRUPTED;
    1270             : 
    1271    19206366 :                 block_rec = xfs_btree_rec_addr(cur, idx, block);
    1272    19206373 :                 cur->bc_ops->init_rec_from_cur(cur, block_rec);
    1273             :         }
    1274             : 
    1275      154938 :         return loaded;
    1276             : }
    1277             : 
    1278             : /* Feed one of the new btree blocks to the bulk loader. */
    1279             : STATIC int
    1280      159263 : xrep_rmap_claim_block(
    1281             :         struct xfs_btree_cur    *cur,
    1282             :         union xfs_btree_ptr     *ptr,
    1283             :         void                    *priv)
    1284             : {
    1285      159263 :         struct xrep_rmap        *rr = priv;
    1286      159263 :         int                     error;
    1287             : 
    1288      159263 :         error = xrep_newbt_relog_autoreap(&rr->new_btree);
    1289      159258 :         if (error)
    1290             :                 return error;
    1291             : 
    1292      159258 :         return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
    1293             : }
    1294             : 
    1295             : /* Custom allocation function for new rmap btrees. */
    1296             : STATIC int
    1297       10842 : xrep_rmap_alloc_vextent(
    1298             :         struct xfs_scrub        *sc,
    1299             :         struct xfs_alloc_arg    *args,
    1300             :         xfs_fsblock_t           alloc_hint)
    1301             : {
    1302       10842 :         int                     error;
    1303             : 
    1304             :         /*
    1305             :          * We don't want an rmap update on the allocation, since we iteratively
    1306             :          * compute the OWN_AG records /after/ allocating blocks for the records
    1307             :          * that we already know we need to store.  Therefore, fix the freelist
    1308             :          * with the NORMAP flag set so that we don't also try to create an rmap
    1309             :          * for new AGFL blocks.
    1310             :          */
    1311       10842 :         error = xrep_fix_freelist(sc, XFS_ALLOC_FLAG_NORMAP);
    1312       10847 :         if (error)
    1313             :                 return error;
    1314             : 
    1315             :         /*
    1316             :          * If xrep_fix_freelist fixed the freelist by moving blocks from the
    1317             :          * free space btrees or by removing blocks from the AGFL and queueing
    1318             :          * an EFI to free the block, the transaction will be dirty.  This
    1319             :          * second case is of interest to us.
    1320             :          *
    1321             :          * Later on, we will need to compare gaps in the new recordset against
    1322             :          * the block usage of all OWN_AG owners in order to free the old
    1323             :          * btree's blocks, which means that we can't have EFIs for former AGFL
    1324             :          * blocks attached to the repair transaction when we commit the new
    1325             :          * btree.
    1326             :          *
    1327             :          * xrep_newbt_alloc_blocks guarantees this for us by calling
    1328             :          * xrep_defer_finish to commit anything that fix_freelist may have
    1329             :          * added to the transaction.
    1330             :          */
    1331       10847 :         return xfs_alloc_vextent_near_bno(args, alloc_hint);
    1332             : }
    1333             : 
    1334             : 
    1335             : /* Count the records in this btree. */
    1336             : STATIC int
    1337        9219 : xrep_rmap_count_records(
    1338             :         struct xfs_btree_cur    *cur,
    1339             :         unsigned long long      *nr)
    1340             : {
    1341        9219 :         int                     running = 1;
    1342        9219 :         int                     error;
    1343             : 
    1344        9219 :         *nr = 0;
    1345             : 
    1346        9219 :         error = xfs_btree_goto_left_edge(cur);
    1347        9206 :         if (error)
    1348             :                 return error;
    1349             : 
    1350    19224768 :         while (running && !(error = xfs_btree_increment(cur, 0, &running))) {
    1351    19215562 :                 if (running)
    1352    19206351 :                         (*nr)++;
    1353             :         }
    1354             : 
    1355             :         return error;
    1356             : }
    1357             : /*
    1358             :  * Use the collected rmap information to stage a new rmap btree.  If this is
    1359             :  * successful we'll return with the new btree root information logged to the
    1360             :  * repair transaction but not yet committed.  This implements section (III)
    1361             :  * above.
    1362             :  */
    1363             : STATIC int
    1364        9219 : xrep_rmap_build_new_tree(
    1365             :         struct xrep_rmap        *rr)
    1366             : {
    1367        9219 :         struct xfs_scrub        *sc = rr->sc;
    1368        9219 :         struct xfs_perag        *pag = sc->sa.pag;
    1369        9219 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
    1370        9219 :         struct xfs_btree_cur    *rmap_cur;
    1371        9219 :         struct xfs_buf          *mhead_bp;
    1372        9219 :         xfs_fsblock_t           fsbno;
    1373        9219 :         int                     error;
    1374             : 
    1375             :         /*
    1376             :          * Preserve the old rmapbt block count so that we can adjust the
    1377             :          * per-AG rmapbt reservation after we commit the new btree root and
    1378             :          * want to dispose of the old btree blocks.
    1379             :          */
    1380        9219 :         rr->old_rmapbt_fsbcount = be32_to_cpu(agf->agf_rmap_blocks);
    1381             : 
    1382             :         /*
    1383             :          * Prepare to construct the new btree by reserving disk space for the
    1384             :          * new btree and setting up all the accounting information we'll need
    1385             :          * to root the new btree while it's under construction and before we
    1386             :          * attach it to the AG header.  The new blocks are accounted to the
    1387             :          * rmapbt per-AG reservation, which we will adjust further after
    1388             :          * committing the new btree.
    1389             :          */
    1390        9219 :         fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, XFS_RMAP_BLOCK(sc->mp));
    1391        9219 :         xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_SKIP_UPDATE,
    1392             :                         fsbno, XFS_AG_RESV_RMAPBT);
    1393        9219 :         rr->new_btree.bload.get_records = xrep_rmap_get_records;
    1394        9219 :         rr->new_btree.bload.claim_block = xrep_rmap_claim_block;
    1395        9219 :         rr->new_btree.alloc_vextent = xrep_rmap_alloc_vextent;
    1396        9219 :         rmap_cur = xfs_rmapbt_stage_cursor(sc->mp, &rr->new_btree.afake, pag);
    1397             : 
    1398             :         /*
    1399             :          * Initialize @rr->new_btree, reserve space for the new rmapbt,
    1400             :          * and compute OWN_AG rmaps.
    1401             :          */
    1402        9219 :         error = xrep_rmap_reserve_space(rr, rmap_cur);
    1403        9219 :         if (error)
    1404           0 :                 goto err_cur;
    1405             : 
    1406             :         /*
    1407             :          * Count the rmapbt records again, because the space reservation
    1408             :          * for the rmapbt itself probably added more records to the btree.
    1409             :          */
    1410        9219 :         error = xfbtree_head_read_buf(rr->rmap_btree, NULL, &mhead_bp);
    1411        9219 :         if (error)
    1412           0 :                 goto err_cur;
    1413             : 
    1414        9219 :         rr->mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, mhead_bp,
    1415             :                         rr->rmap_btree);
    1416             : 
    1417        9219 :         error = xrep_rmap_count_records(rr->mcur, &rr->nr_records);
    1418        9216 :         if (error)
    1419           0 :                 goto err_mcur;
    1420             : 
    1421             :         /*
    1422             :          * Due to btree slack factors, it's possible for a new btree to be one
    1423             :          * level taller than the old btree.  Update the incore btree height so
    1424             :          * that we don't trip the verifiers when writing the new btree blocks
    1425             :          * to disk.
    1426             :          */
    1427        9216 :         pag->pagf_alt_levels[XFS_BTNUM_RMAPi] =
    1428        9216 :                                         rr->new_btree.bload.btree_height;
    1429             : 
    1430             :         /*
    1431             :          * Move the cursor to the left edge of the tree so that the first
    1432             :          * increment in ->get_records positions us at the first record.
    1433             :          */
    1434        9216 :         error = xfs_btree_goto_left_edge(rr->mcur);
    1435        9219 :         if (error)
    1436           0 :                 goto err_level;
    1437             : 
    1438             :         /* Add all observed rmap records. */
    1439        9219 :         error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
    1440        9218 :         if (error)
    1441           0 :                 goto err_level;
    1442             : 
    1443             :         /*
    1444             :          * Install the new btree in the AG header.  After this point the old
    1445             :          * btree is no longer accessible and the new tree is live.
    1446             :          */
    1447        9218 :         xfs_rmapbt_commit_staged_btree(rmap_cur, sc->tp, sc->sa.agf_bp);
    1448        9219 :         xfs_btree_del_cursor(rmap_cur, 0);
    1449        9219 :         xfs_btree_del_cursor(rr->mcur, 0);
    1450        9219 :         rr->mcur = NULL;
    1451        9219 :         xfs_buf_relse(mhead_bp);
    1452             : 
    1453             :         /*
    1454             :          * Now that we've written the new btree to disk, we don't need to keep
    1455             :          * updating the in-memory btree.  Abort the scan to stop live updates.
    1456             :          */
    1457        9219 :         xchk_iscan_abort(&rr->iscan);
    1458             : 
    1459             :         /*
    1460             :          * The newly committed rmap recordset includes mappings for the blocks
    1461             :          * that we reserved to build the new btree.  If there is excess space
    1462             :          * reservation to be freed, the corresponding rmap records must also be
    1463             :          * removed.
    1464             :          */
    1465        9219 :         rr->new_btree.oinfo = XFS_RMAP_OINFO_AG;
    1466             : 
    1467             :         /* Reset the AGF counters now that we've changed the btree shape. */
    1468        9219 :         error = xrep_rmap_reset_counters(rr);
    1469        9218 :         if (error)
    1470           0 :                 goto err_newbt;
    1471             : 
    1472             :         /* Dispose of any unused blocks and the accounting information. */
    1473        9218 :         error = xrep_newbt_commit(&rr->new_btree);
    1474        9219 :         if (error)
    1475             :                 return error;
    1476             : 
    1477        9219 :         return xrep_roll_ag_trans(sc);
    1478             : 
    1479           0 : err_level:
    1480           0 :         pag->pagf_alt_levels[XFS_BTNUM_RMAPi] = 0;
    1481           0 : err_mcur:
    1482           0 :         xfs_btree_del_cursor(rr->mcur, error);
    1483           0 :         xfs_buf_relse(mhead_bp);
    1484           0 : err_cur:
    1485           0 :         xfs_btree_del_cursor(rmap_cur, error);
    1486           0 : err_newbt:
    1487           0 :         xrep_newbt_cancel(&rr->new_btree);
    1488           0 :         return error;
    1489             : }
    1490             : 
    1491             : /* Section (IV): Reaping the old btree. */
    1492             : 
    1493             : struct xrep_rmap_find_gaps {
    1494             :         struct xagb_bitmap      rmap_gaps;
    1495             :         xfs_agblock_t           next_agbno;
    1496             : };
    1497             : 
    1498             : /* Subtract each free extent in the bnobt from the rmap gaps. */
    1499             : STATIC int
    1500      931599 : xrep_rmap_find_freesp(
    1501             :         struct xfs_btree_cur            *cur,
    1502             :         const struct xfs_alloc_rec_incore *rec,
    1503             :         void                            *priv)
    1504             : {
    1505      931599 :         struct xrep_rmap_find_gaps      *rfg = priv;
    1506             : 
    1507     1863196 :         return xagb_bitmap_clear(&rfg->rmap_gaps, rec->ar_startblock,
    1508      931599 :                         rec->ar_blockcount);
    1509             : }
    1510             : 
    1511             : /* Record the free space we find, as part of cleaning out the btree. */
    1512             : STATIC int
    1513    19206392 : xrep_rmap_find_gaps(
    1514             :         struct xfs_btree_cur            *cur,
    1515             :         const struct xfs_rmap_irec      *rec,
    1516             :         void                            *priv)
    1517             : {
    1518    19206392 :         struct xrep_rmap_find_gaps      *rfg = priv;
    1519    19206392 :         int                             error;
    1520             : 
    1521    19206392 :         if (rec->rm_startblock > rfg->next_agbno) {
    1522      930155 :                 error = xagb_bitmap_set(&rfg->rmap_gaps, rfg->next_agbno,
    1523             :                                 rec->rm_startblock - rfg->next_agbno);
    1524      930154 :                 if (error)
    1525             :                         return error;
    1526             :         }
    1527             : 
    1528    19206391 :         rfg->next_agbno = max_t(xfs_agblock_t, rfg->next_agbno,
    1529             :                                 rec->rm_startblock + rec->rm_blockcount);
    1530    19206391 :         return 0;
    1531             : }
    1532             : 
    1533             : /*
    1534             :  * Reap the old rmapbt blocks.  Now that the rmapbt is fully rebuilt, we make
    1535             :  * a list of gaps in the rmap records and a list of the extents mentioned in
    1536             :  * the bnobt.  Any block that's in the new rmapbt gap list but not mentioned
    1537             :  * in the bnobt is a block from the old rmapbt and can be removed.
    1538             :  */
    1539             : STATIC int
    1540        9219 : xrep_rmap_remove_old_tree(
    1541             :         struct xrep_rmap        *rr)
    1542             : {
    1543        9219 :         struct xrep_rmap_find_gaps rfg = {
    1544             :                 .next_agbno     = 0,
    1545             :         };
    1546        9219 :         struct xfs_scrub        *sc = rr->sc;
    1547        9219 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
    1548        9219 :         struct xfs_perag        *pag = sc->sa.pag;
    1549        9219 :         struct xfs_btree_cur    *mcur;
    1550        9219 :         struct xfs_buf          *mhead_bp;
    1551        9219 :         xfs_agblock_t           agend;
    1552        9219 :         int                     error;
    1553             : 
    1554        9219 :         xagb_bitmap_init(&rfg.rmap_gaps);
    1555             : 
    1556             :         /* Compute free space from the new rmapbt. */
    1557        9219 :         error = xfbtree_head_read_buf(rr->rmap_btree, NULL, &mhead_bp);
    1558        9219 :         mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, mhead_bp,
    1559             :                         rr->rmap_btree);
    1560             : 
    1561        9219 :         error = xfs_rmap_query_all(mcur, xrep_rmap_find_gaps, &rfg);
    1562        9219 :         xfs_btree_del_cursor(mcur, error);
    1563        9219 :         xfs_buf_relse(mhead_bp);
    1564        9219 :         if (error)
    1565           0 :                 goto out_bitmap;
    1566             : 
    1567             :         /* Insert a record for space between the last rmap and EOAG. */
    1568        9219 :         agend = be32_to_cpu(agf->agf_length);
    1569        9219 :         if (rfg.next_agbno < agend) {
    1570        9219 :                 error = xagb_bitmap_set(&rfg.rmap_gaps, rfg.next_agbno,
    1571             :                                 agend - rfg.next_agbno);
    1572        9219 :                 if (error)
    1573           0 :                         goto out_bitmap;
    1574             :         }
    1575             : 
    1576             :         /* Compute free space from the existing bnobt. */
    1577        9219 :         sc->sa.bno_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
    1578             :                         sc->sa.pag, XFS_BTNUM_BNO);
    1579        9219 :         error = xfs_alloc_query_all(sc->sa.bno_cur, xrep_rmap_find_freesp,
    1580             :                         &rfg);
    1581        9219 :         xfs_btree_del_cursor(sc->sa.bno_cur, error);
    1582        9219 :         sc->sa.bno_cur = NULL;
    1583        9219 :         if (error)
    1584           0 :                 goto out_bitmap;
    1585             : 
    1586             :         /*
    1587             :          * Free the "free" blocks that the new rmapbt knows about but the bnobt
    1588             :          * doesn't--these are the old rmapbt blocks.  Credit the old rmapbt
    1589             :          * block usage count back to the per-AG rmapbt reservation (and not
    1590             :          * fdblocks, since the rmap btree lives in free space) to keep the
    1591             :          * reservation and free space accounting correct.
    1592             :          */
    1593        9219 :         error = xrep_reap_agblocks(sc, &rfg.rmap_gaps,
    1594             :                         &XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RMAPBT);
    1595        9219 :         if (error)
    1596           0 :                 goto out_bitmap;
    1597             : 
    1598             :         /*
    1599             :          * Now that we've zapped all the old rmapbt blocks we can turn off
    1600             :          * the alternate height mechanism and reset the per-AG space
    1601             :          * reservation.
    1602             :          */
    1603        9219 :         pag->pagf_alt_levels[XFS_BTNUM_RMAPi] = 0;
    1604        9219 :         sc->flags |= XREP_RESET_PERAG_RESV;
    1605        9219 : out_bitmap:
    1606        9219 :         xagb_bitmap_destroy(&rfg.rmap_gaps);
    1607        9219 :         return error;
    1608             : }
    1609             : 
    1610             : static inline bool
    1611      271211 : xrep_rmapbt_want_live_update(
    1612             :         struct xchk_iscan               *iscan,
    1613             :         const struct xfs_owner_info     *oi)
    1614             : {
    1615      271211 :         if (xchk_iscan_aborted(iscan))
    1616             :                 return false;
    1617             : 
    1618             :         /*
    1619             :          * Before unlocking the AG header to perform the inode scan, we
    1620             :          * recorded reverse mappings for all AG metadata except for the OWN_AG
    1621             :          * metadata.  IOWs, the in-memory btree knows about the AG headers, the
    1622             :          * two inode btrees, the CoW staging extents, and the refcount btrees.
    1623             :          * For these types of metadata, we need to record the live updates in
    1624             :          * the in-memory rmap btree.
    1625             :          *
    1626             :          * However, we do not scan the free space btrees or the AGFL until we
    1627             :          * have re-locked the AGF and are ready to reserve space for the new
    1628             :          * new rmap btree, so we do not want live updates for OWN_AG metadata.
    1629             :          */
    1630      258483 :         if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
    1631        2234 :                 return oi->oi_owner != XFS_RMAP_OWN_AG;
    1632             : 
    1633             :         /* Ignore updates to files that the scanner hasn't visited yet. */
    1634      256249 :         return xchk_iscan_want_live_update(iscan, oi->oi_owner);
    1635             : }
    1636             : 
    1637             : /*
    1638             :  * Apply a rmapbt update from the regular filesystem into our shadow btree.
    1639             :  * We're running from the thread that owns the AGF buffer and is generating
    1640             :  * the update, so we must be careful about which parts of the struct xrep_rmap
    1641             :  * that we change.
    1642             :  */
    1643             : static int
    1644      271211 : xrep_rmapbt_live_update(
    1645             :         struct notifier_block           *nb,
    1646             :         unsigned long                   action,
    1647             :         void                            *data)
    1648             : {
    1649      271211 :         struct xfs_rmap_update_params   *p = data;
    1650      271211 :         struct xrep_rmap                *rr;
    1651      271211 :         struct xfs_mount                *mp;
    1652      271211 :         struct xfs_btree_cur            *mcur;
    1653      271211 :         struct xfs_buf                  *mhead_bp;
    1654      271211 :         struct xfs_trans                *tp;
    1655      271211 :         void                            *txcookie;
    1656      271211 :         int                             error;
    1657             : 
    1658      271211 :         rr = container_of(nb, struct xrep_rmap, hooks.update_hook.nb);
    1659      271211 :         mp = rr->sc->mp;
    1660             : 
    1661      271211 :         if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
    1662      143482 :                 goto out_unlock;
    1663             : 
    1664      127729 :         trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, action, p);
    1665             : 
    1666      127729 :         error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
    1667      127729 :         if (error)
    1668           0 :                 goto out_abort;
    1669             : 
    1670      127729 :         mutex_lock(&rr->lock);
    1671      127729 :         error = xfbtree_head_read_buf(rr->rmap_btree, tp, &mhead_bp);
    1672      127729 :         if (error)
    1673           0 :                 goto out_cancel;
    1674             : 
    1675      127729 :         mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, mhead_bp,
    1676             :                         rr->rmap_btree);
    1677      127729 :         error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
    1678             :                         p->blockcount, &p->oinfo, p->unwritten);
    1679      127729 :         xfs_btree_del_cursor(mcur, error);
    1680      127729 :         if (error)
    1681           0 :                 goto out_cancel;
    1682             : 
    1683      127729 :         error = xfbtree_trans_commit(rr->rmap_btree, tp);
    1684      127729 :         if (error)
    1685           0 :                 goto out_cancel;
    1686             : 
    1687      127729 :         xrep_trans_cancel_hook_dummy(&txcookie, tp);
    1688      127729 :         mutex_unlock(&rr->lock);
    1689      127729 :         return NOTIFY_DONE;
    1690             : 
    1691           0 : out_cancel:
    1692           0 :         xfbtree_trans_cancel(rr->rmap_btree, tp);
    1693           0 :         xrep_trans_cancel_hook_dummy(&txcookie, tp);
    1694           0 : out_abort:
    1695           0 :         mutex_unlock(&rr->lock);
    1696           0 :         xchk_iscan_abort(&rr->iscan);
    1697             : out_unlock:
    1698             :         return NOTIFY_DONE;
    1699             : }
    1700             : 
    1701             : /* Set up the filesystem scan components. */
    1702             : STATIC int
    1703        9261 : xrep_rmap_setup_scan(
    1704             :         struct xrep_rmap        *rr)
    1705             : {
    1706        9261 :         struct xfs_scrub        *sc = rr->sc;
    1707        9261 :         int                     error;
    1708             : 
    1709        9261 :         mutex_init(&rr->lock);
    1710             : 
    1711             :         /* Set up in-memory rmap btree */
    1712        9261 :         error = xfs_rmapbt_mem_create(sc->mp, sc->sa.pag->pag_agno,
    1713             :                         sc->xfile_buftarg, &rr->rmap_btree);
    1714        9261 :         if (error)
    1715           0 :                 goto out_mutex;
    1716             : 
    1717             :         /* Retry iget every tenth of a second for up to 30 seconds. */
    1718        9261 :         xchk_iscan_start(sc, 30000, 100, &rr->iscan);
    1719             : 
    1720             :         /*
    1721             :          * Hook into live rmap operations so that we can update our in-memory
    1722             :          * btree to reflect live changes on the filesystem.  Since we drop the
    1723             :          * AGF buffer to scan all the inodes, we need this piece to avoid
    1724             :          * installing a stale btree.
    1725             :          */
    1726        9250 :         ASSERT(sc->flags & XCHK_FSGATES_RMAP);
    1727        9250 :         xfs_hook_setup(&rr->hooks.update_hook, xrep_rmapbt_live_update);
    1728        9250 :         error = xfs_rmap_hook_add(sc->sa.pag, &rr->hooks);
    1729        9248 :         if (error)
    1730           0 :                 goto out_iscan;
    1731             :         return 0;
    1732             : 
    1733             : out_iscan:
    1734           0 :         xchk_iscan_teardown(&rr->iscan);
    1735           0 :         xfbtree_destroy(rr->rmap_btree);
    1736           0 : out_mutex:
    1737           0 :         mutex_destroy(&rr->lock);
    1738           0 :         return error;
    1739             : }
    1740             : 
    1741             : /* Tear down scan components. */
    1742             : STATIC void
    1743        9261 : xrep_rmap_teardown(
    1744             :         struct xrep_rmap        *rr)
    1745             : {
    1746        9261 :         struct xfs_scrub        *sc = rr->sc;
    1747             : 
    1748        9261 :         xchk_iscan_abort(&rr->iscan);
    1749        9261 :         xfs_rmap_hook_del(sc->sa.pag, &rr->hooks);
    1750        9259 :         xchk_iscan_teardown(&rr->iscan);
    1751        9257 :         xfbtree_destroy(rr->rmap_btree);
    1752        9261 :         mutex_destroy(&rr->lock);
    1753        9261 : }
    1754             : 
    1755             : /* Repair the rmap btree for some AG. */
    1756             : int
    1757        9261 : xrep_rmapbt(
    1758             :         struct xfs_scrub        *sc)
    1759             : {
    1760        9261 :         struct xrep_rmap        *rr = sc->buf;
    1761        9261 :         int                     error;
    1762             : 
    1763        9261 :         error = xrep_rmap_setup_scan(rr);
    1764        9252 :         if (error)
    1765             :                 return error;
    1766             : 
    1767             :         /*
    1768             :          * Collect rmaps for everything in this AG that isn't space metadata.
    1769             :          * These rmaps won't change even as we try to allocate blocks.
    1770             :          */
    1771        9252 :         error = xrep_rmap_find_rmaps(rr);
    1772        9261 :         if (error)
    1773          42 :                 goto out_records;
    1774             : 
    1775             :         /* Rebuild the rmap information. */
    1776        9219 :         error = xrep_rmap_build_new_tree(rr);
    1777        9219 :         if (error)
    1778           0 :                 goto out_records;
    1779             : 
    1780             :         /* Kill the old tree. */
    1781        9219 :         error = xrep_rmap_remove_old_tree(rr);
    1782             : 
    1783        9261 : out_records:
    1784        9261 :         xrep_rmap_teardown(rr);
    1785        9261 :         return error;
    1786             : }

Generated by: LCOV version 1.14