LCOV - code coverage report
Current view: top level - fs/xfs/scrub - reap.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 410 489 83.8 %
Date: 2023-07-31 20:08:34 Functions: 23 24 95.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_log_format.h"
      14             : #include "xfs_trans.h"
      15             : #include "xfs_sb.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_alloc.h"
      18             : #include "xfs_alloc_btree.h"
      19             : #include "xfs_ialloc.h"
      20             : #include "xfs_ialloc_btree.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_rmap_btree.h"
      23             : #include "xfs_refcount.h"
      24             : #include "xfs_refcount_btree.h"
      25             : #include "xfs_extent_busy.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_quota.h"
      29             : #include "xfs_qm.h"
      30             : #include "xfs_bmap.h"
      31             : #include "xfs_da_format.h"
      32             : #include "xfs_da_btree.h"
      33             : #include "xfs_attr.h"
      34             : #include "xfs_attr_remote.h"
      35             : #include "xfs_defer.h"
      36             : #include "xfs_imeta.h"
      37             : #include "xfs_rtgroup.h"
      38             : #include "xfs_rtrmap_btree.h"
      39             : #include "scrub/scrub.h"
      40             : #include "scrub/common.h"
      41             : #include "scrub/trace.h"
      42             : #include "scrub/repair.h"
      43             : #include "scrub/bitmap.h"
      44             : #include "scrub/reap.h"
      45             : 
      46             : /*
      47             :  * Disposal of Blocks from Old Metadata
      48             :  *
      49             :  * Now that we've constructed a new btree to replace the damaged one, we want
      50             :  * to dispose of the blocks that (we think) the old btree was using.
      51             :  * Previously, we used the rmapbt to collect the extents (bitmap) with the
      52             :  * rmap owner corresponding to the tree we rebuilt, collected extents for any
      53             :  * blocks with the same rmap owner that are owned by another data structure
      54             :  * (sublist), and subtracted sublist from bitmap.  In theory the extents
      55             :  * remaining in bitmap are the old btree's blocks.
      56             :  *
      57             :  * Unfortunately, it's possible that the btree was crosslinked with other
      58             :  * blocks on disk.  The rmap data can tell us if there are multiple owners, so
      59             :  * if the rmapbt says there is an owner of this block other than @oinfo, then
      60             :  * the block is crosslinked.  Remove the reverse mapping and continue.
      61             :  *
      62             :  * If there is one rmap record, we can free the block, which removes the
      63             :  * reverse mapping but doesn't add the block to the free space.  Our repair
      64             :  * strategy is to hope the other metadata objects crosslinked on this block
      65             :  * will be rebuilt (atop different blocks), thereby removing all the cross
      66             :  * links.
      67             :  *
      68             :  * If there are no rmap records at all, we also free the block.  If the btree
      69             :  * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
      70             :  * supposed to be a rmap record and everything is ok.  For other btrees there
      71             :  * had to have been an rmap entry for the block to have ended up on @bitmap,
      72             :  * so if it's gone now there's something wrong and the fs will shut down.
      73             :  *
      74             :  * Note: If there are multiple rmap records with only the same rmap owner as
      75             :  * the btree we're trying to rebuild and the block is indeed owned by another
      76             :  * data structure with the same rmap owner, then the block will be in sublist
      77             :  * and therefore doesn't need disposal.  If there are multiple rmap records
      78             :  * with only the same rmap owner but the block is not owned by something with
      79             :  * the same rmap owner, the block will be freed.
      80             :  *
      81             :  * The caller is responsible for locking the AG headers/inode for the entire
      82             :  * rebuild operation so that nothing else can sneak in and change the incore
      83             :  * state while we're not looking.  We must also invalidate any buffers
      84             :  * associated with @bitmap.
      85             :  */
      86             : 
      87             : /*
                     :  * Information about reaping extents after a repair.
                     :  *
                     :  * xrep_reap_agblocks sets one of these up on its stack and hands it to the
                     :  * bitmap walk callback as the private pointer.  The counters below track
                     :  * work attached to the current scrub transaction and are cleared by
                     :  * xreap_reset and xreap_defer_finish_reset.
                     :  */
      88             : struct xreap_state {
      89             :         struct xfs_scrub                *sc;    /* scrub context; supplies the transaction and AG state */
      90             : 
      91             :         /* Reverse mapping owner and metadata reservation type. */
      92             :         const struct xfs_owner_info     *oinfo;
      93             :         enum xfs_ag_resv_type           resv;
      94             : 
      95             :         /* If true, roll the transaction before reaping the next extent. */
      96             :         bool                            force_roll;
      97             : 
      98             :         /* Number of deferred reaps attached to the current transaction. */
      99             :         unsigned int                    deferred;
     100             : 
     101             :         /* Number of invalidated buffers logged to the current transaction. */
     102             :         unsigned int                    invalidated;
     103             : 
     104             :         /* Number of deferred reaps queued during the whole reap sequence. */
     105             :         unsigned long long              total_deferred;
     106             : };
     107             : 
     108             : /*
                     :  * Put a single block back on the AGFL.
                     :  *
                     :  * The steps are, in order: fix up the freelist, add an XFS_RMAP_OINFO_AG
                     :  * reverse mapping for @agbno, read the AGFL buffer, put the block on the
                     :  * freelist, and call xfs_extent_busy_insert for the block with
                     :  * XFS_EXTENT_BUSY_SKIP_DISCARD.  All of this happens in the scrub
                     :  * transaction (sc->tp) against the AG recorded in sc->sa.
                     :  *
                     :  * Returns 0 on success, or the error from the first step that failed.
                     :  */
     109             : STATIC int
     110           0 : xreap_put_freelist(
     111             :         struct xfs_scrub        *sc,
     112             :         xfs_agblock_t           agbno)
     113             : {
     114           0 :         struct xfs_buf          *agfl_bp;
     115           0 :         int                     error;
     116             : 
     117             :         /* Make sure there's space on the freelist. */
     118           0 :         error = xrep_fix_freelist(sc, 0);
     119           0 :         if (error)
     120             :                 return error;
     121             : 
     122             :         /*
     123             :          * Since we're "freeing" a lost block onto the AGFL, we have to
     124             :          * create an rmap for the block prior to merging it or else other
     125             :          * parts will break.
     126             :          */
     127           0 :         error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
     128             :                         &XFS_RMAP_OINFO_AG);
     129           0 :         if (error)
     130             :                 return error;
     131             : 
     132             :         /* Put the block on the AGFL. */
     133           0 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
     134           0 :         if (error)
     135             :                 return error;
     136             : 
     137           0 :         error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
     138             :                         agfl_bp, agbno, 0);
     139           0 :         if (error)
     140             :                 return error;
     141           0 :         xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
     142             :                         XFS_EXTENT_BUSY_SKIP_DISCARD);
     143             : 
     144           0 :         return 0;
     145             : }
     146             : 
     147             : /*
                     :  * Are there any uncommitted reap operations?
                     :  *
                     :  * Returns true if a roll has been forced, if any deferred reaps or buffer
                     :  * invalidations are counted against the current transaction, or if any
                     :  * deferred reaps were queued earlier in the sequence (total_deferred).
                     :  * Returns false only when all four fields are zero/false.
                     :  */
     148     4127722 : static inline bool xreap_dirty(const struct xreap_state *rs)
     149             : {
     150     4127722 :         if (rs->force_roll)
     151             :                 return true;
     152     4127722 :         if (rs->deferred)
     153             :                 return true;
     154     3600933 :         if (rs->invalidated)
     155             :                 return true;
     156     3600933 :         if (rs->total_deferred)
     157           3 :                 return true;
     158             :         return false;
     159             : }
     160             : 
     161             : #define XREAP_MAX_DEFERRED      (128)
     162             : #define XREAP_MAX_BINVAL        (2048)
     163             : 
     164             : /*
     165             :  * Decide if we want to roll the transaction after reaping an extent.  We don't
     166             :  * want to overrun the transaction reservation, so we prohibit more than
     167             :  * 128 EFIs per transaction.  For the same reason, we limit the number
     168             :  * of buffer invalidations to 2048.
                     :  *
                     :  * Both limits (XREAP_MAX_DEFERRED and XREAP_MAX_BINVAL) are compared with a
                     :  * strict '>', so a roll is only requested once a counter has gone past its
                     :  * limit.  A pending force_roll always requests a roll.
     169             :  */
     170      790840 : static inline bool xreap_want_roll(const struct xreap_state *rs)
     171             : {
     172      790840 :         if (rs->force_roll)
     173             :                 return true;
     174      790840 :         if (rs->deferred > XREAP_MAX_DEFERRED)
     175             :                 return true;
     176      789770 :         if (rs->invalidated > XREAP_MAX_BINVAL)
     177          36 :                 return true;
     178             :         return false;
     179             : }
     180             : /*
                     :  * Reset the per-transaction reap state; xreap_agmeta_extent calls this
                     :  * after rolling the transaction.  The deferred count is first folded into
                     :  * total_deferred so that the length of the whole deferred chain is still
                     :  * tracked across rolls.
                     :  */
     181             : static inline void xreap_reset(struct xreap_state *rs)
     182             : {
     183        1106 :         rs->total_deferred += rs->deferred;
     184        1106 :         rs->deferred = 0;
     185        1106 :         rs->invalidated = 0;
     186        1106 :         rs->force_roll = false;
     187        1106 : }
     188             : 
     189             : #define XREAP_MAX_DEFER_CHAIN           (2048)
     190             : 
     191             : /*
     192             :  * Decide if we want to finish the deferred ops that are attached to the scrub
     193             :  * transaction.  We don't want to queue huge chains of deferred ops because
     194             :  * that can consume a lot of log space and kernel memory.  Hence we trigger a
     195             :  * xfs_defer_finish if there are more than 2048 deferred reap operations or the
     196             :  * caller did some real work.
                     :  *
                     :  * "Real work" is signalled through force_roll.  The chain length checked
                     :  * here is total_deferred, which only grows when xreap_reset folds the
                     :  * per-transaction deferred count into it.
     197             :  */
     198             : static inline bool
     199      806680 : xreap_want_defer_finish(const struct xreap_state *rs)
     200             : {
     201      806680 :         if (rs->force_roll)
     202             :                 return true;
     203      791125 :         if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
     204          54 :                 return true;
     205             :         return false;
     206             : }
     207             : /*
                     :  * Clear every reap counter, including total_deferred, and drop any pending
                     :  * force_roll; xreap_agmeta_extent calls this after the deferred work has
                     :  * been finished.
                     :  */
     208             : static inline void xreap_defer_finish_reset(struct xreap_state *rs)
     209             : {
     210       15609 :         rs->total_deferred = 0;
     211       15609 :         rs->deferred = 0;
     212       15609 :         rs->invalidated = 0;
     213       15609 :         rs->force_roll = false;
     214       15609 : }
     215             : 
     216             : /*
     217             :  * Compute the maximum length of a buffer cache scan (in units of sectors),
     218             :  * given a quantity of fs blocks.
                     :  *
                     :  * @fsblocks is clamped to the block count that xfs_attr3_rmt_blocks reports
                     :  * for XFS_XATTR_SIZE_MAX, and the result is converted with XFS_FSB_TO_BB.
     219             :  */
     220             : xfs_daddr_t
     221     5530543 : xrep_bufscan_max_sectors(
     222             :         struct xfs_mount        *mp,
     223             :         xfs_extlen_t            fsblocks)
     224             : {
     225     5530543 :         int                     max_fsbs;
     226             : 
     227             :         /* Remote xattr values are the largest buffers that we support. */
     228     5530543 :         max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
     229             : 
     230     5530062 :         return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
     231             : }
     232             : 
     233             : /*
     234             :  * Return an incore buffer from a sector scan, or NULL if there are no buffers
     235             :  * left to return.
                     :  *
                     :  * Each probe grows the candidate length (scan->__sector_count) by
                     :  * scan->daddr_step and asks xfs_buf_incore, with XBF_LIVESCAN, for a buffer
                     :  * of that length at scan->daddr.  The first successful lookup is returned;
                     :  * the scan stops once the length exceeds scan->max_sectors.  Because the
                     :  * length is kept in @scan, calling this again resumes with the next larger
                     :  * length rather than starting over.
                     :  *
                     :  * NOTE(review): any nonzero return from xfs_buf_incore is treated as "no
                     :  * buffer of this size" and the scan simply moves on.
     236             :  */
     237             : struct xfs_buf *
     238    10752299 : xrep_bufscan_advance(
     239             :         struct xfs_mount        *mp,
     240             :         struct xrep_bufscan     *scan)
     241             : {
     242    10752299 :         scan->__sector_count += scan->daddr_step;
     243    64735365 :         while (scan->__sector_count <= scan->max_sectors) {
     244    59289368 :                 struct xfs_buf  *bp = NULL;
     245    59289368 :                 int             error;
     246             : 
     247    59289368 :                 error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
     248             :                                 scan->__sector_count, XBF_LIVESCAN, &bp);
     249    59290686 :                 if (!error)
     250     5307620 :                         return bp;
     251             : 
     252    53983066 :                 scan->__sector_count += scan->daddr_step;
     253             :         }
     254             : 
     255             :         return NULL;
     256             : }
     257             : 
     258             : /*
                     :  * Try to invalidate the incore buffers for an extent that we're freeing.
                     :  *
                     :  * @agbno is the first block of the extent and *@aglenp its length on entry.
                     :  * Every buffer found is joined to the scrub transaction, invalidated, and
                     :  * counted in rs->invalidated.  If the count goes past XREAP_MAX_BINVAL,
                     :  * scanning stops and *@aglenp is shortened so the caller only frees the
                     :  * blocks that were fully scanned; this can leave *@aglenp at zero.
                     :  *
                     :  * If either end of the extent fails xfs_verify_agbno, nothing is
                     :  * invalidated, *@aglenp is left alone, and no tracepoint is emitted.
                     :  */
     259             : STATIC void
     260      801626 : xreap_agextent_binval(
     261             :         struct xreap_state      *rs,
     262             :         xfs_agblock_t           agbno,
     263             :         xfs_extlen_t            *aglenp)
     264             : {
     265      801626 :         struct xfs_scrub        *sc = rs->sc;
     266      801626 :         struct xfs_perag        *pag = sc->sa.pag;
     267      801626 :         struct xfs_mount        *mp = sc->mp;
     268      801626 :         xfs_agnumber_t          agno = sc->sa.pag->pag_agno;
     269      801626 :         xfs_agblock_t           agbno_next = agbno + *aglenp;
     270      801626 :         xfs_agblock_t           bno = agbno;
     271             : 
     272             :         /*
     273             :          * Avoid invalidating AG headers and post-EOFS blocks because we never
     274             :          * own those.
     275             :          */
     276      801626 :         if (!xfs_verify_agbno(pag, agbno) ||
     277      801626 :             !xfs_verify_agbno(pag, agbno_next - 1))
     278             :                 return;
     279             : 
     280             :         /*
     281             :          * If there are incore buffers for these blocks, invalidate them.  We
     282             :          * assume that the lack of any other known owners means that the buffer
     283             :          * can be locked without risk of deadlocking.  The buffer cache cannot
     284             :          * detect aliasing, so employ nested loops to scan for incore buffers
     285             :          * of any plausible size.
                     :          *
                     :          * The outer loop picks each starting block in turn; the inner loop
                     :          * probes every buffer length from one block up to the rest of the
                     :          * extent (capped by xrep_bufscan_max_sectors).
     286             :          */
     287     3501560 :         while (bno < agbno_next) {
     288    10795728 :                 struct xrep_bufscan     scan = {
     289     2699079 :                         .daddr          = XFS_AGB_TO_DADDR(mp, agno, bno),
     290     2699079 :                         .max_sectors    = xrep_bufscan_max_sectors(mp,
     291             :                                                         agbno_next - bno),
     292     2698785 :                         .daddr_step     = XFS_FSB_TO_BB(mp, 1),
     293             :                 };
     294     2698785 :                 struct xfs_buf  *bp;
     295             : 
     296     5192526 :                 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
     297     2492722 :                         xfs_trans_bjoin(sc->tp, bp);
     298     2493404 :                         xfs_trans_binval(sc->tp, bp);
     299     2493777 :                         rs->invalidated++;
     300             : 
     301             :                         /*
     302             :                          * Stop invalidating if we've hit the limit; we should
     303             :                          * still have enough reservation left to free however
     304             :                          * far we've gotten.
                     :                          *
                     :                          * The subtraction below leaves *aglenp equal to
                     :                          * bno - agbno, i.e. only the blocks before the one
                     :                          * currently being scanned.
     305             :                          */
     306     2493777 :                         if (rs->invalidated > XREAP_MAX_BINVAL) {
     307          36 :                                 *aglenp -= agbno_next - bno;
     308          36 :                                 goto out;
     309             :                         }
     310             :                 }
     311             : 
     312     2699934 :                 bno++;
     313             :         }
     314             : 
     315      802481 : out:
     316      802517 :         trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
     317             : }
     318             : 
     319             : /*
     320             :  * Figure out the longest run of blocks that we can dispose of with a single
     321             :  * call.  Cross-linked blocks should have their reverse mappings removed, but
     322             :  * single-owner extents can be freed.  AGFL blocks can only be put back one at
     323             :  * a time.
                     :  *
                     :  * @agbno is the first block to examine and @agbno_next is one past the last
                     :  * candidate block.  On success, *@crosslinked holds the answer that
                     :  * xfs_rmap_has_other_keys gave for @agbno against rs->oinfo, and *@aglenp
                     :  * is the number of consecutive blocks starting at @agbno that got the same
                     :  * answer (always 1 when the reservation type is XFS_AG_RESV_AGFL).
                     :  *
                     :  * On error, *@aglenp is not written and the error is returned.  The rmap
                     :  * btree cursor is deleted on every path.
     324             :  */
     325             : STATIC int
     326      801871 : xreap_agextent_select(
     327             :         struct xreap_state      *rs,
     328             :         xfs_agblock_t           agbno,
     329             :         xfs_agblock_t           agbno_next,
     330             :         bool                    *crosslinked,
     331             :         xfs_extlen_t            *aglenp)
     332             : {
     333      801871 :         struct xfs_scrub        *sc = rs->sc;
     334      801871 :         struct xfs_btree_cur    *cur;
     335      801871 :         xfs_agblock_t           bno = agbno + 1;
     336      801871 :         xfs_extlen_t            len = 1;
     337      801871 :         int                     error;
     338             : 
     339             :         /*
     340             :          * Determine if there are any other rmap records covering the first
     341             :          * block of this extent.  If so, the block is crosslinked.
     342             :          */
     343      801871 :         cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
     344             :                         sc->sa.pag);
     345      802597 :         error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
     346             :                         crosslinked);
     347      802215 :         if (error)
     348           0 :                 goto out_cur;
     349             : 
     350             :         /* AGFL blocks can only be dealt with one at a time. */
     351      802215 :         if (rs->resv == XFS_AG_RESV_AGFL)
     352           0 :                 goto out_found;
     353             : 
     354             :         /*
     355             :          * Figure out how many of the subsequent blocks have the same crosslink
     356             :          * status.
                     :          *
                     :          * Blocks are checked one at a time; the run ends at the first block
                     :          * whose status differs from that of @agbno.
     357             :          */
     358     2840397 :         while (bno < agbno_next) {
     359     2038004 :                 bool            also_crosslinked;
     360             : 
     361     2038004 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
     362             :                                 &also_crosslinked);
     363     2038182 :                 if (error)
     364           0 :                         goto out_cur;
     365             : 
     366     2038182 :                 if (*crosslinked != also_crosslinked)
     367             :                         break;
     368             : 
     369     2038182 :                 len++;
     370     2038182 :                 bno++;
     371             :         }
     372             : 
     373      802393 : out_found:
     374      802393 :         *aglenp = len;
     375      802393 :         trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
     376      801589 : out_cur:
     377      801589 :         xfs_btree_del_cursor(cur, error);
     378      802574 :         return error;
     379             : }
     380             : 
     381             : /*
     382             :  * Dispose of as much of the beginning of this AG extent as possible.  The
     383             :  * number of blocks disposed of will be returned in @aglenp.
                     :  *
                     :  * Crosslinked extents only have their ownership record removed (through
                     :  * xfs_refcount_free_cow_extent for CoW staging extents, xfs_rmap_free
                     :  * otherwise) and force a transaction roll.  Other extents have their incore
                     :  * buffers invalidated first, which may shorten *@aglenp, and are then
                     :  * released: CoW staging extents and AGFL blocks set force_roll, while
                     :  * ordinary extents are queued with xfs_free_extent_later and counted in
                     :  * rs->deferred.
                     :  *
                     :  * Returns 0 on success (including the case where *@aglenp came back as
                     :  * zero and nothing was freed), or the error from the step that failed.
     384             :  */
     385             : STATIC int
     386      801868 : xreap_agextent_iter(
     387             :         struct xreap_state      *rs,
     388             :         xfs_agblock_t           agbno,
     389             :         xfs_extlen_t            *aglenp,
     390             :         bool                    crosslinked)
     391             : {
     392      801868 :         struct xfs_scrub        *sc = rs->sc;
     393      801868 :         xfs_fsblock_t           fsbno;
     394      801868 :         int                     error = 0;
     395             : 
     396      801868 :         ASSERT(rs->resv != XFS_AG_RESV_IMETA);
     397             : 
     398      801868 :         fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
     399             : 
     400             :         /*
     401             :          * If there are other rmappings, this block is cross linked and must
     402             :          * not be freed.  Remove the reverse mapping and move on.  Otherwise,
     403             :          * we were the only owner of the block, so free the extent, which will
     404             :          * also remove the rmap.
     405             :          *
     406             :          * XXX: XFS doesn't support detecting the case where a single block
     407             :          * metadata structure is crosslinked with a multi-block structure
     408             :          * because the buffer cache doesn't detect aliasing problems, so we
     409             :          * can't fix 100% of crosslinking problems (yet).  The verifiers will
     410             :          * blow on writeout, the filesystem will shut down, and the admin gets
     411             :          * to run xfs_repair.
     412             :          */
     413      801868 :         if (crosslinked) {
     414           0 :                 trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
     415             : 
     416           0 :                 rs->force_roll = true;
     417             : 
     418           0 :                 if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
     419             :                         /*
     420             :                          * If we're unmapping CoW staging extents, remove the
     421             :                          * records from the refcountbt, which will remove the
     422             :                          * rmap record as well.
     423             :                          */
     424           0 :                         xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
     425             :                                         *aglenp);
     426           0 :                         return 0;
     427             :                 }
     428             : 
     429           0 :                 return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
     430             :                                 *aglenp, rs->oinfo);
     431             :         }
     432             : 
     433      801868 :         trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
     434             : 
     435             :         /*
     436             :          * Invalidate as many buffers as we can, starting at agbno.  If this
     437             :          * function sets *aglenp to zero, the transaction is full of logged
     438             :          * buffer invalidations, so we need to return early so that we can
     439             :          * roll and retry.
     440             :          */
     441      801119 :         xreap_agextent_binval(rs, agbno, aglenp);
     442      802352 :         if (*aglenp == 0) {
     443           0 :                 ASSERT(xreap_want_roll(rs));
     444           0 :                 return 0;
     445             :         }
     446             : 
     447             :         /*
     448             :          * If we're getting rid of CoW staging extents, use deferred work items
     449             :          * to remove the refcountbt records (which removes the rmap records)
     450             :          * and free the extent.  We're not worried about the system going down
     451             :          * here because log recovery walks the refcount btree to clean out the
     452             :          * CoW staging extents.
                     :          *
                     :          * Note that the deferred free is queued with a NULL owner here, and
                     :          * that this path sets force_roll instead of bumping rs->deferred.
     453             :          */
     454      802352 :         if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
     455       15555 :                 ASSERT(rs->resv == XFS_AG_RESV_NONE);
     456             : 
     457       15555 :                 xfs_refcount_free_cow_extent(sc->tp, false, fsbno, *aglenp);
     458       15555 :                 error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
     459             :                                 rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
     460       15555 :                 if (error)
     461             :                         return error;
     462             : 
     463       15555 :                 rs->force_roll = true;
     464       15555 :                 return 0;
     465             :         }
     466             : 
     467             :         /* Put blocks back on the AGFL one at a time. */
     468      786797 :         if (rs->resv == XFS_AG_RESV_AGFL) {
     469           0 :                 ASSERT(*aglenp == 1);
     470           0 :                 error = xreap_put_freelist(sc, agbno);
     471           0 :                 if (error)
     472             :                         return error;
     473             : 
     474           0 :                 rs->force_roll = true;
     475           0 :                 return 0;
     476             :         }
     477             : 
     478             :         /*
     479             :          * Use deferred frees to get rid of the old btree blocks to try to
     480             :          * minimize the window in which we could crash and lose the old blocks.
                     :          *
                     :          * This is the only path that counts against rs->deferred.
     481             :          */
     482      786797 :         error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
     483             :                         rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
     484      786389 :         if (error)
     485             :                 return error;
     486             : 
     487      786389 :         rs->deferred++;
     488      786389 :         return 0;
     489             : }
     490             : 
     491             : /*
     492             :  * Break an AG metadata extent into sub-extents by fate (crosslinked, not
     493             :  * crosslinked), and dispose of each sub-extent separately.
                     :  *
                     :  * This is the xagb_bitmap_walk callback used by xrep_reap_agblocks.  Despite
                     :  * its name, @fsbno is used directly as an AG block number; @len is the
                     :  * extent length in blocks and @priv is the struct xreap_state.
                     :  *
                     :  * After each sub-extent, finish the deferred work or roll the transaction
                     :  * if the reap state asks for it, and reset the matching counters.  The
                     :  * sub-extent length can come back as zero (buffer invalidation limit
                     :  * reached), in which case the same block is retried on the next pass.
                     :  *
                     :  * Returns 0 on success or the first error encountered, which stops the
                     :  * loop.
     494             :  */
     495             : STATIC int
     496      557056 : xreap_agmeta_extent(
     497             :         uint64_t                fsbno,
     498             :         uint64_t                len,
     499             :         void                    *priv)
     500             : {
     501      557056 :         struct xreap_state      *rs = priv;
     502      557056 :         struct xfs_scrub        *sc = rs->sc;
     503      557056 :         xfs_agblock_t           agbno = fsbno;
     504      557056 :         xfs_agblock_t           agbno_next = agbno + len;
     505      557056 :         int                     error = 0;
     506             : 
     507      557056 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
     508      557056 :         ASSERT(sc->ip == NULL);
     509             : 
     510     1113029 :         while (agbno < agbno_next) {
     511      556612 :                 xfs_extlen_t    aglen;
     512      556612 :                 bool            crosslinked;
     513             : 
     514      556612 :                 error = xreap_agextent_select(rs, agbno, agbno_next,
     515             :                                 &crosslinked, &aglen);
     516      557912 :                 if (error)
     517           0 :                         return error;
     518             : 
     519      557912 :                 error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
     520      557193 :                 if (error)
     521           0 :                         return error;
     522             : 
     523      557193 :                 if (xreap_want_defer_finish(rs)) {
     524          24 :                         error = xrep_defer_finish(sc);
     525          24 :                         if (error)
     526           0 :                                 return error;
     527          24 :                         xreap_defer_finish_reset(rs);
     528      556387 :                 } else if (xreap_want_roll(rs)) {
     529         509 :                         error = xrep_roll_ag_trans(sc);
     530         509 :                         if (error)
     531           0 :                                 return error;
     532         509 :                         xreap_reset(rs);
     533             :                 }
     534             : 
     535      555973 :                 agbno += aglen;
     536             :         }
     537             : 
     538             :         return 0;
     539             : }
     540             : 
     541             : /*
                     :  * Dispose of every block of every AG metadata extent in the bitmap.
                     :  *
                     :  * @oinfo is the reverse mapping owner of the blocks being reaped and @type
                     :  * is the reservation type; both are stored in the reap state handed to the
                     :  * per-extent callback.  Asserts that the filesystem has an rmap btree and
                     :  * that no inode is attached to the scrub context.
                     :  *
                     :  * If the bitmap walk fails, its error is returned immediately.  Otherwise,
                     :  * if any reap work is still pending, the deferred operations are finished
                     :  * and that result is returned.  Returns 0 when there was nothing left to
                     :  * finish.
                     :  */
     542             : int
     543      634143 : xrep_reap_agblocks(
     544             :         struct xfs_scrub                *sc,
     545             :         struct xagb_bitmap              *bitmap,
     546             :         const struct xfs_owner_info     *oinfo,
     547             :         enum xfs_ag_resv_type           type)
     548             : {
     549      634143 :         struct xreap_state              rs = {
     550             :                 .sc                     = sc,
     551             :                 .oinfo                  = oinfo,
     552             :                 .resv                   = type,
     553             :         };
     554      634143 :         int                             error;
     555             : 
     556      634143 :         ASSERT(xfs_has_rmapbt(sc->mp));
     557      634143 :         ASSERT(sc->ip == NULL);
     558             : 
     559      634143 :         error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
     560      633747 :         if (error)
     561             :                 return error;
     562             : 
     563      633510 :         if (xreap_dirty(&rs))
     564      409251 :                 return xrep_defer_finish(sc);
     565             : 
     566             :         return 0;
     567             : }
     568             : 
     569             : /*
     570             :  * Break a file metadata extent into sub-extents by fate (crosslinked, not
     571             :  * crosslinked), and dispose of each sub-extent separately.  The extent must
     572             :  * not cross an AG boundary.
     573             :  */
     574             : STATIC int
     575      244669 : xreap_fsmeta_extent(
     576             :         uint64_t                fsbno,
     577             :         uint64_t                len,
     578             :         void                    *priv)
     579             : {
     580      244669 :         struct xreap_state      *rs = priv;
     581      244669 :         struct xfs_scrub        *sc = rs->sc;
     582      244669 :         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
     583      244669 :         xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
     584      244663 :         xfs_agblock_t           agbno_next = agbno + len;
     585      244663 :         int                     error = 0;
     586             : 
     587      244663 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
     588      244663 :         ASSERT(sc->ip != NULL);
     589      244663 :         ASSERT(!sc->sa.pag);
     590             : 
     591             :         /*
     592             :          * We're reaping blocks after repairing file metadata, which means that
     593             :          * we have to init the xchk_ag structure ourselves.
     594             :          */
     595      244663 :         sc->sa.pag = xfs_perag_get(sc->mp, agno);
     596      244667 :         if (!sc->sa.pag)
     597             :                 return -EFSCORRUPTED;
     598             : 
     599      244667 :         error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
     600      244669 :         if (error)
     601           0 :                 goto out_pag;
     602             : 
     603      489366 :         while (agbno < agbno_next) {
     604      244698 :                 xfs_extlen_t    aglen;
     605      244698 :                 bool            crosslinked;
     606             : 
     607      244698 :                 error = xreap_agextent_select(rs, agbno, agbno_next,
     608             :                                 &crosslinked, &aglen);
     609      244700 :                 if (error)
     610           0 :                         goto out_agf;
     611             : 
     612      244700 :                 error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
     613      244698 :                 if (error)
     614           0 :                         goto out_agf;
     615             : 
     616      244698 :                 if (xreap_want_defer_finish(rs)) {
     617             :                         /*
     618             :                          * xrep_defer_finish holds the AGF buffer across the
     619             :                          * deferred chain processing.
     620             :                          */
     621       15585 :                         error = xrep_defer_finish(sc);
     622       15585 :                         if (error)
     623           0 :                                 goto out_agf;
     624       15585 :                         xreap_defer_finish_reset(rs);
     625      229113 :                 } else if (xreap_want_roll(rs)) {
     626             :                         /*
     627             :                          * Hold the AGF buffer across the transaction roll so
     628             :                          * that we don't have to reattach it to the scrub
     629             :                          * context.
     630             :                          */
     631         597 :                         xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
     632         597 :                         error = xfs_trans_roll_inode(&sc->tp, sc->ip);
     633         597 :                         xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
     634         597 :                         if (error)
     635           0 :                                 goto out_agf;
     636         597 :                         xreap_reset(rs);
     637             :                 }
     638             : 
     639      244697 :                 agbno += aglen;
     640             :         }
     641             : 
     642      244668 : out_agf:
     643      244668 :         xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
     644      244670 :         sc->sa.agf_bp = NULL;
     645      244670 : out_pag:
     646      244670 :         xfs_perag_put(sc->sa.pag);
     647      244670 :         sc->sa.pag = NULL;
     648      244670 :         return error;
     649             : }
     650             : 
     651             : /*
     652             :  * Dispose of every block of every fs metadata extent in the bitmap.
     653             :  * Do not use this to dispose of the mappings in an ondisk inode fork.
     654             :  */
     655             : int
     656     3405775 : xrep_reap_fsblocks(
     657             :         struct xfs_scrub                *sc,
     658             :         struct xfsb_bitmap              *bitmap,
     659             :         const struct xfs_owner_info     *oinfo)
     660             : {
     661     3405775 :         struct xreap_state              rs = {
     662             :                 .sc                     = sc,
     663             :                 .oinfo                  = oinfo,
     664             :                 .resv                   = XFS_AG_RESV_NONE,
     665             :         };
     666     3405775 :         int                             error;
     667             : 
     668     3405775 :         ASSERT(xfs_has_rmapbt(sc->mp));
     669     3405775 :         ASSERT(sc->ip != NULL);
     670             : 
     671     3405775 :         error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
     672     3405769 :         if (error)
     673             :                 return error;
     674             : 
     675     3405766 :         if (xreap_dirty(&rs))
     676       99040 :                 return xrep_defer_finish(sc);
     677             : 
     678             :         return 0;
     679             : }
     680             : 
     681             : #ifdef CONFIG_XFS_RT
     682             : /*
     683             :  * Figure out the longest run of blocks that we can dispose of with a single
     684             :  * call.  Cross-linked blocks should have their reverse mappings removed, but
     685             :  * single-owner extents can be freed.  Units are rt blocks, not rt extents.
     686             :  */
     687             : STATIC int
     688        5323 : xreap_rgextent_select(
     689             :         struct xreap_state      *rs,
     690             :         xfs_rgblock_t           rgbno,
     691             :         xfs_rgblock_t           rgbno_next,
     692             :         bool                    *crosslinked,
     693             :         xfs_extlen_t            *rglenp)
     694             : {
     695        5323 :         struct xfs_scrub        *sc = rs->sc;
     696        5323 :         struct xfs_btree_cur    *cur;
     697        5323 :         xfs_rgblock_t           bno = rgbno + 1;
     698        5323 :         xfs_extlen_t            len = 1;
     699        5323 :         int                     error;
     700             : 
     701             :         /*
     702             :          * Determine if there are any other rmap records covering the first
     703             :          * block of this extent.  If so, the block is crosslinked.
     704             :          */
     705        5323 :         cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
     706        5323 :                         sc->sr.rtg->rtg_rmapip);
     707        5323 :         error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo,
     708             :                         crosslinked);
     709        5323 :         if (error)
     710           0 :                 goto out_cur;
     711             : 
     712             :         /*
     713             :          * Figure out how many of the subsequent blocks have the same crosslink
     714             :          * status.
     715             :          */
     716       69235 :         while (bno < rgbno_next) {
     717       63912 :                 bool            also_crosslinked;
     718             : 
     719       63912 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
     720             :                                 &also_crosslinked);
     721       63912 :                 if (error)
     722           0 :                         goto out_cur;
     723             : 
     724       63912 :                 if (*crosslinked != also_crosslinked)
     725             :                         break;
     726             : 
     727       63912 :                 len++;
     728       63912 :                 bno++;
     729             :         }
     730             : 
     731        5323 :         *rglenp = len;
     732        5323 :         trace_xreap_rgextent_select(sc->sr.rtg, rgbno, len, *crosslinked);
     733        5323 : out_cur:
     734        5323 :         xfs_btree_del_cursor(cur, error);
     735        5323 :         return error;
     736             : }
     737             : 
     738             : /*
     739             :  * Dispose of as much of the beginning of this rtgroup extent as possible.
     740             :  * The number of blocks disposed of will be returned in @rglenp.
     741             :  */
     742             : STATIC int
     743        5323 : xreap_rgextent_iter(
     744             :         struct xreap_state      *rs,
     745             :         xfs_rgblock_t           rgbno,
     746             :         xfs_extlen_t            *rglenp,
     747             :         bool                    crosslinked)
     748             : {
     749        5323 :         struct xfs_scrub        *sc = rs->sc;
     750        5323 :         xfs_rtblock_t           rtbno;
     751        5323 :         int                     error;
     752             : 
     753             :         /*
     754             :          * The only caller so far is CoW fork repair, so we only know how to
     755             :          * unlink or free CoW staging extents.  Here we don't have to worry
     756             :          * about invalidating buffers!
     757             :          */
     758        5323 :         if (rs->oinfo != &XFS_RMAP_OINFO_COW) {
     759           0 :                 ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW);
     760           0 :                 return -EFSCORRUPTED;
     761             :         }
     762        5323 :         ASSERT(rs->resv == XFS_AG_RESV_NONE);
     763             : 
     764        5323 :         rtbno = xfs_rgbno_to_rtb(sc->mp, sc->sr.rtg->rtg_rgno, rgbno);
     765             : 
     766             :         /*
     767             :          * If there are other rmappings, this block is cross linked and must
     768             :          * not be freed.  Remove the forward and reverse mapping and move on.
     769             :          */
     770        5323 :         if (crosslinked) {
     771           0 :                 trace_xreap_dispose_unmap_rtextent(sc->sr.rtg, rgbno, *rglenp);
     772             : 
     773           0 :                 xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
     774           0 :                 rs->deferred++;
     775           0 :                 return 0;
     776             :         }
     777             : 
     778        5323 :         trace_xreap_dispose_free_rtextent(sc->sr.rtg, rgbno, *rglenp);
     779             : 
     780             :         /*
     781             :          * The CoW staging extent is not crosslinked.  Use deferred work items
     782             :          * to remove the refcountbt records (which removes the rmap records)
     783             :          * and free the extent.  We're not worried about the system going down
     784             :          * here because log recovery walks the refcount btree to clean out the
     785             :          * CoW staging extents.
     786             :          */
     787        5323 :         xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
     788        5323 :         error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL,
     789             :                         rs->resv,
     790             :                         XFS_FREE_EXTENT_REALTIME |
     791             :                         XFS_FREE_EXTENT_SKIP_DISCARD);
     792        5323 :         if (error)
     793             :                 return error;
     794             : 
     795        5323 :         rs->deferred++;
     796        5323 :         return 0;
     797             : }
     798             : 
     799             : #define XREAP_RTGLOCK_ALL       (XFS_RTGLOCK_BITMAP | \
     800             :                                  XFS_RTGLOCK_RMAP | \
     801             :                                  XFS_RTGLOCK_REFCOUNT)
     802             : 
     803             : /*
     804             :  * Break a rt file metadata extent into sub-extents by fate (crosslinked, not
     805             :  * crosslinked), and dispose of each sub-extent separately.  The extent must
     806             :  * be aligned to a realtime extent.
     807             :  */
     808             : STATIC int
     809        5323 : xreap_rtmeta_extent(
     810             :         uint64_t                rtbno,
     811             :         uint64_t                len,
     812             :         void                    *priv)
     813             : {
     814        5323 :         struct xreap_state      *rs = priv;
     815        5323 :         struct xfs_scrub        *sc = rs->sc;
     816        5323 :         xfs_rgnumber_t          rgno;
     817        5323 :         xfs_rgblock_t           rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno, &rgno);
     818        5323 :         xfs_rgblock_t           rgbno_next = rgbno + len;
     819        5323 :         int                     error = 0;
     820             : 
     821        5323 :         ASSERT(sc->ip != NULL);
     822        5323 :         ASSERT(!sc->sr.rtg);
     823             : 
     824             :         /*
     825             :          * We're reaping blocks after repairing file metadata, which means that
     826             :          * we have to set up the rtgroup state in sc->sr ourselves.
     827             :          */
     828        5323 :         sc->sr.rtg = xfs_rtgroup_get(sc->mp, rgno);
     829        5323 :         if (!sc->sr.rtg)
     830             :                 return -EFSCORRUPTED;
     831             : 
     832        5323 :         xfs_rtgroup_lock(NULL, sc->sr.rtg, XREAP_RTGLOCK_ALL);
     833             : 
     834       10646 :         while (rgbno < rgbno_next) {
     835        5323 :                 xfs_extlen_t    rglen;
     836        5323 :                 bool            crosslinked;
     837             : 
     838        5323 :                 error = xreap_rgextent_select(rs, rgbno, rgbno_next,
     839             :                                 &crosslinked, &rglen);
     840        5323 :                 if (error)
     841           0 :                         goto out_unlock;
     842             : 
     843        5323 :                 error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked);
     844        5323 :                 if (error)
     845           0 :                         goto out_unlock;
     846             : 
     847        5323 :                 if (xreap_want_defer_finish(rs)) {
     848           0 :                         error = xfs_defer_finish(&sc->tp);
     849           0 :                         if (error)
     850           0 :                                 goto out_unlock;
     851           0 :                         xreap_defer_finish_reset(rs);
     852        5323 :                 } else if (xreap_want_roll(rs)) {
     853           0 :                         error = xfs_trans_roll_inode(&sc->tp, sc->ip);
     854           0 :                         if (error)
     855           0 :                                 goto out_unlock;
     856           0 :                         xreap_reset(rs);
     857             :                 }
     858             : 
     859        5323 :                 rgbno += rglen;
     860             :         }
     861             : 
     862        5323 : out_unlock:
     863        5323 :         xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL);
     864        5323 :         xfs_rtgroup_put(sc->sr.rtg);
     865        5323 :         sc->sr.rtg = NULL;
     866        5323 :         return error;
     867             : }
     868             : 
     869             : /*
     870             :  * Dispose of every block of every rt metadata extent in the bitmap.
     871             :  * Do not use this to dispose of the mappings in an ondisk inode fork.
     872             :  */
     873             : int
     874       49015 : xrep_reap_rtblocks(
     875             :         struct xfs_scrub                *sc,
     876             :         struct xrtb_bitmap              *bitmap,
     877             :         const struct xfs_owner_info     *oinfo)
     878             : {
     879       49015 :         struct xreap_state              rs = {
     880             :                 .sc                     = sc,
     881             :                 .oinfo                  = oinfo,
     882             :                 .resv                   = XFS_AG_RESV_NONE,
     883             :         };
     884       49015 :         int                             error;
     885             : 
     886       49015 :         ASSERT(xfs_has_rmapbt(sc->mp));
     887       49015 :         ASSERT(sc->ip != NULL);
     888             : 
     889       49015 :         error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
     890       48997 :         if (error)
     891             :                 return error;
     892             : 
     893       48997 :         if (xreap_dirty(&rs))
     894        2848 :                 return xrep_defer_finish(sc);
     895             : 
     896             :         return 0;
     897             : }
     898             : #endif /* CONFIG_XFS_RT */
     899             : 
     900             : /*
     901             :  * Dispose of every block of an old metadata btree that used to be rooted in a
     902             :  * metadata directory file.
     903             :  */
     904             : int
     905       39434 : xrep_reap_metadir_fsblocks(
     906             :         struct xfs_scrub                *sc,
     907             :         struct xfsb_bitmap              *bitmap)
     908             : {
     909             :         /*
     910             :          * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old
     911             :          * blocks are no longer mapped by the inode, and inode metadata space
     912             :          * reservations can only account freed space to the i_nblocks.
     913             :          */
     914       39434 :         struct xfs_owner_info           oinfo;
     915       39434 :         struct xreap_state              rs = {
     916             :                 .sc                     = sc,
     917             :                 .oinfo                  = &oinfo,
     918             :                 .resv                   = XFS_AG_RESV_NONE,
     919             :         };
     920       39434 :         int                             error;
     921             : 
     922       39434 :         ASSERT(xfs_has_rmapbt(sc->mp));
     923       39434 :         ASSERT(sc->ip != NULL);
     924       39434 :         ASSERT(xfs_is_metadir_inode(sc->ip));
     925             : 
     926       39434 :         xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
     927             : 
     928       39434 :         error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
     929       39418 :         if (error)
     930             :                 return error;
     931             : 
     932       39420 :         if (xreap_dirty(&rs))
     933       15176 :                 return xrep_defer_finish(sc);
     934             : 
     935             :         return 0;
     936             : }
     937             : 
     938             : /*
     939             :  * Metadata files are not supposed to share blocks with anything else.
     940             :  * If blocks are shared, we remove the reverse mapping (thus reducing the
     941             :  * crosslink factor); if blocks are not shared, we also need to free them.
     942             :  *
     943             :  * This first step determines the longest subset of the passed-in imap
     944             :  * (starting at its beginning) that is either crosslinked or not crosslinked.
     945             :  * The blockcount will be adjusted down as needed.
     946             :  */
     947             : STATIC int
     948      282691 : xreap_bmapi_select(
     949             :         struct xfs_scrub        *sc,
     950             :         struct xfs_inode        *ip,
     951             :         int                     whichfork,
     952             :         struct xfs_bmbt_irec    *imap,
     953             :         bool                    *crosslinked)
     954             : {
     955      282691 :         struct xfs_owner_info   oinfo;
     956      282691 :         struct xfs_btree_cur    *cur;
     957      282691 :         xfs_filblks_t           len = 1;
     958      282691 :         xfs_agblock_t           bno;
     959      282691 :         xfs_agblock_t           agbno;
     960      282691 :         xfs_agblock_t           agbno_next;
     961      282691 :         int                     error;
     962             : 
     963      282691 :         agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
     964      282689 :         agbno_next = agbno + imap->br_blockcount;
     965             : 
     966      282689 :         cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
     967             :                         sc->sa.pag);
     968             : 
     969      282692 :         xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
     970      282692 :         error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
     971      282692 :         if (error)
     972           0 :                 goto out_cur;
     973             : 
     974      282692 :         bno = agbno + 1;
     975     2745220 :         while (bno < agbno_next) {
     976     2462528 :                 bool            also_crosslinked;
     977             : 
     978     2462528 :                 oinfo.oi_offset++;
     979     2462528 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
     980             :                                 &also_crosslinked);
     981     2462528 :                 if (error)
     982           0 :                         goto out_cur;
     983             : 
     984     2462528 :                 if (also_crosslinked != *crosslinked)
     985             :                         break;
     986             : 
     987     2462528 :                 len++;
     988     2462528 :                 bno++;
     989             :         }
     990             : 
     991      282692 :         imap->br_blockcount = len;
     992      282692 :         trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
     993      282692 : out_cur:
     994      282692 :         xfs_btree_del_cursor(cur, error);
     995      282692 :         return error;
     996             : }
     997             : 
     998             : /*
     999             :  * Decide if this buffer can be joined to a transaction.  This is true for most
    1000             :  * buffers, but there are two cases that we want to catch: large remote xattr
    1001             :  * value buffers are not logged and can overflow the buffer log item dirty
    1002             :  * bitmap size; and oversized cached buffers if things have really gone
    1003             :  * haywire.
    1004             :  */
    1005             : static inline bool
    1006     2728614 : xreap_buf_loggable(
    1007             :         const struct xfs_buf    *bp)
    1008             : {
    1009     2728614 :         int                     i;
    1010             : 
    1011     5457227 :         for (i = 0; i < bp->b_map_count; i++) {
    1012     2728613 :                 int             chunks;
    1013     2728613 :                 int             map_size;
    1014             : 
    1015     2728613 :                 chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
    1016             :                                 XFS_BLF_CHUNK);
    1017     2728613 :                 map_size = DIV_ROUND_UP(chunks, NBWORD);
    1018     2728613 :                 if (map_size > XFS_BLF_DATAMAP_SIZE)
    1019             :                         return false;
    1020             :         }
    1021             : 
    1022             :         return true;
    1023             : }
    1024             : 
    1025             : /*
    1026             :  * Invalidate any buffers for this file mapping.  The @imap blockcount may be
    1027             :  * adjusted downward if we need to roll the transaction.
    1028             :  */
    1029             : STATIC int
    1030      282692 : xreap_bmapi_binval(
    1031             :         struct xfs_scrub        *sc,
    1032             :         struct xfs_inode        *ip,
    1033             :         int                     whichfork,
    1034             :         struct xfs_bmbt_irec    *imap)
    1035             : {
    1036      282692 :         struct xfs_mount        *mp = sc->mp;
    1037      282692 :         struct xfs_perag        *pag = sc->sa.pag;
    1038      282692 :         int                     bmap_flags = xfs_bmapi_aflag(whichfork);
    1039      282692 :         xfs_fileoff_t           off;
    1040      282692 :         xfs_fileoff_t           max_off;
    1041      282692 :         xfs_extlen_t            scan_blocks;
    1042      282692 :         xfs_agnumber_t          agno = sc->sa.pag->pag_agno;
    1043      282692 :         xfs_agblock_t           bno;
    1044      282692 :         xfs_agblock_t           agbno;
    1045      282692 :         xfs_agblock_t           agbno_next;
    1046      282692 :         unsigned int            invalidated = 0;
    1047      282692 :         int                     error;
    1048             : 
    1049             :         /*
    1050             :          * Avoid invalidating AG headers and post-EOFS blocks because we never
    1051             :          * own those.
    1052             :          */
    1053      282692 :         agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
    1054      282691 :         agbno_next = agbno + imap->br_blockcount;
    1055      282691 :         if (!xfs_verify_agbno(pag, agbno) ||
    1056      282691 :             !xfs_verify_agbno(pag, agbno_next - 1))
    1057             :                 return 0;
    1058             : 
    1059             :         /*
    1060             :          * Buffers for file blocks can span multiple contiguous mappings.  This
    1061             :          * means that for each block in the mapping, there could exist an
    1062             :          * xfs_buf indexed by that block with any length up to the maximum
    1063             :          * buffer size (remote xattr values) or to the next hole in the fork.
    1064             :          * To set up our binval scan, first we need to figure out the location
    1065             :          * of the next hole.
    1066             :          */
    1067      282691 :         off = imap->br_startoff + imap->br_blockcount;
    1068      282691 :         max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
    1069      505580 :         while (off < max_off) {
    1070      472129 :                 struct xfs_bmbt_irec    hmap;
    1071      472129 :                 int                     nhmaps = 1;
    1072             : 
    1073      472129 :                 error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
    1074             :                                 &nhmaps, bmap_flags);
    1075      472130 :                 if (error)
    1076           0 :                         return error;
    1077      472130 :                 if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
    1078           0 :                         ASSERT(0);
    1079           0 :                         return -EFSCORRUPTED;
    1080             :                 }
    1081             : 
    1082      695020 :                 if (!xfs_bmap_is_real_extent(&hmap))
    1083             :                         break;
    1084             : 
    1085      222890 :                 off = hmap.br_startoff + hmap.br_blockcount;
    1086             :         }
    1087      282691 :         scan_blocks = off - imap->br_startoff;
    1088             : 
    1089      282691 :         trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);
    1090             : 
    1091             :         /*
    1092             :          * If there are incore buffers for these blocks, invalidate them.  If
    1093             :          * we can't (try)lock the buffer we assume it's owned by someone else
    1094             :          * and leave it alone.  The buffer cache cannot detect aliasing, so
    1095             :          * employ nested loops to detect incore buffers of any plausible size.
    1096             :          */
    1097     3027892 :         while (bno < agbno_next) {
    1098    10980810 :                 struct xrep_bufscan     scan = {
    1099     2745202 :                         .daddr          = XFS_AGB_TO_DADDR(mp, agno, bno),
    1100     2745202 :                         .max_sectors    = xrep_bufscan_max_sectors(mp,
    1101             :                                                                 scan_blocks),
    1102     2745203 :                         .daddr_step     = XFS_FSB_TO_BB(mp, 1),
    1103             :                 };
    1104     2745203 :                 struct xfs_buf          *bp;
    1105             : 
    1106     5473814 :                 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
    1107     2728614 :                         if (xreap_buf_loggable(bp)) {
    1108     2728613 :                                 xfs_trans_bjoin(sc->tp, bp);
    1109     2728614 :                                 xfs_trans_binval(sc->tp, bp);
    1110             :                         } else {
    1111           0 :                                 xfs_buf_stale(bp);
    1112           0 :                                 xfs_buf_relse(bp);
    1113             :                         }
    1114     2728613 :                         invalidated++;
    1115             : 
    1116             :                         /*
    1117             :                          * Stop invalidating if we've hit the limit; we should
    1118             :                          * still have enough reservation left to free however
    1119             :                          * much of the mapping we've seen so far.
    1120             :                          */
    1121     2728613 :                         if (invalidated > XREAP_MAX_BINVAL) {
    1122           2 :                                 imap->br_blockcount = agbno_next - bno;
    1123           2 :                                 goto out;
    1124             :                         }
    1125             :                 }
    1126             : 
    1127     2745201 :                 bno++;
    1128     2745201 :                 scan_blocks--;
    1129             :         }
    1130             : 
    1131      282689 : out:
    1132      282691 :         trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
    1133      282691 :         return 0;
    1134             : }
    1135             : 
    1136             : /*
    1137             :  * Dispose of as much of the beginning of this file fork mapping as possible.
    1138             :  * The number of blocks disposed of is returned in @imap->br_blockcount.
                     :  *
                     :  * @sc:          scrub context; all work is attached to sc->tp, and
                     :  *               sc->sa.pag is passed to the tracepoints.
                     :  * @ip:          inode that owns the mapping.
                     :  * @whichfork:   fork of @ip that holds the mapping.
                     :  * @imap:        mapping to dispose of.  On the non-crosslinked path
                     :  *               xreap_bmapi_binval() is handed this mapping first and
                     :  *               may change br_blockcount before anything is unmapped.
                     :  * @crosslinked: true if the blocks must not be freed; in that case only
                     :  *               the fork mapping and the reverse mapping are removed.
                     :  *
                     :  * Both paths subtract @imap->br_blockcount from the inode's
                     :  * XFS_TRANS_DQ_BCOUNT quota counter.
                     :  *
                     :  * Returns 0 or a negative errno.  The crosslinked path always returns 0.
                     :  * On the other path, a return of 0 with @imap->br_blockcount == 0 means
                     :  * that nothing was unmapped or freed by this call.
    1139             :  */
    1140             : STATIC int
    1141      282692 : xrep_reap_bmapi_iter(
    1142             :         struct xfs_scrub                *sc,
    1143             :         struct xfs_inode                *ip,
    1144             :         int                             whichfork,
    1145             :         struct xfs_bmbt_irec            *imap,
    1146             :         bool                            crosslinked)
    1147             : {
    1148      282692 :         int                             error;
    1149             : 
    1150      282692 :         if (crosslinked) {
    1151             :                 /*
    1152             :                  * If there are other rmappings, this block is cross linked and
    1153             :                  * must not be freed.  Remove the reverse mapping, leave the
    1154             :                  * buffer cache in its possibly confused state, and move on.
    1155             :                  * We don't want to risk discarding valid data buffers from
    1156             :                  * anybody else who thinks they own the block, even though that
    1157             :                  * runs the risk of stale buffer warnings in the future.
    1158             :                  */
    1159           0 :                 trace_xreap_dispose_unmap_extent(sc->sa.pag,
    1160           0 :                                 XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
    1161           0 :                                 imap->br_blockcount);
    1162             : 
    1163             :                 /*
    1164             :                  * Schedule removal of the mapping from the fork.  We use
    1165             :                  * deferred log intents in this function to control the exact
    1166             :                  * sequence of metadata updates.
                     :                  *
                     :                  * Unlike the path below, no buffers are invalidated and
                     :                  * no extent free is queued here; only the fork mapping,
                     :                  * the quota block count and the rmap are updated.
    1167             :                  */
    1168           0 :                 xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
    1169           0 :                 xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
    1170           0 :                                 -(int64_t)imap->br_blockcount);
    1171           0 :                 xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
    1172           0 :                 return 0;
    1173             :         }
    1174             : 
    1175             :         /*
    1176             :          * If the block is not crosslinked, we can invalidate all the incore
    1177             :          * buffers for the extent, and then free the extent.  This is a bit of
    1178             :          * a mess since we don't detect discontiguous buffers that are indexed
    1179             :          * by a block starting before the first block of the extent but overlap
    1180             :          * anyway.
    1181             :          */
    1182      282692 :         trace_xreap_dispose_free_extent(sc->sa.pag,
    1183      282692 :                         XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
    1184      282692 :                         imap->br_blockcount);
    1185             : 
    1186             :         /*
    1187             :          * Invalidate as many buffers as we can, starting at the beginning of
    1188             :          * this mapping.  If this function sets blockcount to zero, the
    1189             :          * transaction is full of logged buffer invalidations, so we need to
    1190             :          * return early so that we can roll and retry.
                     :          *
                     :          * In that early-return case error is 0, so the caller sees
                     :          * success together with a zero-length @imap.
    1191             :          */
    1192      282692 :         error = xreap_bmapi_binval(sc, ip, whichfork, imap);
    1193      282690 :         if (error || imap->br_blockcount == 0)
    1194             :                 return error;
    1195             : 
    1196             :         /*
    1197             :          * Schedule removal of the mapping from the fork.  We use deferred log
    1198             :          * intents in this function to control the exact sequence of metadata
    1199             :          * updates.
                     :          *
                     :          * The extent free is queued with XFS_AG_RESV_NONE and
                     :          * XFS_FREE_EXTENT_SKIP_DISCARD, and its return value becomes
                     :          * the return value of this function.
    1200             :          */
    1201      282691 :         xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
    1202      282692 :         xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
    1203      282692 :                         -(int64_t)imap->br_blockcount);
    1204      282691 :         return xfs_free_extent_later(sc->tp, imap->br_startblock,
    1205             :                         imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
    1206             :                         XFS_FREE_EXTENT_SKIP_DISCARD);
    1207             : }
    1208             : 
    1209             : /*
    1210             :  * Dispose of as much of this file extent as we can.  Upon successful return,
    1211             :  * the imap will reflect the mapping that was removed from the fork.
                     :  *
                     :  * @sc:        scrub context.  sc->sa.pag must be NULL on entry (asserted).
                     :  * @ip:        inode that owns the mapping.
                     :  * @whichfork: fork of @ip that holds the mapping.
                     :  * @imap:      mapping to reap; handed to xreap_bmapi_select() and then
                     :  *             to xrep_reap_bmapi_iter(), which may update it.
                     :  *
                     :  * For the duration of the call, sc->sa.pag holds a reference to the AG
                     :  * that contains @imap->br_startblock and sc->sa.agf_bp holds that AG's
                     :  * AGF buffer.  Both are released and reset to NULL before returning,
                     :  * whether or not the disposal succeeded.
                     :  *
                     :  * Returns 0 or a negative errno; -EFSCORRUPTED if no perag structure
                     :  * could be obtained for the AG number computed from the mapping.
    1212             :  */
    1213             : STATIC int
    1214      282688 : xreap_ifork_extent(
    1215             :         struct xfs_scrub                *sc,
    1216             :         struct xfs_inode                *ip,
    1217             :         int                             whichfork,
    1218             :         struct xfs_bmbt_irec            *imap)
    1219             : {
    1220      282688 :         xfs_agnumber_t                  agno;
    1221      282688 :         bool                            crosslinked;
    1222      282688 :         int                             error;
    1223             : 
    1224      282688 :         ASSERT(sc->sa.pag == NULL);
    1225             : 
    1226      282688 :         trace_xreap_ifork_extent(sc, ip, whichfork, imap);
    1227             : 
    1228      282687 :         agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
    1229      282687 :         sc->sa.pag = xfs_perag_get(sc->mp, agno);
    1230      282692 :         if (!sc->sa.pag)
    1231             :                 return -EFSCORRUPTED;
    1232             : 
    1233      282692 :         error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
    1234      282692 :         if (error)
    1235           0 :                 goto out_pag;
    1236             : 
    1237             :         /*
    1238             :          * Decide the fate of the blocks at the beginning of the mapping, then
    1239             :          * update the mapping to use it with the unmap calls.
                     :          *
                     :          * The decision comes back in @crosslinked and is forwarded to
                     :          * xrep_reap_bmapi_iter().  After that call, the success and
                     :          * failure cases run the same cleanup code at out_agf.
    1240             :          */
    1241      282692 :         error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
    1242      282692 :         if (error)
    1243           0 :                 goto out_agf;
    1244             : 
    1245      282692 :         error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
    1246      282692 :         if (error)
    1247           0 :                 goto out_agf;
    1248             : 
    1249      282692 : out_agf:
    1250      282692 :         xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
    1251      282692 :         sc->sa.agf_bp = NULL;
    1252      282692 : out_pag:
    1253      282692 :         xfs_perag_put(sc->sa.pag);
    1254      282692 :         sc->sa.pag = NULL;
    1255      282692 :         return error;
    1256             : }
    1257             : 
    1258             : /*
    1259             :  * Dispose of each block mapped to the given fork of the given file.  Callers
    1260             :  * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip.  The fork
    1261             :  * must not have any delalloc reservations.
                     :  *
                     :  * @sc:        scrub context; sc->tp is finished via xfs_defer_finish()
                     :  *             after every real extent that is processed.
                     :  * @ip:        file whose fork is being reaped.
                     :  * @whichfork: fork to reap.  The assertions below require the rmap btree
                     :  *             feature, and require either the attr fork or a
                     :  *             non-realtime inode.
                     :  *
                     :  * The fork is walked from file offset 0 up to XFS_MAX_FILEOFF, one
                     :  * mapping per iteration.  Each iteration resumes at the end of the
                     :  * mapping as it stands after processing, so if only the front of a real
                     :  * extent was removed, the remainder is looked up again next time.
                     :  *
                     :  * Returns 0 on success or a negative errno.  -EFSCORRUPTED is returned
                     :  * if a lookup does not yield exactly one mapping or yields a delalloc
                     :  * mapping.
    1262             :  */
    1263             : int
    1264      190826 : xrep_reap_ifork(
    1265             :         struct xfs_scrub        *sc,
    1266             :         struct xfs_inode        *ip,
    1267             :         int                     whichfork)
    1268             : {
    1269      190826 :         xfs_fileoff_t           off = 0;
    1270      190826 :         int                     bmap_flags = xfs_bmapi_aflag(whichfork);
    1271      190826 :         int                     error;
    1272             : 
    1273      190826 :         ASSERT(xfs_has_rmapbt(sc->mp));
    1274      190826 :         ASSERT(ip == sc->ip || ip == sc->tempip);
    1275      190826 :         ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
    1276             : 
    1277      705875 :         while (off < XFS_MAX_FILEOFF) {
    1278      515048 :                 struct xfs_bmbt_irec    imap;
    1279      515048 :                 int                     nimaps = 1;
    1280             : 
    1281             :                 /*
                     :                  * Read one mapping starting at @off.  A delalloc mapping,
                     :                  * or anything other than exactly one mapping, is treated
                     :                  * as corruption rather than skipped.  Mappings that are
                     :                  * not real extents are stepped over at the bottom of the
                     :                  * loop.
                     :                  */
    1282      515048 :                 error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
    1283             :                                 &nimaps, bmap_flags);
    1284      515045 :                 if (error)
    1285           0 :                         return error;
    1286      515045 :                 if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
    1287           0 :                         ASSERT(0);
    1288           0 :                         return -EFSCORRUPTED;
    1289             :                 }
    1290             : 
    1291             :                 /*
    1292             :                  * If this is a real space mapping, reap as much of it as we
    1293             :                  * can in a single transaction.
                     :                  *
                     :                  * The deferred work queued by the reap is finished right
                     :                  * away, before the next mapping is looked up.
    1294             :                  */
    1295      797733 :                 if (xfs_bmap_is_real_extent(&imap)) {
    1296      282688 :                         error = xreap_ifork_extent(sc, ip, whichfork, &imap);
    1297      282692 :                         if (error)
    1298           0 :                                 return error;
    1299             : 
    1300      282692 :                         error = xfs_defer_finish(&sc->tp);
    1301      282692 :                         if (error)
    1302           0 :                                 return error;
    1303             :                 }
    1304             : 
    1305      515049 :                 off = imap.br_startoff + imap.br_blockcount;
    1306             :         }
    1307             : 
    1308             :         return 0;
    1309             : }

Generated by: LCOV version 1.14