LCOV - fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023

LCOV - code coverage report

Current view:	top level - fs/xfs/scrub - reap.c (source / functions)		Hit	Total	Coverage
Test:	fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023	Lines:	394	484	81.4 %
Date:	2023-07-31 20:08:27	Functions:	21	22	95.5 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_log_format.h"
      14             : #include "xfs_trans.h"
      15             : #include "xfs_sb.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_alloc.h"
      18             : #include "xfs_alloc_btree.h"
      19             : #include "xfs_ialloc.h"
      20             : #include "xfs_ialloc_btree.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_rmap_btree.h"
      23             : #include "xfs_refcount.h"
      24             : #include "xfs_refcount_btree.h"
      25             : #include "xfs_extent_busy.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_quota.h"
      29             : #include "xfs_qm.h"
      30             : #include "xfs_bmap.h"
      31             : #include "xfs_da_format.h"
      32             : #include "xfs_da_btree.h"
      33             : #include "xfs_attr.h"
      34             : #include "xfs_attr_remote.h"
      35             : #include "xfs_defer.h"
      36             : #include "xfs_imeta.h"
      37             : #include "xfs_rtgroup.h"
      38             : #include "xfs_rtrmap_btree.h"
      39             : #include "scrub/scrub.h"
      40             : #include "scrub/common.h"
      41             : #include "scrub/trace.h"
      42             : #include "scrub/repair.h"
      43             : #include "scrub/bitmap.h"
      44             : #include "scrub/reap.h"
      45             : 
      46             : /*
      47             :  * Disposal of Blocks from Old Metadata
      48             :  *
      49             :  * Now that we've constructed a new btree to replace the damaged one, we want
      50             :  * to dispose of the blocks that (we think) the old btree was using.
      51             :  * Previously, we used the rmapbt to collect the extents (bitmap) with the
      52             :  * rmap owner corresponding to the tree we rebuilt, collected extents for any
      53             :  * blocks with the same rmap owner that are owned by another data structure
      54             :  * (sublist), and subtracted sublist from bitmap.  In theory the extents
      55             :  * remaining in bitmap are the old btree's blocks.
      56             :  *
      57             :  * Unfortunately, it's possible that the btree was crosslinked with other
      58             :  * blocks on disk.  The rmap data can tell us if there are multiple owners, so
      59             :  * if the rmapbt says there is an owner of this block other than @oinfo, then
      60             :  * the block is crosslinked.  Remove the reverse mapping and continue.
      61             :  *
      62             :  * If there is one rmap record, we can free the block, which removes the
      63             :  * reverse mapping but doesn't add the block to the free space.  Our repair
      64             :  * strategy is to hope the other metadata objects crosslinked on this block
      65             :  * will be rebuilt (atop different blocks), thereby removing all the cross
      66             :  * links.
      67             :  *
      68             :  * If there are no rmap records at all, we also free the block.  If the btree
      69             :  * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
      70             :  * supposed to be a rmap record and everything is ok.  For other btrees there
      71             :  * had to have been an rmap entry for the block to have ended up on @bitmap,
      72             :  * so if it's gone now there's something wrong and the fs will shut down.
      73             :  *
      74             :  * Note: If there are multiple rmap records with only the same rmap owner as
      75             :  * the btree we're trying to rebuild and the block is indeed owned by another
      76             :  * data structure with the same rmap owner, then the block will be in sublist
      77             :  * and therefore doesn't need disposal.  If there are multiple rmap records
      78             :  * with only the same rmap owner but the block is not owned by something with
      79             :  * the same rmap owner, the block will be freed.
      80             :  *
      81             :  * The caller is responsible for locking the AG headers/inode for the entire
      82             :  * rebuild operation so that nothing else can sneak in and change the incore
      83             :  * state while we're not looking.  We must also invalidate any buffers
      84             :  * associated with @bitmap.
      85             :  */
      86             : 
      87             : /* Information about reaping extents after a repair. */
      88             : struct xreap_state {
      89             :         struct xfs_scrub                *sc;
      90             : 
      91             :         /* Reverse mapping owner and metadata reservation type. */
      92             :         const struct xfs_owner_info     *oinfo;
      93             :         enum xfs_ag_resv_type           resv;
      94             : 
      95             :         /* If true, roll the transaction before reaping the next extent. */
      96             :         bool                            force_roll;
      97             : 
      98             :         /* Number of deferred reaps attached to the current transaction. */
      99             :         unsigned int                    deferred;
     100             : 
     101             :         /* Number of invalidated buffers logged to the current transaction. */
     102             :         unsigned int                    invalidated;
     103             : 
     104             :         /* Number of deferred reaps queued during the whole reap sequence. */
     105             :         unsigned long long              total_deferred;
     106             : };
     107             : 
     108             : /* Put a block back on the AGFL. */
     109             : STATIC int
     110           0 : xreap_put_freelist(
     111             :         struct xfs_scrub        *sc,
     112             :         xfs_agblock_t           agbno)
     113             : {
     114           0 :         struct xfs_buf          *agfl_bp;
     115           0 :         int                     error;
     116             : 
     117             :         /* Make sure there's space on the freelist. */
     118           0 :         error = xrep_fix_freelist(sc, 0);
     119           0 :         if (error)
     120             :                 return error;
     121             : 
     122             :         /*
     123             :          * Since we're "freeing" a lost block onto the AGFL, we have to
     124             :          * create an rmap for the block prior to merging it or else other
     125             :          * parts will break.
     126             :          */
     127           0 :         error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
     128             :                         &XFS_RMAP_OINFO_AG);
     129           0 :         if (error)
     130             :                 return error;
     131             : 
     132             :         /* Put the block on the AGFL. */
     133           0 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
     134           0 :         if (error)
     135             :                 return error;
     136             : 
     137           0 :         error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
     138             :                         agfl_bp, agbno, 0);
     139           0 :         if (error)
     140             :                 return error;
     141           0 :         xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
     142             :                         XFS_EXTENT_BUSY_SKIP_DISCARD);
     143             : 
     144           0 :         return 0;
     145             : }
     146             : 
     147             : /* Are there any uncommitted reap operations? */
     148      523030 : static inline bool xreap_dirty(const struct xreap_state *rs)
     149             : {
     150      523030 :         if (rs->force_roll)
     151             :                 return true;
     152      523030 :         if (rs->deferred)
     153             :                 return true;
     154      273180 :         if (rs->invalidated)
     155             :                 return true;
     156      273180 :         if (rs->total_deferred)
     157           0 :                 return true;
     158             :         return false;
     159             : }
     160             : 
     161             : #define XREAP_MAX_DEFERRED      (128)
     162             : #define XREAP_MAX_BINVAL        (2048)
     163             : 
     164             : /*
     165             :  * Decide if we want to roll the transaction after reaping an extent.  We don't
     166             :  * want to overrun the transaction reservation, so we prohibit more than
     167             :  * 128 EFIs per transaction.  For the same reason, we limit the number
     168             :  * of buffer invalidations to 2048.
     169             :  */
     170             : static inline bool xreap_want_roll(const struct xreap_state *rs)
     171             : {
     172      300806 :         if (rs->force_roll)
     173             :                 return true;
     174           0 :         if (rs->deferred > XREAP_MAX_DEFERRED)
     175             :                 return true;
     176      300804 :         if (rs->invalidated > XREAP_MAX_BINVAL)
     177           0 :                 return true;
     178             :         return false;
     179             : }
     180             : 
     181             : static inline void xreap_reset(struct xreap_state *rs)
     182             : {
     183           2 :         rs->total_deferred += rs->deferred;
     184           2 :         rs->deferred = 0;
     185           2 :         rs->invalidated = 0;
     186           2 :         rs->force_roll = false;
     187           2 : }
     188             : 
     189             : #define XREAP_MAX_DEFER_CHAIN           (2048)
     190             : 
     191             : /*
     192             :  * Decide if we want to finish the deferred ops that are attached to the scrub
     193             :  * transaction.  We don't want to queue huge chains of deferred ops because
     194             :  * that can consume a lot of log space and kernel memory.  Hence we trigger a
     195             :  * xfs_defer_finish if there are more than 2048 deferred reap operations or the
     196             :  * caller did some real work.
     197             :  */
     198             : static inline bool
     199             : xreap_want_defer_finish(const struct xreap_state *rs)
     200             : {
     201      301454 :         if (rs->force_roll)
     202             :                 return true;
     203      300806 :         if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
     204             :                 return true;
     205             :         return false;
     206             : }
     207             : 
     208             : static inline void xreap_defer_finish_reset(struct xreap_state *rs)
     209             : {
     210         648 :         rs->total_deferred = 0;
     211         648 :         rs->deferred = 0;
     212         648 :         rs->invalidated = 0;
     213         648 :         rs->force_roll = false;
     214         648 : }
     215             : 
     216             : /*
     217             :  * Compute the maximum length of a buffer cache scan (in units of sectors),
     218             :  * given a quantity of fs blocks.
     219             :  */
     220             : xfs_daddr_t
     221       42558 : xrep_bufscan_max_sectors(
     222             :         struct xfs_mount        *mp,
     223             :         xfs_extlen_t            fsblocks)
     224             : {
     225     3240130 :         int                     max_fsbs;
     226             : 
     227             :         /* Remote xattr values are the largest buffers that we support. */
     228       42558 :         max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
     229             : 
     230     3240120 :         return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
     231             : }
     232             : 
     233             : /*
     234             :  * Return an incore buffer from a sector scan, or NULL if there are no buffers
     235             :  * left to return.
     236             :  */
     237             : struct xfs_buf *
     238     6425402 : xrep_bufscan_advance(
     239             :         struct xfs_mount        *mp,
     240             :         struct xrep_bufscan     *scan)
     241             : {
     242     6425402 :         scan->__sector_count += scan->daddr_step;
     243    40365981 :         while (scan->__sector_count <= scan->max_sectors) {
     244    37168411 :                 struct xfs_buf  *bp = NULL;
     245    37168411 :                 int             error;
     246             : 
     247    37168411 :                 error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
     248             :                                 scan->__sector_count, XBF_LIVESCAN, &bp);
     249    37168421 :                 if (!error)
     250     3227842 :                         return bp;
     251             : 
     252    33940579 :                 scan->__sector_count += scan->daddr_step;
     253             :         }
     254             : 
     255             :         return NULL;
     256             : }
     257             : 
     258             : /* Try to invalidate the incore buffers for an extent that we're freeing. */
     259             : STATIC void
     260      298785 : xreap_agextent_binval(
     261             :         struct xreap_state      *rs,
     262             :         xfs_agblock_t           agbno,
     263             :         xfs_extlen_t            *aglenp)
     264             : {
     265      298785 :         struct xfs_scrub        *sc = rs->sc;
     266      298785 :         struct xfs_perag        *pag = sc->sa.pag;
     267      298785 :         struct xfs_mount        *mp = sc->mp;
     268      298785 :         xfs_agnumber_t          agno = sc->sa.pag->pag_agno;
     269      298785 :         xfs_agblock_t           agbno_next = agbno + *aglenp;
     270      298785 :         xfs_agblock_t           bno = agbno;
     271             : 
     272             :         /*
     273             :          * Avoid invalidating AG headers and post-EOFS blocks because we never
     274             :          * own those.
     275             :          */
     276      298785 :         if (!xfs_verify_agbno(pag, agbno) ||
     277      298785 :             !xfs_verify_agbno(pag, agbno_next - 1))
     278             :                 return;
     279             : 
     280             :         /*
     281             :          * If there are incore buffers for these blocks, invalidate them.  We
     282             :          * assume that the lack of any other known owners means that the buffer
     283             :          * can be locked without risk of deadlocking.  The buffer cache cannot
     284             :          * detect aliasing, so employ nested loops to scan for incore buffers
     285             :          * of any plausible size.
     286             :          */
     287     1486784 :         while (bno < agbno_next) {
     288     2375986 :                 struct xrep_bufscan     scan = {
     289     1187998 :                         .daddr          = XFS_AGB_TO_DADDR(mp, agno, bno),
     290     1187998 :                         .max_sectors    = xrep_bufscan_max_sectors(mp,
     291             :                                                         agbno_next - bno),
     292     1187988 :                         .daddr_step     = XFS_FSB_TO_BB(mp, 1),
     293             :                 };
     294     1187988 :                 struct xfs_buf  *bp;
     295             : 
     296     2365667 :                 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
     297     1177680 :                         xfs_trans_bjoin(sc->tp, bp);
     298     1177673 :                         xfs_trans_binval(sc->tp, bp);
     299     1177679 :                         rs->invalidated++;
     300             : 
     301             :                         /*
     302             :                          * Stop invalidating if we've hit the limit; we should
     303             :                          * still have enough reservation left to free however
     304             :                          * far we've gotten.
     305             :                          */
     306     1177679 :                         if (rs->invalidated > XREAP_MAX_BINVAL) {
     307           0 :                                 *aglenp -= agbno_next - bno;
     308           0 :                                 goto out;
     309             :                         }
     310             :                 }
     311             : 
     312     1187999 :                 bno++;
     313             :         }
     314             : 
     315      298786 : out:
     316      298786 :         trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
     317             : }
     318             : 
     319             : /*
     320             :  * Figure out the longest run of blocks that we can dispose of with a single
     321             :  * call.  Cross-linked blocks should have their reverse mappings removed, but
     322             :  * single-owner extents can be freed.  AGFL blocks can only be put back one at
     323             :  * a time.
     324             :  */
     325             : STATIC int
     326      298770 : xreap_agextent_select(
     327             :         struct xreap_state      *rs,
     328             :         xfs_agblock_t           agbno,
     329             :         xfs_agblock_t           agbno_next,
     330             :         bool                    *crosslinked,
     331             :         xfs_extlen_t            *aglenp)
     332             : {
     333      298770 :         struct xfs_scrub        *sc = rs->sc;
     334      298770 :         struct xfs_btree_cur    *cur;
     335      298770 :         xfs_agblock_t           bno = agbno + 1;
     336      298770 :         xfs_extlen_t            len = 1;
     337      298770 :         int                     error;
     338             : 
     339             :         /*
     340             :          * Determine if there are any other rmap records covering the first
     341             :          * block of this extent.  If so, the block is crosslinked.
     342             :          */
     343      298770 :         cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
     344             :                         sc->sa.pag);
     345      298773 :         error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
     346             :                         crosslinked);
     347      298775 :         if (error)
     348           0 :                 goto out_cur;
     349             : 
     350             :         /* AGFL blocks can only be deal with one at a time. */
     351      298775 :         if (rs->resv == XFS_AG_RESV_AGFL)
     352           0 :                 goto out_found;
     353             : 
     354             :         /*
     355             :          * Figure out how many of the subsequent blocks have the same crosslink
     356             :          * status.
     357             :          */
     358     1187990 :         while (bno < agbno_next) {
     359      889205 :                 bool            also_crosslinked;
     360             : 
     361      889205 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
     362             :                                 &also_crosslinked);
     363      889215 :                 if (error)
     364           0 :                         goto out_cur;
     365             : 
     366      889215 :                 if (*crosslinked != also_crosslinked)
     367             :                         break;
     368             : 
     369      889215 :                 len++;
     370      889215 :                 bno++;
     371             :         }
     372             : 
     373      298785 : out_found:
     374      298785 :         *aglenp = len;
     375      298785 :         trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
     376      298786 : out_cur:
     377      298786 :         xfs_btree_del_cursor(cur, error);
     378      298786 :         return error;
     379             : }
     380             : 
     381             : /*
     382             :  * Dispose of as much of the beginning of this AG extent as possible.  The
     383             :  * number of blocks disposed of will be returned in @aglenp.
     384             :  */
     385             : STATIC int
     386      298783 : xreap_agextent_iter(
     387             :         struct xreap_state      *rs,
     388             :         xfs_agblock_t           agbno,
     389             :         xfs_extlen_t            *aglenp,
     390             :         bool                    crosslinked)
     391             : {
     392      298783 :         struct xfs_scrub        *sc = rs->sc;
     393      298783 :         xfs_fsblock_t           fsbno;
     394      298783 :         int                     error = 0;
     395             : 
     396      298783 :         ASSERT(rs->resv != XFS_AG_RESV_IMETA);
     397             : 
     398      298783 :         fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
     399             : 
     400             :         /*
     401             :          * If there are other rmappings, this block is cross linked and must
     402             :          * not be freed.  Remove the reverse mapping and move on.  Otherwise,
     403             :          * we were the only owner of the block, so free the extent, which will
     404             :          * also remove the rmap.
     405             :          *
     406             :          * XXX: XFS doesn't support detecting the case where a single block
     407             :          * metadata structure is crosslinked with a multi-block structure
     408             :          * because the buffer cache doesn't detect aliasing problems, so we
     409             :          * can't fix 100% of crosslinking problems (yet).  The verifiers will
     410             :          * blow on writeout, the filesystem will shut down, and the admin gets
     411             :          * to run xfs_repair.
     412             :          */
     413      298783 :         if (crosslinked) {
     414           0 :                 trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
     415             : 
     416           0 :                 rs->force_roll = true;
     417             : 
     418           0 :                 if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
     419             :                         /*
     420             :                          * If we're unmapping CoW staging extents, remove the
     421             :                          * records from the refcountbt, which will remove the
     422             :                          * rmap record as well.
     423             :                          */
     424           0 :                         xfs_refcount_free_cow_extent(sc->tp, false, fsbno,
     425             :                                         *aglenp);
     426           0 :                         return 0;
     427             :                 }
     428             : 
     429           0 :                 return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
     430             :                                 *aglenp, rs->oinfo);
     431             :         }
     432             : 
     433      298783 :         trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
     434             : 
     435             :         /*
     436             :          * Invalidate as many buffers as we can, starting at agbno.  If this
     437             :          * function sets *aglenp to zero, the transaction is full of logged
     438             :          * buffer invalidations, so we need to return early so that we can
     439             :          * roll and retry.
     440             :          */
     441      298776 :         xreap_agextent_binval(rs, agbno, aglenp);
     442      298784 :         if (*aglenp == 0) {
     443           0 :                 ASSERT(xreap_want_roll(rs));
     444           0 :                 return 0;
     445             :         }
     446             : 
     447             :         /*
     448             :          * If we're getting rid of CoW staging extents, use deferred work items
     449             :          * to remove the refcountbt records (which removes the rmap records)
     450             :          * and free the extent.  We're not worried about the system going down
     451             :          * here because log recovery walks the refcount btree to clean out the
     452             :          * CoW staging extents.
     453             :          */
     454      298784 :         if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
     455         648 :                 ASSERT(rs->resv == XFS_AG_RESV_NONE);
     456             : 
     457         648 :                 xfs_refcount_free_cow_extent(sc->tp, false, fsbno, *aglenp);
     458         648 :                 error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
     459             :                                 rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
     460         648 :                 if (error)
     461             :                         return error;
     462             : 
     463         648 :                 rs->force_roll = true;
     464         648 :                 return 0;
     465             :         }
     466             : 
     467             :         /* Put blocks back on the AGFL one at a time. */
     468      298136 :         if (rs->resv == XFS_AG_RESV_AGFL) {
     469           0 :                 ASSERT(*aglenp == 1);
     470           0 :                 error = xreap_put_freelist(sc, agbno);
     471           0 :                 if (error)
     472             :                         return error;
     473             : 
     474           0 :                 rs->force_roll = true;
     475           0 :                 return 0;
     476             :         }
     477             : 
     478             :         /*
     479             :          * Use deferred frees to get rid of the old btree blocks to try to
     480             :          * minimize the window in which we could crash and lose the old blocks.
     481             :          */
     482      298136 :         error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
     483             :                         rs->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
     484      298129 :         if (error)
     485             :                 return error;
     486             : 
     487      298129 :         rs->deferred++;
     488      298129 :         return 0;
     489             : }
     490             : 
     491             : /*
     492             :  * Break an AG metadata extent into sub-extents by fate (crosslinked, not
     493             :  * crosslinked), and dispose of each sub-extent separately.
     494             :  */
     495             : STATIC int
     496      239093 : xreap_agmeta_extent(
     497             :         uint64_t                fsbno,
     498             :         uint64_t                len,
     499             :         void                    *priv)
     500             : {
     501      239093 :         struct xreap_state      *rs = priv;
     502      239093 :         struct xfs_scrub        *sc = rs->sc;
     503      239093 :         xfs_agblock_t           agbno = fsbno;
     504      239093 :         xfs_agblock_t           agbno_next = agbno + len;
     505      239093 :         int                     error = 0;
     506             : 
     507      239093 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
     508      239093 :         ASSERT(sc->ip == NULL);
     509             : 
     510      478198 :         while (agbno < agbno_next) {
     511      239101 :                 xfs_extlen_t    aglen;
     512      239101 :                 bool            crosslinked;
     513             : 
     514      239101 :                 error = xreap_agextent_select(rs, agbno, agbno_next,
     515             :                                 &crosslinked, &aglen);
     516      239107 :                 if (error)
     517           0 :                         return error;
     518             : 
     519      239107 :                 error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
     520      239105 :                 if (error)
     521           0 :                         return error;
     522             : 
     523      239105 :                 if (xreap_want_defer_finish(rs)) {
     524           0 :                         error = xrep_defer_finish(sc);
     525           0 :                         if (error)
     526           0 :                                 return error;
     527           0 :                         xreap_defer_finish_reset(rs);
     528      239105 :                 } else if (xreap_want_roll(rs)) {
     529           0 :                         error = xrep_roll_ag_trans(sc);
     530           0 :                         if (error)
     531           0 :                                 return error;
     532           0 :                         xreap_reset(rs);
     533             :                 }
     534             : 
     535      239105 :                 agbno += aglen;
     536             :         }
     537             : 
     538             :         return 0;
     539             : }
     540             : 
     541             : /* Dispose of every block of every AG metadata extent in the bitmap. */
     542             : int
     543      334577 : xrep_reap_agblocks(
     544             :         struct xfs_scrub                *sc,
     545             :         struct xagb_bitmap              *bitmap,
     546             :         const struct xfs_owner_info     *oinfo,
     547             :         enum xfs_ag_resv_type           type)
     548             : {
     549      334577 :         struct xreap_state              rs = {
     550             :                 .sc                     = sc,
     551             :                 .oinfo                  = oinfo,
     552             :                 .resv                   = type,
     553             :         };
     554      334577 :         int                             error;
     555             : 
     556      334577 :         ASSERT(xfs_has_rmapbt(sc->mp));
     557      334577 :         ASSERT(sc->ip == NULL);
     558             : 
     559      334577 :         error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
     560      334580 :         if (error)
     561             :                 return error;
     562             : 
     563      334580 :         if (xreap_dirty(&rs))
     564      203910 :                 return xrep_defer_finish(sc);
     565             : 
     566             :         return 0;
     567             : }
     568             : 
     569             : /*
     570             :  * Break a file metadata extent into sub-extents by fate (crosslinked, not
     571             :  * crosslinked), and dispose of each sub-extent separately.  The extent must
     572             :  * not cross an AG boundary.
     573             :  */
     574             : STATIC int
     575       59676 : xreap_fsmeta_extent(
     576             :         uint64_t                fsbno,
     577             :         uint64_t                len,
     578             :         void                    *priv)
     579             : {
     580       59676 :         struct xreap_state      *rs = priv;
     581       59676 :         struct xfs_scrub        *sc = rs->sc;
     582       59676 :         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
     583       59676 :         xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
     584       59676 :         xfs_agblock_t           agbno_next = agbno + len;
     585       59676 :         int                     error = 0;
     586             : 
     587       59676 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
     588       59676 :         ASSERT(sc->ip != NULL);
     589       59676 :         ASSERT(!sc->sa.pag);
     590             : 
     591             :         /*
     592             :          * We're reaping blocks after repairing file metadata, which means that
     593             :          * we have to init the xchk_ag structure ourselves.
     594             :          */
     595       59676 :         sc->sa.pag = xfs_perag_get(sc->mp, agno);
     596       59676 :         if (!sc->sa.pag)
     597             :                 return -EFSCORRUPTED;
     598             : 
     599       59676 :         error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
     600       59676 :         if (error)
     601           0 :                 goto out_pag;
     602             : 
     603      119352 :         while (agbno < agbno_next) {
     604       59676 :                 xfs_extlen_t    aglen;
     605       59676 :                 bool            crosslinked;
     606             : 
     607       59676 :                 error = xreap_agextent_select(rs, agbno, agbno_next,
     608             :                                 &crosslinked, &aglen);
     609       59676 :                 if (error)
     610           0 :                         goto out_agf;
     611             : 
     612       59676 :                 error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
     613       59676 :                 if (error)
     614           0 :                         goto out_agf;
     615             : 
     616       59676 :                 if (xreap_want_defer_finish(rs)) {
     617             :                         /*
     618             :                          * Holds the AGF buffer across the deferred chain
     619             :                          * processing.
     620             :                          */
     621         648 :                         error = xrep_defer_finish(sc);
     622         648 :                         if (error)
     623           0 :                                 goto out_agf;
     624         648 :                         xreap_defer_finish_reset(rs);
     625       59028 :                 } else if (xreap_want_roll(rs)) {
     626             :                         /*
     627             :                          * Hold the AGF buffer across the transaction roll so
     628             :                          * that we don't have to reattach it to the scrub
     629             :                          * context.
     630             :                          */
     631           2 :                         xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
     632           2 :                         error = xfs_trans_roll_inode(&sc->tp, sc->ip);
     633           2 :                         xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
     634           2 :                         if (error)
     635           0 :                                 goto out_agf;
     636           2 :                         xreap_reset(rs);
     637             :                 }
     638             : 
     639       59676 :                 agbno += aglen;
     640             :         }
     641             : 
     642       59676 : out_agf:
     643       59676 :         xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
     644       59676 :         sc->sa.agf_bp = NULL;
     645       59676 : out_pag:
     646       59676 :         xfs_perag_put(sc->sa.pag);
     647       59676 :         sc->sa.pag = NULL;
     648       59676 :         return error;
     649             : }
     650             : 
     651             : /*
     652             :  * Dispose of every block of every fs metadata extent in the bitmap.
     653             :  * Do not use this to dispose of the mappings in an ondisk inode fork.
     654             :  */
     655             : int
     656      156799 : xrep_reap_fsblocks(
     657             :         struct xfs_scrub                *sc,
     658             :         struct xfsb_bitmap              *bitmap,
     659             :         const struct xfs_owner_info     *oinfo)
     660             : {
     661      156799 :         struct xreap_state              rs = {
     662             :                 .sc                     = sc,
     663             :                 .oinfo                  = oinfo,
     664             :                 .resv                   = XFS_AG_RESV_NONE,
     665             :         };
     666      156799 :         int                             error;
     667             : 
     668      156799 :         ASSERT(xfs_has_rmapbt(sc->mp));
     669      156799 :         ASSERT(sc->ip != NULL);
     670             : 
     671      156799 :         error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
     672      156799 :         if (error)
     673             :                 return error;
     674             : 
     675      156799 :         if (xreap_dirty(&rs))
     676       35054 :                 return xrep_defer_finish(sc);
     677             : 
     678             :         return 0;
     679             : }
     680             : 
     681             : #ifdef CONFIG_XFS_RT
     682             : /*
     683             :  * Figure out the longest run of blocks that we can dispose of with a single
     684             :  * call.  Cross-linked blocks should have their reverse mappings removed, but
     685             :  * single-owner extents can be freed.  Units are rt blocks, not rt extents.
     686             :  */
     687             : STATIC int
     688        2673 : xreap_rgextent_select(
     689             :         struct xreap_state      *rs,
     690             :         xfs_rgblock_t           rgbno,
     691             :         xfs_rgblock_t           rgbno_next,
     692             :         bool                    *crosslinked,
     693             :         xfs_extlen_t            *rglenp)
     694             : {
     695        2673 :         struct xfs_scrub        *sc = rs->sc;
     696        2673 :         struct xfs_btree_cur    *cur;
     697        2673 :         xfs_rgblock_t           bno = rgbno + 1;
     698        2673 :         xfs_extlen_t            len = 1;
     699        2673 :         int                     error;
     700             : 
     701             :         /*
     702             :          * Determine if there are any other rmap records covering the first
     703             :          * block of this extent.  If so, the block is crosslinked.
     704             :          */
     705        2673 :         cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
     706             :                         sc->sr.rtg->rtg_rmapip);
     707        2673 :         error = xfs_rmap_has_other_keys(cur, rgbno, 1, rs->oinfo,
     708             :                         crosslinked);
     709        2673 :         if (error)
     710           0 :                 goto out_cur;
     711             : 
     712             :         /*
     713             :          * Figure out how many of the subsequent blocks have the same crosslink
     714             :          * status.
     715             :          */
     716       37766 :         while (bno < rgbno_next) {
     717       35093 :                 bool            also_crosslinked;
     718             : 
     719       35093 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
     720             :                                 &also_crosslinked);
     721       35093 :                 if (error)
     722           0 :                         goto out_cur;
     723             : 
     724       35093 :                 if (*crosslinked != also_crosslinked)
     725             :                         break;
     726             : 
     727       35093 :                 len++;
     728       35093 :                 bno++;
     729             :         }
     730             : 
     731        2673 :         *rglenp = len;
     732        2673 :         trace_xreap_rgextent_select(sc->sr.rtg, rgbno, len, *crosslinked);
     733        2673 : out_cur:
     734        2673 :         xfs_btree_del_cursor(cur, error);
     735        2673 :         return error;
     736             : }
     737             : 
     738             : /*
     739             :  * Dispose of as much of the beginning of this rtgroup extent as possible.
     740             :  * The number of blocks disposed of will be returned in @rglenp.
     741             :  */
     742             : STATIC int
     743        2673 : xreap_rgextent_iter(
     744             :         struct xreap_state      *rs,
     745             :         xfs_rgblock_t           rgbno,
     746             :         xfs_extlen_t            *rglenp,
     747             :         bool                    crosslinked)
     748             : {
     749        2673 :         struct xfs_scrub        *sc = rs->sc;
     750        2673 :         xfs_rtblock_t           rtbno;
     751        2673 :         int                     error;
     752             : 
     753             :         /*
     754             :          * The only caller so far is CoW fork repair, so we only know how to
     755             :          * unlink or free CoW staging extents.  Here we don't have to worry
     756             :          * about invalidating buffers!
     757             :          */
     758        2673 :         if (rs->oinfo != &XFS_RMAP_OINFO_COW) {
     759           0 :                 ASSERT(rs->oinfo == &XFS_RMAP_OINFO_COW);
     760           0 :                 return -EFSCORRUPTED;
     761             :         }
     762        2673 :         ASSERT(rs->resv == XFS_AG_RESV_NONE);
     763             : 
     764        2673 :         rtbno = xfs_rgbno_to_rtb(sc->mp, sc->sr.rtg->rtg_rgno, rgbno);
     765             : 
     766             :         /*
     767             :          * If there are other rmappings, this block is cross linked and must
     768             :          * not be freed.  Remove the forward and reverse mapping and move on.
     769             :          */
     770        2673 :         if (crosslinked) {
     771           0 :                 trace_xreap_dispose_unmap_rtextent(sc->sr.rtg, rgbno, *rglenp);
     772             : 
     773           0 :                 xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
     774           0 :                 rs->deferred++;
     775           0 :                 return 0;
     776             :         }
     777             : 
     778        2673 :         trace_xreap_dispose_free_rtextent(sc->sr.rtg, rgbno, *rglenp);
     779             : 
     780             :         /*
     781             :          * The CoW staging extent is not crosslinked.  Use deferred work items
     782             :          * to remove the refcountbt records (which removes the rmap records)
     783             :          * and free the extent.  We're not worried about the system going down
     784             :          * here because log recovery walks the refcount btree to clean out the
     785             :          * CoW staging extents.
     786             :          */
     787        2673 :         xfs_refcount_free_cow_extent(sc->tp, true, rtbno, *rglenp);
     788        2673 :         error = xfs_free_extent_later(sc->tp, rtbno, *rglenp, NULL,
     789             :                         rs->resv,
     790             :                         XFS_FREE_EXTENT_REALTIME |
     791             :                         XFS_FREE_EXTENT_SKIP_DISCARD);
     792        2673 :         if (error)
     793             :                 return error;
     794             : 
     795        2673 :         rs->deferred++;
     796        2673 :         return 0;
     797             : }
     798             : 
     799             : #define XREAP_RTGLOCK_ALL       (XFS_RTGLOCK_BITMAP | \
     800             :                                  XFS_RTGLOCK_RMAP | \
     801             :                                  XFS_RTGLOCK_REFCOUNT)
     802             : 
     803             : /*
     804             :  * Break a rt file metadata extent into sub-extents by fate (crosslinked, not
     805             :  * crosslinked), and dispose of each sub-extent separately.  The extent must
     806             :  * be aligned to a realtime extent.
     807             :  */
     808             : STATIC int
     809        2673 : xreap_rtmeta_extent(
     810             :         uint64_t                rtbno,
     811             :         uint64_t                len,
     812             :         void                    *priv)
     813             : {
     814        2673 :         struct xreap_state      *rs = priv;
     815        2673 :         struct xfs_scrub        *sc = rs->sc;
     816        2673 :         xfs_rgnumber_t          rgno;
     817        2673 :         xfs_rgblock_t           rgbno = xfs_rtb_to_rgbno(sc->mp, rtbno, &rgno);
     818        2673 :         xfs_rgblock_t           rgbno_next = rgbno + len;
     819        2673 :         int                     error = 0;
     820             : 
     821        2673 :         ASSERT(sc->ip != NULL);
     822        2673 :         ASSERT(!sc->sr.rtg);
     823             : 
     824             :         /*
     825             :          * We're reaping blocks after repairing file metadata, which means that
     826             :          * we have to init the xchk_ag structure ourselves.
     827             :          */
     828        2673 :         sc->sr.rtg = xfs_rtgroup_get(sc->mp, rgno);
     829        2673 :         if (!sc->sr.rtg)
     830             :                 return -EFSCORRUPTED;
     831             : 
     832        2673 :         xfs_rtgroup_lock(NULL, sc->sr.rtg, XREAP_RTGLOCK_ALL);
     833             : 
     834        5346 :         while (rgbno < rgbno_next) {
     835        2673 :                 xfs_extlen_t    rglen;
     836        2673 :                 bool            crosslinked;
     837             : 
     838        2673 :                 error = xreap_rgextent_select(rs, rgbno, rgbno_next,
     839             :                                 &crosslinked, &rglen);
     840        2673 :                 if (error)
     841           0 :                         goto out_unlock;
     842             : 
     843        2673 :                 error = xreap_rgextent_iter(rs, rgbno, &rglen, crosslinked);
     844        2673 :                 if (error)
     845           0 :                         goto out_unlock;
     846             : 
     847        2673 :                 if (xreap_want_defer_finish(rs)) {
     848           0 :                         error = xfs_defer_finish(&sc->tp);
     849           0 :                         if (error)
     850           0 :                                 goto out_unlock;
     851           0 :                         xreap_defer_finish_reset(rs);
     852        2673 :                 } else if (xreap_want_roll(rs)) {
     853           0 :                         error = xfs_trans_roll_inode(&sc->tp, sc->ip);
     854           0 :                         if (error)
     855           0 :                                 goto out_unlock;
     856           0 :                         xreap_reset(rs);
     857             :                 }
     858             : 
     859        2673 :                 rgbno += rglen;
     860             :         }
     861             : 
     862        2673 : out_unlock:
     863        2673 :         xfs_rtgroup_unlock(sc->sr.rtg, XREAP_RTGLOCK_ALL);
     864        2673 :         xfs_rtgroup_put(sc->sr.rtg);
     865        2673 :         sc->sr.rtg = NULL;
     866        2673 :         return error;
     867             : }
     868             : 
     869             : /*
     870             :  * Dispose of every block of every rt metadata extent in the bitmap.
     871             :  * Do not use this to dispose of the mappings in an ondisk inode fork.
     872             :  */
     873             : int
     874       13210 : xrep_reap_rtblocks(
     875             :         struct xfs_scrub                *sc,
     876             :         struct xrtb_bitmap              *bitmap,
     877             :         const struct xfs_owner_info     *oinfo)
     878             : {
     879       13210 :         struct xreap_state              rs = {
     880             :                 .sc                     = sc,
     881             :                 .oinfo                  = oinfo,
     882             :                 .resv                   = XFS_AG_RESV_NONE,
     883             :         };
     884       13210 :         int                             error;
     885             : 
     886       13210 :         ASSERT(xfs_has_rmapbt(sc->mp));
     887       13210 :         ASSERT(sc->ip != NULL);
     888             : 
     889       13210 :         error = xrtb_bitmap_walk(bitmap, xreap_rtmeta_extent, &rs);
     890       13210 :         if (error)
     891             :                 return error;
     892             : 
     893       13210 :         if (xreap_dirty(&rs))
     894        1569 :                 return xrep_defer_finish(sc);
     895             : 
     896             :         return 0;
     897             : }
     898             : #endif /* CONFIG_XFS_RT */
     899             : 
     900             : /*
     901             :  * Dispose of every block of an old metadata btree that used to be rooted in a
     902             :  * metadata directory file.
     903             :  */
     904             : int
     905       18431 : xrep_reap_metadir_fsblocks(
     906             :         struct xfs_scrub                *sc,
     907             :         struct xfsb_bitmap              *bitmap)
     908             : {
     909             :         /*
     910             :          * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old
     911             :          * blocks are no longer mapped by the inode, and inode metadata space
     912             :          * reservations can only account freed space to the i_nblocks.
     913             :          */
     914       18431 :         struct xfs_owner_info           oinfo;
     915       18431 :         struct xreap_state              rs = {
     916             :                 .sc                     = sc,
     917             :                 .oinfo                  = &oinfo,
     918             :                 .resv                   = XFS_AG_RESV_NONE,
     919             :         };
     920       18431 :         int                             error;
     921             : 
     922       18431 :         ASSERT(xfs_has_rmapbt(sc->mp));
     923       18431 :         ASSERT(sc->ip != NULL);
     924       18431 :         ASSERT(xfs_is_metadir_inode(sc->ip));
     925             : 
     926       18431 :         xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
     927             : 
     928       18431 :         error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
     929       18433 :         if (error)
     930             :                 return error;
     931             : 
     932       18433 :         if (xreap_dirty(&rs))
     933        9317 :                 return xrep_defer_finish(sc);
     934             : 
     935             :         return 0;
     936             : }
     937             : 
     938             : /*
     939             :  * Metadata files are not supposed to share blocks with anything else.
     940             :  * If blocks are shared, we remove the reverse mapping (thus reducing the
     941             :  * crosslink factor); if blocks are not shared, we also need to free them.
     942             :  *
     943             :  * This first step determines the longest subset of the passed-in imap
     944             :  * (starting at its beginning) that is either crosslinked or not crosslinked.
     945             :  * The blockcount will be adjust down as needed.
     946             :  */
     947             : STATIC int
     948      157060 : xreap_bmapi_select(
     949             :         struct xfs_scrub        *sc,
     950             :         struct xfs_inode        *ip,
     951             :         int                     whichfork,
     952             :         struct xfs_bmbt_irec    *imap,
     953             :         bool                    *crosslinked)
     954             : {
     955      157060 :         struct xfs_owner_info   oinfo;
     956      157060 :         struct xfs_btree_cur    *cur;
     957      157060 :         xfs_filblks_t           len = 1;
     958      157060 :         xfs_agblock_t           bno;
     959      157060 :         xfs_agblock_t           agbno;
     960      157060 :         xfs_agblock_t           agbno_next;
     961      157060 :         int                     error;
     962             : 
     963      157060 :         agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
     964      157060 :         agbno_next = agbno + imap->br_blockcount;
     965             : 
     966      157060 :         cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
     967             :                         sc->sa.pag);
     968             : 
     969      157060 :         xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
     970      157060 :         error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
     971      157060 :         if (error)
     972           0 :                 goto out_cur;
     973             : 
     974      157060 :         bno = agbno + 1;
     975     2009574 :         while (bno < agbno_next) {
     976     1852514 :                 bool            also_crosslinked;
     977             : 
     978     1852514 :                 oinfo.oi_offset++;
     979     1852514 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
     980             :                                 &also_crosslinked);
     981     1852514 :                 if (error)
     982           0 :                         goto out_cur;
     983             : 
     984     1852514 :                 if (also_crosslinked != *crosslinked)
     985             :                         break;
     986             : 
     987     1852514 :                 len++;
     988     1852514 :                 bno++;
     989             :         }
     990             : 
     991      157060 :         imap->br_blockcount = len;
     992      157060 :         trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
     993      157060 : out_cur:
     994      157060 :         xfs_btree_del_cursor(cur, error);
     995      157060 :         return error;
     996             : }
     997             : 
     998             : /*
     999             :  * Decide if this buffer can be joined to a transaction.  This is true for most
    1000             :  * buffers, but there are two cases that we want to catch: large remote xattr
    1001             :  * value buffers are not logged and can overflow the buffer log item dirty
    1002             :  * bitmap size; and oversized cached buffers if things have really gone
    1003             :  * haywire.
    1004             :  */
    1005             : static inline bool
    1006     2007609 : xreap_buf_loggable(
    1007             :         const struct xfs_buf    *bp)
    1008             : {
    1009     2007609 :         int                     i;
    1010             : 
    1011     4015184 :         for (i = 0; i < bp->b_map_count; i++) {
    1012     2007609 :                 int             chunks;
    1013     2007609 :                 int             map_size;
    1014             : 
    1015     2007609 :                 chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
    1016             :                                 XFS_BLF_CHUNK);
    1017     2007609 :                 map_size = DIV_ROUND_UP(chunks, NBWORD);
    1018     2007609 :                 if (map_size > XFS_BLF_DATAMAP_SIZE)
    1019             :                         return false;
    1020             :         }
    1021             : 
    1022             :         return true;
    1023             : }
    1024             : 
    1025             : /*
    1026             :  * Invalidate any buffers for this file mapping.  The @imap blockcount may be
    1027             :  * adjusted downward if we need to roll the transaction.
    1028             :  */
    1029             : STATIC int
    1030      157060 : xreap_bmapi_binval(
    1031             :         struct xfs_scrub        *sc,
    1032             :         struct xfs_inode        *ip,
    1033             :         int                     whichfork,
    1034             :         struct xfs_bmbt_irec    *imap)
    1035             : {
    1036      157060 :         struct xfs_mount        *mp = sc->mp;
    1037      157060 :         struct xfs_perag        *pag = sc->sa.pag;
    1038      157060 :         int                     bmap_flags = xfs_bmapi_aflag(whichfork);
    1039      157060 :         xfs_fileoff_t           off;
    1040      157060 :         xfs_fileoff_t           max_off;
    1041      157060 :         xfs_extlen_t            scan_blocks;
    1042      157060 :         xfs_agnumber_t          agno = sc->sa.pag->pag_agno;
    1043      157060 :         xfs_agblock_t           bno;
    1044      157060 :         xfs_agblock_t           agbno;
    1045      157060 :         xfs_agblock_t           agbno_next;
    1046      157060 :         unsigned int            invalidated = 0;
    1047      157060 :         int                     error;
    1048             : 
    1049             :         /*
    1050             :          * Avoid invalidating AG headers and post-EOFS blocks because we never
    1051             :          * own those.
    1052             :          */
    1053      157060 :         agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
    1054      157060 :         agbno_next = agbno + imap->br_blockcount;
    1055      157060 :         if (!xfs_verify_agbno(pag, agbno) ||
    1056      157060 :             !xfs_verify_agbno(pag, agbno_next - 1))
    1057             :                 return 0;
    1058             : 
    1059             :         /*
    1060             :          * Buffers for file blocks can span multiple contiguous mappings.  This
    1061             :          * means that for each block in the mapping, there could exist an
    1062             :          * xfs_buf indexed by that block with any length up to the maximum
    1063             :          * buffer size (remote xattr values) or to the next hole in the fork.
    1064             :          * To set up our binval scan, first we need to figure out the location
    1065             :          * of the next hole.
    1066             :          */
    1067      157060 :         off = imap->br_startoff + imap->br_blockcount;
    1068      157060 :         max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
    1069      232306 :         while (off < max_off) {
    1070      211374 :                 struct xfs_bmbt_irec    hmap;
    1071      211374 :                 int                     nhmaps = 1;
    1072             : 
    1073      211374 :                 error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
    1074             :                                 &nhmaps, bmap_flags);
    1075      211374 :                 if (error)
    1076           0 :                         return error;
    1077      211374 :                 if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
    1078           0 :                         ASSERT(0);
    1079           0 :                         return -EFSCORRUPTED;
    1080             :                 }
    1081             : 
    1082      286620 :                 if (!xfs_bmap_is_real_extent(&hmap))
    1083             :                         break;
    1084             : 
    1085       75246 :                 off = hmap.br_startoff + hmap.br_blockcount;
    1086             :         }
    1087      157060 :         scan_blocks = off - imap->br_startoff;
    1088             : 
    1089      157060 :         trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);
    1090             : 
    1091             :         /*
    1092             :          * If there are incore buffers for these blocks, invalidate them.  If
    1093             :          * we can't (try)lock the buffer we assume it's owned by someone else
    1094             :          * and leave it alone.  The buffer cache cannot detect aliasing, so
    1095             :          * employ nested loops to detect incore buffers of any plausible size.
    1096             :          */
    1097     2166634 :         while (bno < agbno_next) {
    1098     4019148 :                 struct xrep_bufscan     scan = {
    1099     2009574 :                         .daddr          = XFS_AGB_TO_DADDR(mp, agno, bno),
    1100             :                         .max_sectors    = xrep_bufscan_max_sectors(mp,
    1101             :                                                                 scan_blocks),
    1102     2009574 :                         .daddr_step     = XFS_FSB_TO_BB(mp, 1),
    1103             :                 };
    1104     2009574 :                 struct xfs_buf          *bp;
    1105             : 
    1106     4017183 :                 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
    1107     2007609 :                         if (xreap_buf_loggable(bp)) {
    1108     2007575 :                                 xfs_trans_bjoin(sc->tp, bp);
    1109     2007575 :                                 xfs_trans_binval(sc->tp, bp);
    1110             :                         } else {
    1111          34 :                                 xfs_buf_stale(bp);
    1112          34 :                                 xfs_buf_relse(bp);
    1113             :                         }
    1114     2007609 :                         invalidated++;
    1115             : 
    1116             :                         /*
    1117             :                          * Stop invalidating if we've hit the limit; we should
    1118             :                          * still have enough reservation left to free however
    1119             :                          * much of the mapping we've seen so far.
    1120             :                          */
    1121     2007609 :                         if (invalidated > XREAP_MAX_BINVAL) {
    1122           0 :                                 imap->br_blockcount = agbno_next - bno;
    1123           0 :                                 goto out;
    1124             :                         }
    1125             :                 }
    1126             : 
    1127     2009574 :                 bno++;
    1128     2009574 :                 scan_blocks--;
    1129             :         }
    1130             : 
    1131      157060 : out:
    1132      157060 :         trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
    1133      157060 :         return 0;
    1134             : }
    1135             : 
    1136             : /*
    1137             :  * Dispose of as much of the beginning of this file fork mapping as possible.
    1138             :  * The number of blocks disposed of is returned in @imap->br_blockcount.
    1139             :  */
    1140             : STATIC int
    1141      157060 : xrep_reap_bmapi_iter(
    1142             :         struct xfs_scrub                *sc,
    1143             :         struct xfs_inode                *ip,
    1144             :         int                             whichfork,
    1145             :         struct xfs_bmbt_irec            *imap,
    1146             :         bool                            crosslinked)
    1147             : {
    1148      157060 :         int                             error;
    1149             : 
    1150      157060 :         if (crosslinked) {
    1151             :                 /*
    1152             :                  * If there are other rmappings, this block is cross linked and
    1153             :                  * must not be freed.  Remove the reverse mapping, leave the
    1154             :                  * buffer cache in its possibly confused state, and move on.
    1155             :                  * We don't want to risk discarding valid data buffers from
    1156             :                  * anybody else who thinks they own the block, even though that
    1157             :                  * runs the risk of stale buffer warnings in the future.
    1158             :                  */
    1159           0 :                 trace_xreap_dispose_unmap_extent(sc->sa.pag,
    1160           0 :                                 XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
    1161           0 :                                 imap->br_blockcount);
    1162             : 
    1163             :                 /*
    1164             :                  * Schedule removal of the mapping from the fork.  We use
    1165             :                  * deferred log intents in this function to control the exact
    1166             :                  * sequence of metadata updates.
    1167             :                  */
    1168           0 :                 xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
    1169           0 :                 xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
    1170           0 :                                 -(int64_t)imap->br_blockcount);
    1171           0 :                 xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
    1172           0 :                 return 0;
    1173             :         }
    1174             : 
    1175             :         /*
    1176             :          * If the block is not crosslinked, we can invalidate all the incore
    1177             :          * buffers for the extent, and then free the extent.  This is a bit of
    1178             :          * a mess since we don't detect discontiguous buffers that are indexed
    1179             :          * by a block starting before the first block of the extent but overlap
    1180             :          * anyway.
    1181             :          */
    1182      157060 :         trace_xreap_dispose_free_extent(sc->sa.pag,
    1183      157060 :                         XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
    1184      157060 :                         imap->br_blockcount);
    1185             : 
    1186             :         /*
    1187             :          * Invalidate as many buffers as we can, starting at the beginning of
    1188             :          * this mapping.  If this function sets blockcount to zero, the
    1189             :          * transaction is full of logged buffer invalidations, so we need to
    1190             :          * return early so that we can roll and retry.
    1191             :          */
    1192      157060 :         error = xreap_bmapi_binval(sc, ip, whichfork, imap);
    1193      157060 :         if (error || imap->br_blockcount == 0)
    1194             :                 return error;
    1195             : 
    1196             :         /*
    1197             :          * Schedule removal of the mapping from the fork.  We use deferred log
    1198             :          * intents in this function to control the exact sequence of metadata
    1199             :          * updates.
    1200             :          */
    1201      157060 :         xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
    1202      157060 :         xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
    1203      157060 :                         -(int64_t)imap->br_blockcount);
    1204      157060 :         return xfs_free_extent_later(sc->tp, imap->br_startblock,
    1205             :                         imap->br_blockcount, NULL, XFS_AG_RESV_NONE,
    1206             :                         XFS_FREE_EXTENT_SKIP_DISCARD);
    1207             : }
    1208             : 
    1209             : /*
    1210             :  * Dispose of as much of this file extent as we can.  Upon successful return,
    1211             :  * the imap will reflect the mapping that was removed from the fork.
    1212             :  */
    1213             : STATIC int
    1214      157060 : xreap_ifork_extent(
    1215             :         struct xfs_scrub                *sc,
    1216             :         struct xfs_inode                *ip,
    1217             :         int                             whichfork,
    1218             :         struct xfs_bmbt_irec            *imap)
    1219             : {
    1220      157060 :         xfs_agnumber_t                  agno;
    1221      157060 :         bool                            crosslinked;
    1222      157060 :         int                             error;
    1223             : 
    1224      157060 :         ASSERT(sc->sa.pag == NULL);
    1225             : 
    1226      157060 :         trace_xreap_ifork_extent(sc, ip, whichfork, imap);
    1227             : 
    1228      157060 :         agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
    1229      157060 :         sc->sa.pag = xfs_perag_get(sc->mp, agno);
    1230      157060 :         if (!sc->sa.pag)
    1231             :                 return -EFSCORRUPTED;
    1232             : 
    1233      157060 :         error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
    1234      157060 :         if (error)
    1235           0 :                 goto out_pag;
    1236             : 
    1237             :         /*
    1238             :          * Decide the fate of the blocks at the beginning of the mapping, then
    1239             :          * update the mapping to use it with the unmap calls.
    1240             :          */
    1241      157060 :         error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
    1242      157060 :         if (error)
    1243           0 :                 goto out_agf;
    1244             : 
    1245      157060 :         error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
    1246      157060 :         if (error)
    1247           0 :                 goto out_agf;
    1248             : 
    1249      157060 : out_agf:
    1250      157060 :         xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
    1251      157060 :         sc->sa.agf_bp = NULL;
    1252      157060 : out_pag:
    1253      157060 :         xfs_perag_put(sc->sa.pag);
    1254      157060 :         sc->sa.pag = NULL;
    1255      157060 :         return error;
    1256             : }
    1257             : 
    1258             : /*
    1259             :  * Dispose of each block mapped to the given fork of the given file.  Callers
    1260             :  * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip.  The fork
    1261             :  * must not have any delalloc reservations.
    1262             :  */
    1263             : int
    1264      101614 : xrep_reap_ifork(
    1265             :         struct xfs_scrub        *sc,
    1266             :         struct xfs_inode        *ip,
    1267             :         int                     whichfork)
    1268             : {
    1269      101614 :         xfs_fileoff_t           off = 0;
    1270      101614 :         int                     bmap_flags = xfs_bmapi_aflag(whichfork);
    1271      101614 :         int                     error;
    1272             : 
    1273      101614 :         ASSERT(xfs_has_rmapbt(sc->mp));
    1274      101614 :         ASSERT(ip == sc->ip || ip == sc->tempip);
    1275      101614 :         ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
    1276             : 
    1277      386724 :         while (off < XFS_MAX_FILEOFF) {
    1278      285110 :                 struct xfs_bmbt_irec    imap;
    1279      285110 :                 int                     nimaps = 1;
    1280             : 
    1281             :                 /* Read the next extent, skip past holes and delalloc. */
    1282      285110 :                 error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
    1283             :                                 &nimaps, bmap_flags);
    1284      285110 :                 if (error)
    1285           0 :                         return error;
    1286      285110 :                 if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
    1287           0 :                         ASSERT(0);
    1288           0 :                         return -EFSCORRUPTED;
    1289             :                 }
    1290             : 
    1291             :                 /*
    1292             :                  * If this is a real space mapping, reap as much of it as we
    1293             :                  * can in a single transaction.
    1294             :                  */
    1295      442170 :                 if (xfs_bmap_is_real_extent(&imap)) {
    1296      157060 :                         error = xreap_ifork_extent(sc, ip, whichfork, &imap);
    1297      157060 :                         if (error)
    1298           0 :                                 return error;
    1299             : 
    1300      157060 :                         error = xfs_defer_finish(&sc->tp);
    1301      157060 :                         if (error)
    1302           0 :                                 return error;
    1303             :                 }
    1304             : 
    1305      285110 :                 off = imap.br_startoff + imap.br_blockcount;
    1306             :         }
    1307             : 
    1308             :         return 0;
    1309             : }

Generated by: LCOV version 1.14