LCOV - code coverage report
Current view: top level - fs/xfs/scrub - reap.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 325 386 84.2 %
Date: 2023-07-31 20:08:12 Functions: 18 19 94.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_log_format.h"
      14             : #include "xfs_trans.h"
      15             : #include "xfs_sb.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_alloc.h"
      18             : #include "xfs_alloc_btree.h"
      19             : #include "xfs_ialloc.h"
      20             : #include "xfs_ialloc_btree.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_rmap_btree.h"
      23             : #include "xfs_refcount.h"
      24             : #include "xfs_refcount_btree.h"
      25             : #include "xfs_extent_busy.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_quota.h"
      29             : #include "xfs_qm.h"
      30             : #include "xfs_bmap.h"
      31             : #include "xfs_da_format.h"
      32             : #include "xfs_da_btree.h"
      33             : #include "xfs_attr.h"
      34             : #include "xfs_attr_remote.h"
      35             : #include "xfs_defer.h"
      36             : #include "scrub/scrub.h"
      37             : #include "scrub/common.h"
      38             : #include "scrub/trace.h"
      39             : #include "scrub/repair.h"
      40             : #include "scrub/bitmap.h"
      41             : #include "scrub/reap.h"
      42             : 
      43             : /*
      44             :  * Disposal of Blocks from Old Metadata
      45             :  *
      46             :  * Now that we've constructed a new btree to replace the damaged one, we want
      47             :  * to dispose of the blocks that (we think) the old btree was using.
      48             :  * Previously, we used the rmapbt to collect the extents (bitmap) with the
      49             :  * rmap owner corresponding to the tree we rebuilt, collected extents for any
      50             :  * blocks with the same rmap owner that are owned by another data structure
      51             :  * (sublist), and subtracted sublist from bitmap.  In theory the extents
      52             :  * remaining in bitmap are the old btree's blocks.
      53             :  *
      54             :  * Unfortunately, it's possible that the btree was crosslinked with other
      55             :  * blocks on disk.  The rmap data can tell us if there are multiple owners, so
      56             :  * if the rmapbt says there is an owner of this block other than @oinfo, then
      57             :  * the block is crosslinked.  Remove the reverse mapping and continue.
      58             :  *
      59             :  * If there is one rmap record, we can free the block, which removes the
      60             :  * reverse mapping but doesn't add the block to the free space.  Our repair
      61             :  * strategy is to hope the other metadata objects crosslinked on this block
      62             :  * will be rebuilt (atop different blocks), thereby removing all the cross
      63             :  * links.
      64             :  *
      65             :  * If there are no rmap records at all, we also free the block.  If the btree
      66             :  * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't
      67             :  * supposed to be a rmap record and everything is ok.  For other btrees there
      68             :  * had to have been an rmap entry for the block to have ended up on @bitmap,
      69             :  * so if it's gone now there's something wrong and the fs will shut down.
      70             :  *
      71             :  * Note: If there are multiple rmap records with only the same rmap owner as
      72             :  * the btree we're trying to rebuild and the block is indeed owned by another
      73             :  * data structure with the same rmap owner, then the block will be in sublist
      74             :  * and therefore doesn't need disposal.  If there are multiple rmap records
      75             :  * with only the same rmap owner but the block is not owned by something with
      76             :  * the same rmap owner, the block will be freed.
      77             :  *
      78             :  * The caller is responsible for locking the AG headers/inode for the entire
      79             :  * rebuild operation so that nothing else can sneak in and change the incore
      80             :  * state while we're not looking.  We must also invalidate any buffers
      81             :  * associated with @bitmap.
      82             :  */
      83             : 
      84             : /*
                     :  * Information about reaping extents after a repair.
                     :  *
                     :  * All of the work is done in the transaction of @sc (sc->tp).  The
                     :  * per-transaction fields (force_roll, deferred, invalidated) are cleared by
                     :  * both xreap_reset() and xreap_defer_finish_reset(); total_deferred is only
                     :  * cleared by xreap_defer_finish_reset().
                     :  */
      85             : struct xreap_state {
      86             :         struct xfs_scrub                *sc;
      87             : 
      88             :         /* Reverse mapping owner and metadata reservation type. */
      89             :         const struct xfs_owner_info     *oinfo;
      90             :         enum xfs_ag_resv_type           resv;
      91             : 
      92             :         /* If true, roll the transaction before reaping the next extent. */
      93             :         bool                            force_roll;
      94             : 
      95             :         /* Number of deferred reaps attached to the current transaction. */
      96             :         unsigned int                    deferred;
      97             : 
      98             :         /* Number of invalidated buffers logged to the current transaction. */
      99             :         unsigned int                    invalidated;
     100             : 
     101             :         /* Deferred reaps folded in by xreap_reset() at each transaction roll. */
     102             :         unsigned long long              total_deferred;
     103             : };
     104             : 
     105             : /*
                     :  * Put a block back on the AGFL.
                     :  *
                     :  * Makes sure there is space on the freelist, adds an XFS_RMAP_OINFO_AG
                     :  * reverse mapping for @agbno, puts the block on the AGFL, and finally marks
                     :  * the block busy with XFS_EXTENT_BUSY_SKIP_DISCARD.  Returns 0 on success,
                     :  * or the error from the first step that fails.
                     :  */
     106             : STATIC int
     107           0 : xreap_put_freelist(
     108             :         struct xfs_scrub        *sc,
     109             :         xfs_agblock_t           agbno)
     110             : {
     111           0 :         struct xfs_buf          *agfl_bp;
     112           0 :         int                     error;
     113             : 
     114             :         /* Make sure there's space on the freelist. */
     115           0 :         error = xrep_fix_freelist(sc, 0);
     116           0 :         if (error)
     117             :                 return error;
     118             : 
     119             :         /*
     120             :          * Since we're "freeing" a lost block onto the AGFL, we have to
     121             :          * create an rmap for the block prior to merging it or else other
     122             :          * parts will break.
     123             :          */
     124           0 :         error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1,
     125             :                         &XFS_RMAP_OINFO_AG);
     126           0 :         if (error)
     127             :                 return error;
     128             : 
     129             :         /* Put the block on the AGFL. */
     130           0 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
     131           0 :         if (error)
     132             :                 return error;
     133             : 
     134           0 :         error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
     135             :                         agfl_bp, agbno, 0);
     136           0 :         if (error)
     137             :                 return error;
     138           0 :         xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
     139             :                         XFS_EXTENT_BUSY_SKIP_DISCARD);
     140             : 
     141           0 :         return 0;
     142             : }
     143             : 
     144             : /*
                     :  * Are there any uncommitted reap operations?
                     :  *
                     :  * Returns true if a roll has been requested (force_roll) or if any of the
                     :  * deferred, invalidated, or total_deferred counters is nonzero.
                     :  */
     145     2884222 : static inline bool xreap_dirty(const struct xreap_state *rs)
     146             : {
     147     2884222 :         if (rs->force_roll)
     148             :                 return true;
     149     2884222 :         if (rs->deferred)
     150             :                 return true;
     151     2701452 :         if (rs->invalidated)
     152             :                 return true;
     153     2701452 :         if (rs->total_deferred)
     154           1 :                 return true;
     155             :         return false;
     156             : }
     157             : 
     158             : #define XREAP_MAX_DEFERRED      (128)
     159             : #define XREAP_MAX_BINVAL        (2048)
     160             : 
     161             : /*
     162             :  * Decide if we want to roll the transaction after reaping an extent.  We don't
     163             :  * want to overrun the transaction reservation, so we prohibit more than
     164             :  * XREAP_MAX_DEFERRED EFIs per transaction.  For the same reason, we limit the
     165             :  * number of buffer invalidations to XREAP_MAX_BINVAL.  A pending force_roll
                     :  * request always asks for a roll.
     166             :  */
     167      330214 : static inline bool xreap_want_roll(const struct xreap_state *rs)
     168             : {
     169      330214 :         if (rs->force_roll)
     170             :                 return true;
     171      330214 :         if (rs->deferred > XREAP_MAX_DEFERRED)
     172             :                 return true;
     173      329548 :         if (rs->invalidated > XREAP_MAX_BINVAL)
     174           6 :                 return true;
     175             :         return false;
     176             : }
     177             : /*
                     :  * Reset the per-transaction reap state; xreap_agmeta_extent() calls this
                     :  * after rolling the transaction.  The deferred reap count is folded into
                     :  * total_deferred before the deferred and invalidated counters and the
                     :  * force_roll flag are cleared.
                     :  */
     178             : static inline void xreap_reset(struct xreap_state *rs)
     179             : {
     180         672 :         rs->total_deferred += rs->deferred;
     181         672 :         rs->deferred = 0;
     182         672 :         rs->invalidated = 0;
     183         672 :         rs->force_roll = false;
     184         672 : }
     185             : 
     186             : #define XREAP_MAX_DEFER_CHAIN           (2048)
     187             : 
     188             : /*
     189             :  * Decide if we want to finish the deferred ops that are attached to the scrub
     190             :  * transaction.  We don't want to queue huge chains of deferred ops because
     191             :  * that can consume a lot of log space and kernel memory.  Hence we trigger a
     192             :  * xfs_defer_finish if total_deferred exceeds XREAP_MAX_DEFER_CHAIN or the
     193             :  * caller did some real work (i.e. force_roll is set).
     194             :  */
     195             : static inline bool
     196      351066 : xreap_want_defer_finish(const struct xreap_state *rs)
     197             : {
     198      351066 :         if (rs->force_roll)
     199             :                 return true;
     200      330242 :         if (rs->total_deferred > XREAP_MAX_DEFER_CHAIN)
     201          35 :                 return true;
     202             :         return false;
     203             : }
     204             : /*
                     :  * Reset all of the reap accounting; xreap_agmeta_extent() calls this after
                     :  * finishing the deferred work.  Unlike xreap_reset(), this also zeroes
                     :  * total_deferred.
                     :  */
     205             : static inline void xreap_defer_finish_reset(struct xreap_state *rs)
     206             : {
     207       20859 :         rs->total_deferred = 0;
     208       20859 :         rs->deferred = 0;
     209       20859 :         rs->invalidated = 0;
     210       20859 :         rs->force_roll = false;
     211       20859 : }
     212             : 
     213             : /*
     214             :  * Compute the maximum length of a buffer cache scan (in units of sectors),
     215             :  * given a quantity of fs blocks.  The block count is capped at the number of
                     :  * blocks that xfs_attr3_rmt_blocks() reports for XFS_XATTR_SIZE_MAX before it
                     :  * is converted with XFS_FSB_TO_BB.
     216             :  */
     217             : xfs_daddr_t
     218     1417676 : xrep_bufscan_max_sectors(
     219             :         struct xfs_mount        *mp,
     220             :         xfs_extlen_t            fsblocks)
     221             : {
     222     1417676 :         int                     max_fsbs;
     223             : 
     224             :         /* Remote xattr values are the largest buffers that we support. */
     225     1417676 :         max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
     226             : 
     227     1417340 :         return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
     228             : }
     229             : 
     230             : /*
     231             :  * Return an incore buffer from a sector scan, or NULL if there are no buffers
     232             :  * left to return.
                     :  *
                     :  * Each probe calls xfs_buf_incore() for scan->daddr, passing
                     :  * scan->__sector_count as the size.  The size grows by scan->daddr_step
                     :  * before every probe, and the scan ends once it exceeds scan->max_sectors.
                     :  * A lookup that returns an error simply moves on to the next size, so the
                     :  * caller can keep calling this with the same @scan until it returns NULL.
     233             :  */
     234             : struct xfs_buf *
     235     2500624 : xrep_bufscan_advance(
     236             :         struct xfs_mount        *mp,
     237             :         struct xrep_bufscan     *scan)
     238             : {
     239     2500624 :         scan->__sector_count += scan->daddr_step;
     240    12795292 :         while (scan->__sector_count <= scan->max_sectors) {
     241    11434367 :                 struct xfs_buf  *bp = NULL;
     242    11434367 :                 int             error;
     243             : 
     244    11434367 :                 error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
     245             :                                 scan->__sector_count, XBF_LIVESCAN, &bp);
     246    11435083 :                 if (!error)
     247     1140415 :                         return bp;
     248             : 
     249    10294668 :                 scan->__sector_count += scan->daddr_step;
     250             :         }
     251             : 
     252             :         return NULL;
     253             : }
     254             : 
     255             : /*
                     :  * Try to invalidate the incore buffers for an extent that we're freeing.
                     :  *
                     :  * On entry, *aglenp is the length of the extent starting at @agbno.  Every
                     :  * buffer found is joined to the scrub transaction, invalidated, and counted
                     :  * in rs->invalidated.  If that count goes over XREAP_MAX_BINVAL, the scan
                     :  * stops and *aglenp is cut back to the number of blocks that precede the
                     :  * block being scanned at that point, which can be zero.
                     :  */
     256             : STATIC void
     257      351330 : xreap_agextent_binval(
     258             :         struct xreap_state      *rs,
     259             :         xfs_agblock_t           agbno,
     260             :         xfs_extlen_t            *aglenp)
     261             : {
     262      351330 :         struct xfs_scrub        *sc = rs->sc;
     263      351330 :         struct xfs_perag        *pag = sc->sa.pag;
     264      351330 :         struct xfs_mount        *mp = sc->mp;
     265      351330 :         xfs_agnumber_t          agno = sc->sa.pag->pag_agno;
     266      351330 :         xfs_agblock_t           agbno_next = agbno + *aglenp;
     267      351330 :         xfs_agblock_t           bno = agbno;
     268             : 
     269             :         /*
     270             :          * Avoid invalidating AG headers and post-EOFS blocks because we never
     271             :          * own those.
     272             :          */
     273      351330 :         if (!xfs_verify_agbno(pag, agbno) ||
     274      351330 :             !xfs_verify_agbno(pag, agbno_next - 1))
     275             :                 return;
     276             : 
     277             :         /*
     278             :          * If there are incore buffers for these blocks, invalidate them.  We
     279             :          * assume that the lack of any other known owners means that the buffer
     280             :          * can be locked without risk of deadlocking.  The buffer cache cannot
     281             :          * detect aliasing, so employ nested loops to scan for incore buffers
     282             :          * of any plausible size.
     283             :          */
     284     1607607 :         while (bno < agbno_next) {
     285     5023248 :                 struct xrep_bufscan     scan = {
     286     1255938 :                         .daddr          = XFS_AGB_TO_DADDR(mp, agno, bno),
     287     1255938 :                         .max_sectors    = xrep_bufscan_max_sectors(mp,
     288             :                                                         agbno_next - bno),
     289     1255686 :                         .daddr_step     = XFS_FSB_TO_BB(mp, 1),
     290             :                 };
     291     1255686 :                 struct xfs_buf  *bp;
     292             : 
     293     2249119 :                 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
     294      992992 :                         xfs_trans_bjoin(sc->tp, bp);
     295      993247 :                         xfs_trans_binval(sc->tp, bp);
     296      993439 :                         rs->invalidated++;
     297             : 
     298             :                         /*
     299             :                          * Stop invalidating if we've hit the limit; we should
     300             :                          * still have enough reservation left to free however
     301             :                          * far we've gotten.
     302             :                          */
     303      993439 :                         if (rs->invalidated > XREAP_MAX_BINVAL) {
     304           6 :                                 *aglenp -= agbno_next - bno;
     305           6 :                                 goto out;
     306             :                         }
     307             :                 }
     308             : 
     309     1256277 :                 bno++;
     310             :         }
     311             : 
     312      351669 : out:
     313      351675 :         trace_xreap_agextent_binval(sc->sa.pag, agbno, *aglenp);
     314             : }
     315             : 
     316             : /*
     317             :  * Figure out the longest run of blocks that we can dispose of with a single
     318             :  * call.  Cross-linked blocks should have their reverse mappings removed, but
     319             :  * single-owner extents can be freed.  AGFL blocks can only be put back one at
     320             :  * a time.
                     :  *
                     :  * The crosslink status of the block at @agbno is returned in *crosslinked,
                     :  * and *aglenp is set to the number of blocks, starting at @agbno and ending
                     :  * before @agbno_next, that share that status.  Neither output is updated if
                     :  * an rmap query fails; the error is returned instead.
     321             :  */
     322             : STATIC int
     323      351198 : xreap_agextent_select(
     324             :         struct xreap_state      *rs,
     325             :         xfs_agblock_t           agbno,
     326             :         xfs_agblock_t           agbno_next,
     327             :         bool                    *crosslinked,
     328             :         xfs_extlen_t            *aglenp)
     329             : {
     330      351198 :         struct xfs_scrub        *sc = rs->sc;
     331      351198 :         struct xfs_btree_cur    *cur;
     332      351198 :         xfs_agblock_t           bno = agbno + 1;
     333      351198 :         xfs_extlen_t            len = 1;
     334      351198 :         int                     error;
     335             : 
     336             :         /*
     337             :          * Determine if there are any other rmap records covering the first
     338             :          * block of this extent.  If so, the block is crosslinked.
     339             :          */
     340      351198 :         cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
     341             :                         sc->sa.pag);
     342      351681 :         error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo,
     343             :                         crosslinked);
     344      351254 :         if (error)
     345           0 :                 goto out_cur;
     346             : 
     347             :         /* AGFL blocks can only be dealt with one at a time. */
     348      351254 :         if (rs->resv == XFS_AG_RESV_AGFL)
     349           0 :                 goto out_found;
     350             : 
     351             :         /*
     352             :          * Figure out how many of the subsequent blocks have the same crosslink
     353             :          * status.
     354             :          */
     355     1268441 :         while (bno < agbno_next) {
     356      917090 :                 bool            also_crosslinked;
     357             : 
     358      917090 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, rs->oinfo,
     359             :                                 &also_crosslinked);
     360      917187 :                 if (error)
     361           0 :                         goto out_cur;
     362             : 
     363      917187 :                 if (*crosslinked != also_crosslinked)
     364             :                         break;
     365             : 
     366      917187 :                 len++;
     367      917187 :                 bno++;
     368             :         }
     369             : 
     370      351351 : out_found:
     371      351351 :         *aglenp = len;
     372      351351 :         trace_xreap_agextent_select(sc->sa.pag, agbno, len, *crosslinked);
     373      351104 : out_cur:
     374      351104 :         xfs_btree_del_cursor(cur, error);
     375      351666 :         return error;
     376             : }
     377             : 
     378             : /*
     379             :  * Dispose of as much of the beginning of this AG extent as possible.  The
     380             :  * number of blocks disposed of will be returned in @aglenp.
                     :  *
                     :  * Crosslinked extents only have their rmap (or, for CoW staging extents,
                     :  * their refcount record) removed.  Otherwise the incore buffers are
                     :  * invalidated first and the extent is then released in one of three ways:
                     :  * CoW staging extents via a refcount update plus a deferred free, AGFL
                     :  * blocks via xreap_put_freelist(), and everything else via a deferred free.
                     :  * Every path except the last one sets rs->force_roll; the last one bumps
                     :  * rs->deferred instead.
     381             :  */
     382             : STATIC int
     383      351365 : xreap_agextent_iter(
     384             :         struct xreap_state      *rs,
     385             :         xfs_agblock_t           agbno,
     386             :         xfs_extlen_t            *aglenp,
     387             :         bool                    crosslinked)
     388             : {
     389      351365 :         struct xfs_scrub        *sc = rs->sc;
     390      351365 :         xfs_fsblock_t           fsbno;
     391      351365 :         int                     error = 0;
     392             : 
     393      351365 :         fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
     394             : 
     395             :         /*
     396             :          * If there are other rmappings, this block is cross linked and must
     397             :          * not be freed.  Remove the reverse mapping and move on.  Otherwise,
     398             :          * we were the only owner of the block, so free the extent, which will
     399             :          * also remove the rmap.
     400             :          *
     401             :          * XXX: XFS doesn't support detecting the case where a single block
     402             :          * metadata structure is crosslinked with a multi-block structure
     403             :          * because the buffer cache doesn't detect aliasing problems, so we
     404             :          * can't fix 100% of crosslinking problems (yet).  The verifiers will
     405             :          * blow up on writeout, the filesystem will shut down, and the admin
     406             :          * gets to run xfs_repair.
     407             :          */
     408      351365 :         if (crosslinked) {
     409           0 :                 trace_xreap_dispose_unmap_extent(sc->sa.pag, agbno, *aglenp);
     410             : 
     411           0 :                 rs->force_roll = true;
     412             : 
     413           0 :                 if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
     414             :                         /*
     415             :                          * If we're unmapping CoW staging extents, remove the
     416             :                          * records from the refcountbt, which will remove the
     417             :                          * rmap record as well.
     418             :                          */
     419           0 :                         xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
     420           0 :                         return 0;
     421             :                 }
     422             : 
     423           0 :                 return xfs_rmap_free(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno,
     424             :                                 *aglenp, rs->oinfo);
     425             :         }
     426             : 
     427      351365 :         trace_xreap_dispose_free_extent(sc->sa.pag, agbno, *aglenp);
     428             : 
     429             :         /*
     430             :          * Invalidate as many buffers as we can, starting at agbno.  If this
     431             :          * function sets *aglenp to zero, the transaction is full of logged
     432             :          * buffer invalidations, so we need to return early so that we can
     433             :          * roll and retry.
     434             :          */
     435      351023 :         xreap_agextent_binval(rs, agbno, aglenp);
     436      351491 :         if (*aglenp == 0) {
     437           0 :                 ASSERT(xreap_want_roll(rs));
     438           0 :                 return 0;
     439             :         }
     440             : 
     441             :         /*
     442             :          * If we're getting rid of CoW staging extents, use deferred work items
     443             :          * to remove the refcountbt records (which removes the rmap records)
     444             :          * and free the extent.  We're not worried about the system going down
     445             :          * here because log recovery walks the refcount btree to clean out the
     446             :          * CoW staging extents.
     447             :          */
     448      351491 :         if (rs->oinfo == &XFS_RMAP_OINFO_COW) {
     449       20824 :                 ASSERT(rs->resv == XFS_AG_RESV_NONE);
     450             : 
     451       20824 :                 xfs_refcount_free_cow_extent(sc->tp, fsbno, *aglenp);
     452       20824 :                 error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, NULL,
     453             :                                 rs->resv, true);
     454       20824 :                 if (error)
     455             :                         return error;
     456             : 
     457       20824 :                 rs->force_roll = true;
     458       20824 :                 return 0;
     459             :         }
     460             : 
     461             :         /* Put blocks back on the AGFL one at a time. */
     462      330667 :         if (rs->resv == XFS_AG_RESV_AGFL) {
     463           0 :                 ASSERT(*aglenp == 1);
     464           0 :                 error = xreap_put_freelist(sc, agbno);
     465           0 :                 if (error)
     466             :                         return error;
     467             : 
     468           0 :                 rs->force_roll = true;
     469           0 :                 return 0;
     470             :         }
     471             : 
     472             :         /*
     473             :          * Use deferred frees to get rid of the old btree blocks to try to
     474             :          * minimize the window in which we could crash and lose the old blocks.
     475             :          */
     476      330667 :         error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
     477             :                         rs->resv, true);
     478      330625 :         if (error)
     479             :                 return error;
     480             : 
     481      330625 :         rs->deferred++;
     482      330625 :         return 0;
     483             : }
     484             : 
     485             : /*
     486             :  * Break an AG metadata extent into sub-extents by fate (crosslinked, not
     487             :  * crosslinked), and dispose of each sub-extent separately.
                     :  *
                     :  * This is the per-extent callback that xrep_reap_agblocks() hands to
                     :  * xagb_bitmap_walk(); @priv is the xreap_state.  Despite its name, @fsbno
                     :  * is used directly as an AG block number here.  After each sub-extent the
                     :  * deferred work is finished or the transaction is rolled if the reap
                     :  * state calls for it.
     488             :  */
     489             : STATIC int
     490      280948 : xreap_agmeta_extent(
     491             :         uint64_t                fsbno,
     492             :         uint64_t                len,
     493             :         void                    *priv)
     494             : {
     495      280948 :         struct xreap_state      *rs = priv;
     496      280948 :         struct xfs_scrub        *sc = rs->sc;
     497      280948 :         xfs_agblock_t           agbno = fsbno;
     498      280948 :         xfs_agblock_t           agbno_next = agbno + len;
     499      280948 :         int                     error = 0;
     500             : 
     501      280948 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
     502      280948 :         ASSERT(sc->ip == NULL);
     503             : 
     504      561441 :         while (agbno < agbno_next) {
     505      280672 :                 xfs_extlen_t    aglen;
     506      280672 :                 bool            crosslinked;
     507             : 
     508      280672 :                 error = xreap_agextent_select(rs, agbno, agbno_next,
     509             :                                 &crosslinked, &aglen);
     510      281414 :                 if (error)
     511           0 :                         return error;
     512             : 
     513      281414 :                 error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked);
     514      281061 :                 if (error)
     515           0 :                         return error;
     516             : 
     517      281061 :                 if (xreap_want_defer_finish(rs)) {
     518          24 :                         error = xrep_defer_finish(sc);
     519          24 :                         if (error)
     520           0 :                                 return error;
     521          24 :                         xreap_defer_finish_reset(rs);
     522      280683 :                 } else if (xreap_want_roll(rs)) {
     523         491 :                         error = xrep_roll_ag_trans(sc);
     524         491 :                         if (error)
     525           0 :                                 return error;
     526         491 :                         xreap_reset(rs);
     527             :                 }
     528             : 
     529      280493 :                 agbno += aglen;
     530             :         }
     531             : 
     532             :         return 0;
     533             : }
     534             : 
     535             : /*
                     :  * Dispose of every block of every AG metadata extent in the bitmap.
                     :  *
                     :  * Each extent in @bitmap is passed to xreap_agmeta_extent() together with
                     :  * a reap state built from @oinfo and @type.  If the walk succeeds and the
                     :  * reap state is still dirty, the remaining work is completed with
                     :  * xrep_defer_finish().  Returns 0 or the first error encountered.
                     :  */
     536             : int
     537      254834 : xrep_reap_agblocks(
     538             :         struct xfs_scrub                *sc,
     539             :         struct xagb_bitmap              *bitmap,
     540             :         const struct xfs_owner_info     *oinfo,
     541             :         enum xfs_ag_resv_type           type)
     542             : {
     543      254834 :         struct xreap_state              rs = {
     544             :                 .sc                     = sc,
     545             :                 .oinfo                  = oinfo,
     546             :                 .resv                   = type,
     547             :         };
     548      254834 :         int                             error;
     549             : 
     550      254834 :         ASSERT(xfs_has_rmapbt(sc->mp));
     551      254834 :         ASSERT(sc->ip == NULL);
     552             : 
     553      254834 :         error = xagb_bitmap_walk(bitmap, xreap_agmeta_extent, &rs);
     554      254763 :         if (error)
     555             :                 return error;
     556             : 
     557      254627 :         if (xreap_dirty(&rs))
     558      158755 :                 return xrep_defer_finish(sc);
     559             : 
     560             :         return 0;
     561             : }
     562             : 
     563             : /*
     564             :  * Break a file metadata extent into sub-extents by fate (crosslinked, not
     565             :  * crosslinked), and dispose of each sub-extent separately.  The extent must
     566             :  * not cross an AG boundary.
     567             :  */
     568             : STATIC int
     569       70279 : xreap_fsmeta_extent(
     570             :         uint64_t                fsbno,  /* start of the extent; split into agno and agbno below */
     571             :         uint64_t                len,    /* extent length; asserted to be at most XFS_MAX_BMBT_EXTLEN */
     572             :         void                    *priv)  /* the struct xreap_state for this reap */
     573             : {
     574       70279 :         struct xreap_state      *rs = priv;
     575       70279 :         struct xfs_scrub        *sc = rs->sc;
     576       70279 :         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(sc->mp, fsbno);
     577       70279 :         xfs_agblock_t           agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno);
     578       70279 :         xfs_agblock_t           agbno_next = agbno + len; /* first AG block past the end of the extent */
     579       70279 :         int                     error = 0;
     580             : 
     581       70279 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
     582       70279 :         ASSERT(sc->ip != NULL);
     583       70279 :         ASSERT(!sc->sa.pag);
     584             : 
     585             :         /*
     586             :          * We're reaping blocks after repairing file metadata, which means that
     587             :          * we have to init the xchk_ag structure ourselves.
     588             :          */
     589       70279 :         sc->sa.pag = xfs_perag_get(sc->mp, agno);
     590       70279 :         if (!sc->sa.pag)
     591             :                 return -EFSCORRUPTED; /* no perag for this AG number; nothing to undo yet */
     592             : 
     593       70279 :         error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
     594       70279 :         if (error)
     595           0 :                 goto out_pag;
     596             : 
     597      140558 :         while (agbno < agbno_next) { /* each pass handles aglen blocks starting at agbno */
     598       70279 :                 xfs_extlen_t    aglen;
     599       70279 :                 bool            crosslinked;
     600             : 
     601       70279 :                 error = xreap_agextent_select(rs, agbno, agbno_next,
     602             :                                 &crosslinked, &aglen);
     603       70279 :                 if (error)
     604           0 :                         goto out_agf;
     605             : 
     606       70279 :                 error = xreap_agextent_iter(rs, agbno, &aglen, crosslinked); /* aglen is passed by pointer, so it may be changed here */
     607       70279 :                 if (error)
     608           0 :                         goto out_agf;
     609             : 
     610       70279 :                 if (xreap_want_defer_finish(rs)) {
     611             :                         /*
     612             :                          * Holds the AGF buffer across the deferred chain
     613             :                          * processing.
     614             :                          */
     615       20835 :                         error = xrep_defer_finish(sc);
     616       20835 :                         if (error)
     617           0 :                                 goto out_agf;
     618       20835 :                         xreap_defer_finish_reset(rs);
     619       49444 :                 } else if (xreap_want_roll(rs)) {
     620             :                         /*
     621             :                          * Hold the AGF buffer across the transaction roll so
     622             :                          * that we don't have to reattach it to the scrub
     623             :                          * context.
     624             :                          */
     625         181 :                         xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
     626         181 :                         error = xfs_trans_roll_inode(&sc->tp, sc->ip);
     627         181 :                         xfs_trans_bjoin(sc->tp, sc->sa.agf_bp); /* runs before the error check, i.e. even if the roll failed */
     628         181 :                         if (error)
     629           0 :                                 goto out_agf;
     630         181 :                         xreap_reset(rs);
     631             :                 }
     632             : 
     633       70279 :                 agbno += aglen; /* move on to the next sub-extent */
     634             :         }
     635             : 
     636       70279 : out_agf: /* reached on success too: release the AGF buffer and clear the scrub context pointer */
     637       70279 :         xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
     638       70279 :         sc->sa.agf_bp = NULL;
     639       70279 : out_pag: /* drop the perag reference taken above so sc->sa.pag is NULL again on return */
     640       70279 :         xfs_perag_put(sc->sa.pag);
     641       70279 :         sc->sa.pag = NULL;
     642       70279 :         return error;
     643             : }
     644             : 
     645             : /*
     646             :  * Dispose of every block of every fs metadata extent in the bitmap.
     647             :  * Do not use this to dispose of the mappings in an ondisk inode fork.
     648             :  */
     649             : int /* 0, or the error returned by the bitmap walk or by xrep_defer_finish */
     650     2629676 : xrep_reap_fsblocks(
     651             :         struct xfs_scrub                *sc,            /* scrub context; sc->ip is asserted non-NULL below */
     652             :         struct xfsb_bitmap              *bitmap,        /* extents to walk with xreap_fsmeta_extent */
     653             :         const struct xfs_owner_info     *oinfo)         /* stored in the reap state as .oinfo */
     654             : {
     655     2629676 :         struct xreap_state              rs = {
     656             :                 .sc                     = sc,
     657             :                 .oinfo                  = oinfo,
     658             :                 .resv                   = XFS_AG_RESV_NONE, /* fixed here; the caller cannot choose a reservation type */
     659             :         };
     660     2629676 :         int                             error;
     661             : 
     662     2629676 :         ASSERT(xfs_has_rmapbt(sc->mp));
     663     2629676 :         ASSERT(sc->ip != NULL);
     664             : 
     665     2629676 :         error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs); /* &rs is handed to the callback as its private pointer */
     666     2629675 :         if (error)
     667             :                 return error;
     668             : 
     669     2629674 :         if (xreap_dirty(&rs)) /* the walk left the reap state dirty: finish deferred work before returning */
     670       23757 :                 return xrep_defer_finish(sc);
     671             : 
     672             :         return 0;
     673             : }
     674             : 
     675             : /*
     676             :  * Metadata files are not supposed to share blocks with anything else.
     677             :  * If blocks are shared, we remove the reverse mapping (thus reducing the
     678             :  * crosslink factor); if blocks are not shared, we also need to free them.
     679             :  *
     680             :  * This first step determines the longest subset of the passed-in imap
     681             :  * (starting at its beginning) that is either crosslinked or not crosslinked.
     682             :  * The blockcount will be adjusted down as needed.
     683             :  */
     684             : STATIC int
     685       77208 : xreap_bmapi_select(
     686             :         struct xfs_scrub        *sc,            /* scrub context; sc->sa.pag and sc->sa.agf_bp must already be set */
     687             :         struct xfs_inode        *ip,            /* file whose inode number is used as the rmap owner */
     688             :         int                     whichfork,      /* fork used to build the rmap owner */
     689             :         struct xfs_bmbt_irec    *imap,          /* in: mapping to examine; out: br_blockcount trimmed to the uniform prefix */
     690             :         bool                    *crosslinked)   /* out: fate of the first block, shared by the whole prefix */
     691             : {
     692       77208 :         struct xfs_owner_info   oinfo;
     693       77208 :         struct xfs_btree_cur    *cur;
     694       77208 :         xfs_filblks_t           len = 1; /* the first block always belongs to the prefix */
     695       77208 :         xfs_agblock_t           bno;
     696       77208 :         xfs_agblock_t           agbno;
     697       77208 :         xfs_agblock_t           agbno_next;
     698       77208 :         int                     error;
     699             : 
     700       77208 :         agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
     701       77209 :         agbno_next = agbno + imap->br_blockcount; /* first AG block past the end of the mapping */
     702             : 
     703       77209 :         cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
     704             :                         sc->sa.pag);
     705             : 
     706       77209 :         xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
     707       77209 :         error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked); /* fate of the first block */
     708       77209 :         if (error)
     709           0 :                 goto out_cur;
     710             : 
     711       77209 :         bno = agbno + 1;
     712      104342 :         while (bno < agbno_next) { /* test one block at a time until the fate changes */
     713       27133 :                 bool            also_crosslinked;
     714             : 
     715       27133 :                 oinfo.oi_offset++; /* keep the owner offset in step with bno */
     716       27133 :                 error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
     717             :                                 &also_crosslinked);
     718       27133 :                 if (error)
     719           0 :                         goto out_cur;
     720             : 
     721       27133 :                 if (also_crosslinked != *crosslinked)
     722             :                         break; /* this block has a different fate, so the prefix ends before it */
     723             : 
     724       27133 :                 len++;
     725       27133 :                 bno++;
     726             :         }
     727             : 
     728       77209 :         imap->br_blockcount = len; /* only written on success; error paths leave imap untouched */
     729       77209 :         trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
     730       77209 : out_cur:
     731       77209 :         xfs_btree_del_cursor(cur, error); /* the cursor is deleted on every path, with the error status passed along */
     732       77209 :         return error;
     733             : }
     734             : 
     735             : /*
     736             :  * Decide if this buffer can be joined to a transaction.  This is true for most
     737             :  * buffers, but there are two cases that we want to catch: large remote xattr
     738             :  * value buffers are not logged and can overflow the buffer log item dirty
     739             :  * bitmap size; and oversized cached buffers if things have really gone
     740             :  * haywire.
     741             :  */
     742             : static inline bool /* false as soon as any one map is too large, true otherwise */
     743       89975 : xreap_buf_loggable(
     744             :         const struct xfs_buf    *bp) /* buffer whose b_maps[0..b_map_count) are checked */
     745             : {
     746       89975 :         int                     i;
     747             : 
     748      179950 :         for (i = 0; i < bp->b_map_count; i++) {
     749       89975 :                 int             chunks;
     750       89975 :                 int             map_size;
     751             : 
     752       89975 :                 chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), /* map length via BBTOB, in XFS_BLF_CHUNK units, rounded up */
     753             :                                 XFS_BLF_CHUNK);
     754       89975 :                 map_size = DIV_ROUND_UP(chunks, NBWORD); /* chunks grouped NBWORD at a time, rounded up */
     755       89975 :                 if (map_size > XFS_BLF_DATAMAP_SIZE) /* needs more words than XFS_BLF_DATAMAP_SIZE allows */
     756             :                         return false;
     757             :         }
     758             : 
     759             :         return true;
     760             : }
     761             : 
     762             : /*
     763             :  * Invalidate any buffers for this file mapping.  The @imap blockcount may be
     764             :  * adjusted downward if we need to roll the transaction.
     765             :  */
     766             : STATIC int
     767       77209 : xreap_bmapi_binval(
     768             :         struct xfs_scrub        *sc,            /* scrub context; sc->sa.pag must already be set */
     769             :         struct xfs_inode        *ip,            /* file whose fork is read to find the next hole */
     770             :         int                     whichfork,      /* selects the fork via xfs_bmapi_aflag */
     771             :         struct xfs_bmbt_irec    *imap)          /* in: mapping to scan; out: br_blockcount rewritten if the limit is hit */
     772             : {
     773       77209 :         struct xfs_mount        *mp = sc->mp;
     774       77209 :         struct xfs_perag        *pag = sc->sa.pag;
     775       77209 :         int                     bmap_flags = xfs_bmapi_aflag(whichfork);
     776       77209 :         xfs_fileoff_t           off;
     777       77209 :         xfs_fileoff_t           max_off;
     778       77209 :         xfs_extlen_t            scan_blocks;
     779       77209 :         xfs_agnumber_t          agno = sc->sa.pag->pag_agno;
     780       77209 :         xfs_agblock_t           bno;
     781       77209 :         xfs_agblock_t           agbno;
     782       77209 :         xfs_agblock_t           agbno_next;
     783       77209 :         unsigned int            invalidated = 0; /* buffers invalidated so far in this call */
     784       77209 :         int                     error;
     785             : 
     786             :         /*
     787             :          * Avoid invalidating AG headers and post-EOFS blocks because we never
     788             :          * own those.
     789             :          */
     790       77209 :         agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
     791       77209 :         agbno_next = agbno + imap->br_blockcount;
     792       77209 :         if (!xfs_verify_agbno(pag, agbno) ||
     793       77209 :             !xfs_verify_agbno(pag, agbno_next - 1))
     794             :                 return 0; /* returns success without scanning and without changing imap */
     795             : 
     796             :         /*
     797             :          * Buffers for file blocks can span multiple contiguous mappings.  This
     798             :          * means that for each block in the mapping, there could exist an
     799             :          * xfs_buf indexed by that block with any length up to the maximum
     800             :          * buffer size (remote xattr values) or to the next hole in the fork.
     801             :          * To set up our binval scan, first we need to figure out the location
     802             :          * of the next hole.
     803             :          */
     804       77209 :         off = imap->br_startoff + imap->br_blockcount;
     805       77209 :         max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); /* lookahead limit: block count computed for XFS_XATTR_SIZE_MAX */
     806      137283 :         while (off < max_off) {
     807      133321 :                 struct xfs_bmbt_irec    hmap;
     808      133321 :                 int                     nhmaps = 1;
     809             : 
     810      133321 :                 error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
     811             :                                 &nhmaps, bmap_flags);
     812      133321 :                 if (error)
     813           0 :                         return error;
     814      133321 :                 if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
     815           0 :                         ASSERT(0);
     816           0 :                         return -EFSCORRUPTED; /* no mapping returned, or a delalloc mapping, is treated as corruption */
     817             :                 }
     818             : 
     819      193395 :                 if (!xfs_bmap_is_real_extent(&hmap))
     820             :                         break; /* off stays at the start of the first non-real mapping */
     821             : 
     822       60074 :                 off = hmap.br_startoff + hmap.br_blockcount;
     823             :         }
     824       77209 :         scan_blocks = off - imap->br_startoff; /* blocks from the start of the mapping up to where the lookahead stopped */
     825             : 
     826       77209 :         trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);
     827             : 
     828             :         /*
     829             :          * If there are incore buffers for these blocks, invalidate them.  If
     830             :          * we can't (try)lock the buffer we assume it's owned by someone else
     831             :          * and leave it alone.  The buffer cache cannot detect aliasing, so
     832             :          * employ nested loops to detect incore buffers of any plausible size.
     833             :          */
     834      181542 :         while (bno < agbno_next) { /* outer loop: one starting block at a time */
     835      417336 :                 struct xrep_bufscan     scan = {
     836      104334 :                         .daddr          = XFS_AGB_TO_DADDR(mp, agno, bno),
     837      104334 :                         .max_sectors    = xrep_bufscan_max_sectors(mp,
     838             :                                                                 scan_blocks),
     839      104334 :                         .daddr_step     = XFS_FSB_TO_BB(mp, 1),
     840             :                 };
     841      104334 :                 struct xfs_buf          *bp;
     842             : 
     843      194308 :                 while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) { /* inner loop: each buffer the scan returns for this block */
     844       89975 :                         if (xreap_buf_loggable(bp)) {
     845       89975 :                                 xfs_trans_bjoin(sc->tp, bp);
     846       89975 :                                 xfs_trans_binval(sc->tp, bp);
     847             :                         } else {
     848           0 :                                 xfs_buf_stale(bp); /* not loggable: mark stale and release without joining the transaction */
     849           0 :                                 xfs_buf_relse(bp);
     850             :                         }
     851       89975 :                         invalidated++; /* counted on both branches */
     852             : 
     853             :                         /*
     854             :                          * Stop invalidating if we've hit the limit; we should
     855             :                          * still have enough reservation left to free however
     856             :                          * much of the mapping we've seen so far.
     857             :                          */
     858       89975 :                         if (invalidated > XREAP_MAX_BINVAL) {
     859           1 :                                 imap->br_blockcount = agbno_next - bno; /* NOTE(review): this is the unscanned remainder; the scanned part would be bno - agbno -- confirm intent */
     860           1 :                                 goto out;
     861             :                         }
     862             :                 }
     863             : 
     864      104333 :                 bno++;
     865      104333 :                 scan_blocks--; /* one block fewer between bno and where the lookahead stopped */
     866             :         }
     867             : 
     868       77208 : out:
     869       77209 :         trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
     870       77209 :         return 0;
     871             : }
     872             : 
     873             : /*
     874             :  * Dispose of as much of the beginning of this file fork mapping as possible.
     875             :  * The number of blocks disposed of is returned in @imap->br_blockcount.
     876             :  */
     877             : STATIC int
     878       77209 : xrep_reap_bmapi_iter(
     879             :         struct xfs_scrub                *sc,            /* scrub context supplying the transaction and perag */
     880             :         struct xfs_inode                *ip,            /* file that owns the mapping */
     881             :         int                             whichfork,      /* fork the mapping lives in */
     882             :         struct xfs_bmbt_irec            *imap,          /* mapping to dispose of; may be shortened by the binval step */
     883             :         bool                            crosslinked)    /* true: only unmap; false: invalidate buffers, unmap and free */
     884             : {
     885       77209 :         int                             error;
     886             : 
     887       77209 :         if (crosslinked) {
     888             :                 /*
     889             :                  * If there are other rmappings, this block is cross linked and
     890             :                  * must not be freed.  Remove the reverse mapping, leave the
     891             :                  * buffer cache in its possibly confused state, and move on.
     892             :                  * We don't want to risk discarding valid data buffers from
     893             :                  * anybody else who thinks they own the block, even though that
     894             :                  * runs the risk of stale buffer warnings in the future.
     895             :                  */
     896           0 :                 trace_xreap_dispose_unmap_extent(sc->sa.pag,
     897           0 :                                 XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
     898           0 :                                 imap->br_blockcount);
     899             : 
     900             :                 /*
     901             :                  * Schedule removal of the mapping from the fork.  We use
     902             :                  * deferred log intents in this function to control the exact
     903             :                  * sequence of metadata updates.
     904             :                  */
     905           0 :                 xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
     906           0 :                 xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
     907           0 :                                 -(int64_t)imap->br_blockcount); /* negative delta: the file gives up these blocks */
     908           0 :                 xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
     909           0 :                 return 0; /* crosslinked path never frees the extent */
     910             :         }
     911             : 
     912             :         /*
     913             :          * If the block is not crosslinked, we can invalidate all the incore
     914             :          * buffers for the extent, and then free the extent.  This is a bit of
     915             :          * a mess since we don't detect discontiguous buffers that are indexed
     916             :          * by a block starting before the first block of the extent but overlap
     917             :          * anyway.
     918             :          */
     919       77209 :         trace_xreap_dispose_free_extent(sc->sa.pag,
     920       77209 :                         XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
     921       77209 :                         imap->br_blockcount);
     922             : 
     923             :         /*
     924             :          * Invalidate as many buffers as we can, starting at the beginning of
     925             :          * this mapping.  If this function sets blockcount to zero, the
     926             :          * transaction is full of logged buffer invalidations, so we need to
     927             :          * return early so that we can roll and retry.
     928             :          */
     929       77209 :         error = xreap_bmapi_binval(sc, ip, whichfork, imap);
     930       77209 :         if (error || imap->br_blockcount == 0) /* a zero blockcount returns 0 here with nothing unmapped or freed */
     931             :                 return error;
     932             : 
     933             :         /*
     934             :          * Schedule removal of the mapping from the fork.  We use deferred log
     935             :          * intents in this function to control the exact sequence of metadata
     936             :          * updates.
     937             :          */
     938       77209 :         xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
     939       77209 :         xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
     940       77209 :                         -(int64_t)imap->br_blockcount); /* negative delta: the file gives up these blocks */
     941       77209 :         return __xfs_free_extent_later(sc->tp, imap->br_startblock, /* the result of scheduling the free is the return value */
     942             :                         imap->br_blockcount, NULL, XFS_AG_RESV_NONE, true);
     943             : }
     944             : 
     945             : /*
     946             :  * Dispose of as much of this file extent as we can.  Upon successful return,
     947             :  * the imap will reflect the mapping that was removed from the fork.
     948             :  */
     949             : STATIC int
     950       77206 : xreap_ifork_extent(
     951             :         struct xfs_scrub                *sc,            /* scrub context; sc->sa.pag is asserted NULL on entry */
     952             :         struct xfs_inode                *ip,            /* file that owns the mapping */
     953             :         int                             whichfork,      /* fork the mapping lives in */
     954             :         struct xfs_bmbt_irec            *imap)          /* in: mapping to reap; out: the part actually handled */
     955             : {
     956       77206 :         xfs_agnumber_t                  agno;
     957       77206 :         bool                            crosslinked;
     958       77206 :         int                             error;
     959             : 
     960       77206 :         ASSERT(sc->sa.pag == NULL);
     961             : 
     962       77206 :         trace_xreap_ifork_extent(sc, ip, whichfork, imap);
     963             : 
     964       77207 :         agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
     965       77207 :         sc->sa.pag = xfs_perag_get(sc->mp, agno);
     966       77209 :         if (!sc->sa.pag)
     967             :                 return -EFSCORRUPTED; /* no perag for this AG number; nothing to undo yet */
     968             : 
     969       77209 :         error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
     970       77209 :         if (error)
     971           0 :                 goto out_pag;
     972             : 
     973             :         /*
     974             :          * Decide the fate of the blocks at the beginning of the mapping, then
     975             :          * update the mapping to use it with the unmap calls.
     976             :          */
     977       77209 :         error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
     978       77209 :         if (error)
     979           0 :                 goto out_agf;
     980             : 
     981       77209 :         error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
     982       77209 :         if (error)
     983           0 :                 goto out_agf; /* redundant: the label follows immediately, so both outcomes take the same path */
     984             : 
     985       77209 : out_agf: /* reached on success too: release the AGF buffer and clear the scrub context pointer */
     986       77209 :         xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
     987       77209 :         sc->sa.agf_bp = NULL;
     988       77209 : out_pag: /* drop the perag reference taken above so sc->sa.pag is NULL again on return */
     989       77209 :         xfs_perag_put(sc->sa.pag);
     990       77209 :         sc->sa.pag = NULL;
     991       77209 :         return error;
     992             : }
     993             : 
     994             : /*
     995             :  * Dispose of each block mapped to the given fork of the given file.  Callers
     996             :  * must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip.  The fork
     997             :  * must not have any delalloc reservations.
     998             :  */
     999             : int /* 0, -EFSCORRUPTED for an unexpected mapping, or the first error from a callee */
    1000       66729 : xrep_reap_ifork(
    1001             :         struct xfs_scrub        *sc,            /* scrub context supplying the transaction */
    1002             :         struct xfs_inode        *ip,            /* asserted to be sc->ip or sc->tempip */
    1003             :         int                     whichfork)      /* fork to reap; asserted to be the attr fork unless the inode is not realtime */
    1004             : {
    1005       66729 :         xfs_fileoff_t           off = 0; /* next file offset to examine */
    1006       66729 :         int                     bmap_flags = xfs_bmapi_aflag(whichfork);
    1007       66729 :         int                     error;
    1008             : 
    1009       66729 :         ASSERT(xfs_has_rmapbt(sc->mp));
    1010       66729 :         ASSERT(ip == sc->ip || ip == sc->tempip);
    1011       66729 :         ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
    1012             : 
    1013      212569 :         while (off < XFS_MAX_FILEOFF) {
    1014      145838 :                 struct xfs_bmbt_irec    imap;
    1015      145838 :                 int                     nimaps = 1; /* ask for a single mapping per read */
    1016             : 
    1017             :                 /* Read the next extent, skip past holes and delalloc. */
    1018      145838 :                 error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
    1019             :                                 &nimaps, bmap_flags);
    1020      145839 :                 if (error)
    1021           0 :                         return error;
    1022      145839 :                 if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
    1023           0 :                         ASSERT(0);
    1024           0 :                         return -EFSCORRUPTED; /* no mapping returned, or a delalloc mapping, is treated as corruption */
    1025             :                 }
    1026             : 
    1027             :                 /*
    1028             :                  * If this is a real space mapping, reap as much of it as we
    1029             :                  * can in a single transaction.
    1030             :                  */
    1031      223047 :                 if (xfs_bmap_is_real_extent(&imap)) {
    1032       77208 :                         error = xreap_ifork_extent(sc, ip, whichfork, &imap); /* may shorten imap to the part it handled */
    1033       77209 :                         if (error)
    1034           0 :                                 return error;
    1035             : 
    1036       77209 :                         error = xfs_defer_finish(&sc->tp); /* finish the queued work before reading the next mapping */
    1037       77209 :                         if (error)
    1038           0 :                                 return error;
    1039             :                 }
    1040             : 
    1041      145840 :                 off = imap.br_startoff + imap.br_blockcount; /* resume after this mapping: a skipped one, or the part just reaped */
    1042             :         }
    1043             : 
    1044             :         return 0;
    1045             : }

Generated by: LCOV version 1.14