LCOV - code coverage report
Current view: top level - fs/xfs/scrub - ialloc_repair.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 317 343 92.4 %
Date: 2023-07-31 20:08:12 Functions: 18 18 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_btree.h"
      14             : #include "xfs_btree_staging.h"
      15             : #include "xfs_bit.h"
      16             : #include "xfs_log_format.h"
      17             : #include "xfs_trans.h"
      18             : #include "xfs_sb.h"
      19             : #include "xfs_inode.h"
      20             : #include "xfs_alloc.h"
      21             : #include "xfs_ialloc.h"
      22             : #include "xfs_ialloc_btree.h"
      23             : #include "xfs_icache.h"
      24             : #include "xfs_rmap.h"
      25             : #include "xfs_rmap_btree.h"
      26             : #include "xfs_log.h"
      27             : #include "xfs_trans_priv.h"
      28             : #include "xfs_error.h"
      29             : #include "xfs_health.h"
      30             : #include "xfs_ag.h"
      31             : #include "scrub/xfs_scrub.h"
      32             : #include "scrub/scrub.h"
      33             : #include "scrub/common.h"
      34             : #include "scrub/btree.h"
      35             : #include "scrub/trace.h"
      36             : #include "scrub/repair.h"
      37             : #include "scrub/bitmap.h"
      38             : #include "scrub/xfile.h"
      39             : #include "scrub/xfarray.h"
      40             : #include "scrub/newbt.h"
      41             : #include "scrub/reap.h"
      42             : 
      43             : /*
      44             :  * Inode Btree Repair
      45             :  * ==================
      46             :  *
      47             :  * A quick refresher of inode btrees on a v5 filesystem:
      48             :  *
      49             :  * - Inode records are read into memory in units of 'inode clusters'.  The
      50             :  *   number of inodes that fit in a cluster buffer is the smallest number that
      51             :  *   can be allocated or freed.  Clusters are never smaller than one fs block
      52             :  *   though they can span multiple blocks.  The size (in fs blocks) is
      53             :  *   computed with xfs_icluster_size_fsb().  The fs block alignment of a
      54             :  *   cluster is computed with xfs_ialloc_cluster_alignment().
      55             :  *
      56             :  * - Each inode btree record can describe a single 'inode chunk'.  The chunk
      57             :  *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
      58             :  *   inobt record must be aligned to the chunk size; if not, every record must
      59             :  *   be aligned to the start of a cluster.  It is possible to construct an XFS
      60             :  *   geometry where one inobt record maps to multiple inode clusters; it is
      61             :  *   also possible to construct a geometry where multiple inobt records map to
      62             :  *   different parts of one inode cluster.
      63             :  *
      64             :  * - If sparse inodes are not enabled, the smallest unit of allocation for
      65             :  *   inode records is enough to contain one inode chunk's worth of inodes.
      66             :  *
      67             :  * - If sparse inodes are enabled, the holemask field will be active.  Each
      68             :  *   bit of the holemask represents 4 potential inodes; if set, the
      69             :  *   corresponding space does *not* contain inodes and must be left alone.
      70             :  *   Clusters cannot be smaller than 4 inodes.  The smallest unit of allocation
      71             :  *   of inode records is one inode cluster.
      72             :  *
      73             :  * So what's the rebuild algorithm?
      74             :  *
      75             :  * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
      76             :  * records.  The OWN_INOBT records are the old inode btree blocks and will be
      77             :  * cleared out after we've rebuilt the tree.  Each possible inode cluster
      78             :  * within an OWN_INODES record will be read in; for each possible inobt record
      79             :  * associated with that cluster, compute the freemask calculated from the
      80             :  * i_mode data in the inode chunk.  For sparse inodes the holemask will be
      81             :  * calculated by creating the properly aligned inobt record and punching out
      82             :  * any chunk that's missing.  Inode allocations and frees grab the AGI first,
      83             :  * so repair protects itself from concurrent access by locking the AGI.
      84             :  *
      85             :  * Once we've reconstructed all the inode records, we can create new inode
      86             :  * btree roots and reload the btrees.  We rebuild both inode trees at the same
      87             :  * time because they have the same rmap owner and it would be more complex to
      88             :  * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT
      89             :  * blocks it owns.  We have all the data we need to build both, so dump
      90             :  * everything and start over.
      91             :  *
      92             :  * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
      93             :  */
      94             : 
      95             : struct xrep_ibt {
      96             :         /* Record under construction. */
      97             :         struct xfs_inobt_rec_incore     rie;
      98             : 
      99             :         /* new inobt information */
     100             :         struct xrep_newbt       new_inobt;
     101             : 
     102             :         /* new finobt information */
     103             :         struct xrep_newbt       new_finobt;
     104             : 
     105             :         /* Old inode btree blocks we found in the rmap. */
     106             :         struct xagb_bitmap      old_iallocbt_blocks;
     107             : 
     108             :         /* Reconstructed inode records. */
     109             :         struct xfarray          *inode_records;
     110             : 
     111             :         struct xfs_scrub        *sc;
     112             : 
     113             :         /* Number of inodes assigned disk space. */
     114             :         unsigned int            icount;
     115             : 
     116             :         /* Number of inodes in use. */
     117             :         unsigned int            iused;
     118             : 
     119             :         /* Number of finobt records needed. */
     120             :         unsigned int            finobt_recs;
     121             : 
     122             :         /* get_records()'s position in the inode record array. */
     123             :         xfarray_idx_t           array_cur;
     124             : };
     125             : 
     126             : /*
     127             :  * Is this inode in use?  If the inode is in memory we can tell from i_mode,
     128             :  * otherwise we have to check di_mode in the on-disk buffer.  We only care
     129             :  * that the high (i.e. non-permission) bits of _mode are zero.  This should be
     130             :  * safe because repair keeps all AG headers locked until the end, and any
     131             :  * process trying to perform an inode allocation/free must lock the AGI.
                     :  *
                     :  * NOTE(review): the on-disk fallback below compares the whole di_mode field
                     :  * against zero; it does not mask off the permission bits.
     132             :  *
     133             :  * @cluster_ag_base is the inode offset of the cluster within the AG.
     134             :  * @cluster_bp is the cluster buffer.
     135             :  * @cluster_index is the inode offset within the inode cluster.
                     :  * @inuse receives the result: either whatever xchk_inode_is_allocated()
                     :  * reports, or whether the on-disk di_mode is nonzero.
                     :  *
                     :  * Returns 0 with @inuse filled in, or -EFSCORRUPTED if the inode lies past
                     :  * the end of the cluster buffer, has a bad magic number, or (for version 3
                     :  * and later inodes) records an inode number other than the one computed
                     :  * here from the AG number and AG inode number.
     136             :  */
     137             : STATIC int
     138   190392486 : xrep_ibt_check_ifree(
     139             :         struct xrep_ibt         *ri,
     140             :         xfs_agino_t             cluster_ag_base,
     141             :         struct xfs_buf          *cluster_bp,
     142             :         unsigned int            cluster_index,
     143             :         bool                    *inuse)
     144             : {
     145   190392486 :         struct xfs_scrub        *sc = ri->sc;
     146   190392486 :         struct xfs_mount        *mp = sc->mp;
     147   190392486 :         struct xfs_dinode       *dip;
     148   190392486 :         xfs_ino_t               fsino;
     149   190392486 :         xfs_agino_t             agino;
     150   190392486 :         xfs_agnumber_t          agno = ri->sc->sa.pag->pag_agno;
     151   190392486 :         unsigned int            cluster_buf_base;
     152   190392486 :         unsigned int            offset;
     153   190392486 :         int                     error;
     154             : 
     155   190392486 :         agino = cluster_ag_base + cluster_index;
     156   190392486 :         fsino = XFS_AGINO_TO_INO(mp, agno, agino);
     157             : 
     158             :         /*
                     :          * Find the on-disk copy of this inode in the cluster buffer and
                     :          * sanity check it before asking the inode cache anything.  This
                     :          * copy is the fallback when the incore lookup below cannot
                     :          * answer (inode uncached or half assembled).
                     :          */
     159   190392486 :         cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
     160   190392486 :         offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
     161   190392486 :         if (offset >= BBTOB(cluster_bp->b_length))
     162             :                 return -EFSCORRUPTED;
     163   190392486 :         dip = xfs_buf_offset(cluster_bp, offset);
     164   190390280 :         if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
     165             :                 return -EFSCORRUPTED;
     166             : 
     167   190390280 :         if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
     168             :                 return -EFSCORRUPTED;
     169             : 
     170             :         /*
                     :          * Will the in-core inode tell us if it's in use?  A zero return
                     :          * means @inuse has been answered; any error means no answer.
                     :          */
     171   190390280 :         error = xchk_inode_is_allocated(sc, agino, inuse);
     172   190387823 :         if (!error)
     173             :                 return 0;
     174             :         /* No answer from the lookup, so let the on-disk mode decide. */
     175     6155744 :         *inuse = dip->di_mode != 0;
     176     6155744 :         return 0;
     177             : }
     178             : 
     179             : /*
                     :  * Stash the accumulated inobt record for rebuilding.
                     :  *
                     :  * Fills in ir_freecount from xfs_inobt_rec_freecount(), runs the per-AG
                     :  * record check, appends the record to the inode_records array, and then
                     :  * marks the accumulator empty by setting ir_startino to NULLAGINO.
                     :  *
                     :  * Returns 0 on success, -EFSCORRUPTED if the record fails the per-AG
                     :  * check, or the error set by xchk_should_terminate() or returned by
                     :  * xfarray_append().
                     :  */
     180             : STATIC int
     181     3904529 : xrep_ibt_stash(
     182             :         struct xrep_ibt         *ri)
     183             : {
     184     3904529 :         int                     error = 0;
     185             : 
     186     3904529 :         if (xchk_should_terminate(ri->sc, &error))
     187           0 :                 return error;
     188             : 
     189     3904526 :         ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
     190     3904520 :         if (xfs_inobt_check_perag_irec(ri->sc->sa.pag, &ri->rie) != NULL)
     191             :                 return -EFSCORRUPTED;
     192             :         /* Records with free inodes will also need a finobt record. */
     193     3904524 :         if (ri->rie.ir_freecount > 0)
     194       50029 :                 ri->finobt_recs++;
     195             : 
     196     3904524 :         trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);
     197             : 
     198     3904523 :         error = xfarray_append(ri->inode_records, &ri->rie);
     199     3904536 :         if (error)
     200             :                 return error;
     201             :         /* Mark the accumulator empty so the next cluster starts a new one. */
     202     3904536 :         ri->rie.ir_startino = NULLAGINO;
     203     3904536 :         return 0;
     204             : }
     205             : 
     206             : /*
     207             :  * Given an extent of inodes and an inode cluster buffer, calculate the
     208             :  * location of the corresponding inobt record (creating it if necessary),
     209             :  * then update the parts of the holemask and freemask of that record that
     210             :  * correspond to the inode extent we were given.
     211             :  *
     212             :  * @cluster_ir_startino is the AG inode number of an inobt record that we're
     213             :  * proposing to create for this inode cluster.  If sparse inodes are enabled,
     214             :  * we must round down to a chunk boundary to find the actual sparse record.
     215             :  * @cluster_bp is the buffer of the inode cluster.
     216             :  * @nr_inodes is the number of inodes to check from the cluster.
                     :  *
                     :  * Returns 0 on success, or the negative errno produced by stashing the
                     :  * previously accumulated record or by checking one of the inodes.
     217             :  */
     218             : STATIC int
     219     5950546 : xrep_ibt_cluster_record(
     220             :         struct xrep_ibt         *ri,
     221             :         xfs_agino_t             cluster_ir_startino,
     222             :         struct xfs_buf          *cluster_bp,
     223             :         unsigned int            nr_inodes)
     224             : {
     225     5950546 :         struct xfs_scrub        *sc = ri->sc;
     226     5950546 :         struct xfs_mount        *mp = sc->mp;
     227     5950546 :         xfs_agino_t             ir_startino;
     228     5950546 :         unsigned int            cluster_base;
     229     5950546 :         unsigned int            cluster_index;
     230     5950546 :         int                     error = 0;
     231             : 
     232     5950546 :         ir_startino = cluster_ir_startino;
     233     5950546 :         if (xfs_has_sparseinodes(mp))
     234     5950544 :                 ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
     235     5950546 :         cluster_base = cluster_ir_startino - ir_startino; /* offset in record */
     236             : 
     237             :         /*
     238             :          * If the accumulated inobt record doesn't map this cluster, add it to
     239             :          * the list and reset it.
                     :          * That is the case when the accumulated record ends at or
                     :          * before the start of the record this cluster belongs to.
     240             :          */
     241     5950546 :         if (ri->rie.ir_startino != NULLAGINO &&
     242     5913377 :             ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
     243     3867360 :                 error = xrep_ibt_stash(ri);
     244     3867372 :                 if (error)
     245             :                         return error;
     246             :         }
     247             :         /* Start a new record: every inode free, every holemask bit set. */
     248     5950558 :         if (ri->rie.ir_startino == NULLAGINO) {
     249     3904540 :                 ri->rie.ir_startino = ir_startino;
     250     3904540 :                 ri->rie.ir_free = XFS_INOBT_ALL_FREE;
     251     3904540 :                 ri->rie.ir_holemask = 0xFFFF;
     252     3904540 :                 ri->rie.ir_count = 0;
     253             :         }
     254             : 
     255             :         /*
                     :          * Record the whole cluster: count its inodes and clear the
                     :          * holemask bits that cover it within the record.
                     :          */
     256     5950558 :         ri->icount += nr_inodes;
     257     5950558 :         ri->rie.ir_count += nr_inodes;
     258     5950558 :         ri->rie.ir_holemask &= ~xfs_inobt_maskn(
     259             :                                 cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
     260             :                                 nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);
     261             : 
     262             :         /*
                     :          * Which inodes within this cluster are free?  Each inode found
                     :          * to be in use is counted and has its ir_free bit cleared.
                     :          */
     263   196337789 :         for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
     264   190387202 :                 bool            inuse = false;
     265             : 
     266   190387202 :                 error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
     267             :                                 cluster_bp, cluster_index, &inuse);
     268   190387222 :                 if (error)
     269           0 :                         return error;
     270   190387222 :                 if (!inuse)
     271     1264010 :                         continue;
     272   189123212 :                 ri->iused++;
     273   189123212 :                 ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
     274             :                                                    cluster_index);
     275             :         }
     276             :         return 0;
     277             : }
     278             : 
     279             : /*
     280             :  * For each inode cluster covering the physical extent recorded by the rmapbt,
     281             :  * we must calculate the properly aligned startino of that cluster, then
     282             :  * iterate each cluster to fill in used and filled masks appropriately.  We
     283             :  * then use the (startino, used, filled) information to construct the
     284             :  * appropriate inode records.
                     :  *
                     :  * @cluster_bno is the AG block number of the first block of the cluster.
                     :  *
                     :  * Returns 0 on success, or the negative errno from reading the cluster
                     :  * buffer or from recording one of the inobt records that map it.
     285             :  */
     286             : STATIC int
     287     5950582 : xrep_ibt_process_cluster(
     288             :         struct xrep_ibt         *ri,
     289             :         xfs_agblock_t           cluster_bno)
     290             : {
     291     5950582 :         struct xfs_imap         imap;
     292     5950582 :         struct xfs_buf          *cluster_bp;
     293     5950582 :         struct xfs_scrub        *sc = ri->sc;
     294     5950582 :         struct xfs_mount        *mp = sc->mp;
     295     5950582 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
     296     5950582 :         xfs_agino_t             cluster_ag_base;
     297     5950582 :         xfs_agino_t             irec_index;
     298     5950582 :         unsigned int            nr_inodes;
     299     5950582 :         int                     error;
     300             :         /* Never hand more than one chunk of inodes to a single record. */
     301     5950582 :         nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
     302             :                         XFS_INODES_PER_CHUNK);
     303             : 
     304             :         /*
     305             :          * Grab the inode cluster buffer.  This is safe to do with a broken
     306             :          * inobt because imap_to_bp directly maps the buffer without touching
     307             :          * either inode btree.
     308             :          */
     309     5950582 :         imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
     310     5950582 :         imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
     311     5950582 :         imap.im_boffset = 0;
     312     5950582 :         error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
     313     5950546 :         if (error)
     314             :                 return error;
     315             : 
     316             :         /*
     317             :          * Record the contents of each possible inobt record mapping this
     318             :          * cluster.
                     :          * The loop advances one chunk of inodes at a time and stops at
                     :          * the first error; the buffer is released below either way.
     319             :          */
     320     5950546 :         cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
     321     5950546 :         for (irec_index = 0;
     322    11901123 :              irec_index < igeo->inodes_per_cluster;
     323     5950577 :              irec_index += XFS_INODES_PER_CHUNK) {
     324     5950550 :                 error = xrep_ibt_cluster_record(ri,
     325             :                                 cluster_ag_base + irec_index, cluster_bp,
     326             :                                 nr_inodes);
     327     5950577 :                 if (error)
     328             :                         break;
     329             : 
     330             :         }
     331             : 
     332     5950573 :         xfs_trans_brelse(sc->tp, cluster_bp);
     333     5950573 :         return error;
     334             : }
     335             : 
     336             : /*
                     :  * Check for any obvious conflicts in the inode chunk extent.
                     :  *
                     :  * @agbno is the first AG block of the extent and @len is its length in
                     :  * blocks.
                     :  *
                     :  * Returns 0 if the extent passes every check, -EFSCORRUPTED if it fails
                     :  * a bounds, alignment, or inode number check or if the free space btree
                     :  * has any record within it, or the negative errno from the free space
                     :  * btree lookup itself.
                     :  */
     337             : STATIC int
     338     3711726 : xrep_ibt_check_inode_ext(
     339             :         struct xfs_scrub        *sc,
     340             :         xfs_agblock_t           agbno,
     341             :         xfs_extlen_t            len)
     342             : {
     343     3711726 :         struct xfs_mount        *mp = sc->mp;
     344     3711726 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
     345     3711726 :         xfs_agino_t             agino;
     346     3711726 :         enum xbtree_recpacking  outcome;
     347     3711726 :         int                     error;
     348             : 
     349             :         /* Inode records must be within the AG. */
     350     3711726 :         if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
     351             :                 return -EFSCORRUPTED;
     352             : 
     353             :         /*
                     :          * The entire record must align to the inode cluster size, at
                     :          * both its first block and the block just past its end.
                     :          */
     354     3711726 :         if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
     355     3711726 :             !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
     356             :                 return -EFSCORRUPTED;
     357             : 
     358             :         /*
     359             :          * The entire record must also adhere to the inode cluster alignment
     360             :          * size if sparse inodes are not enabled.
     361             :          */
     362     3711726 :         if (!xfs_has_sparseinodes(mp) &&
     363           0 :             (!IS_ALIGNED(agbno, igeo->cluster_align) ||
     364           0 :              !IS_ALIGNED(agbno + len, igeo->cluster_align)))
     365             :                 return -EFSCORRUPTED;
     366             : 
     367             :         /*
     368             :          * On a sparse inode fs, this cluster could be part of a sparse chunk.
     369             :          * Sparse clusters must be aligned to sparse chunk alignment.
     370             :          */
     371     3711726 :         if (xfs_has_sparseinodes(mp) &&
     372     3711725 :             (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
     373     3711725 :              !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
     374             :                 return -EFSCORRUPTED;
     375             : 
     376             :         /*
                     :          * Make sure the entire range of blocks are valid AG inodes.
                     :          * Only the first and the last inode number of the extent are
                     :          * verified.
                     :          */
     377     3711726 :         agino = XFS_AGB_TO_AGINO(mp, agbno);
     378     3711726 :         if (!xfs_verify_agino(sc->sa.pag, agino))
     379             :                 return -EFSCORRUPTED;
     380             : 
     381     3711726 :         agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
     382     3711726 :         if (!xfs_verify_agino(sc->sa.pag, agino))
     383             :                 return -EFSCORRUPTED;
     384             : 
     385             :         /*
                     :          * Make sure this isn't free space.  Anything other than an
                     :          * empty result from the free space btree is a conflict.
                     :          */
     386     3711726 :         error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
     387     3711727 :         if (error)
     388             :                 return error;
     389     3711727 :         if (outcome != XBTREE_RECPACKING_EMPTY)
     390           0 :                 return -EFSCORRUPTED;
     391             : 
     392             :         return 0;
     393             : }
     394             : 
     395             : /*
                     :  * Found a fragment of the old inode btrees; dispose of them later.
                     :  *
                     :  * The extent described by @rec is checked against the AG bounds and then
                     :  * set in the old_iallocbt_blocks bitmap.  Returns -EFSCORRUPTED if the
                     :  * extent fails the bounds check, otherwise whatever xagb_bitmap_set()
                     :  * returns.
                     :  */
     396             : STATIC int
     397      103889 : xrep_ibt_record_old_btree_blocks(
     398             :         struct xrep_ibt                 *ri,
     399             :         const struct xfs_rmap_irec      *rec)
     400             : {
     401      103889 :         if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
     402      103889 :                                 rec->rm_blockcount))
     403             :                 return -EFSCORRUPTED;
     404             : 
     405      103889 :         return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
     406             :                         rec->rm_blockcount);
     407             : }
     408             : 
     409             : /*
                     :  * Record extents that belong to inode btrees.
                     :  *
                     :  * This is the per-record callback handed to xfs_rmap_query_all(); @priv
                     :  * is the struct xrep_ibt.  OWN_INOBT extents are remembered as old btree
                     :  * blocks, OWN_INODES extents are validated and then processed one inode
                     :  * cluster at a time, and every other owner is ignored.
                     :  *
                     :  * Returns 0 to keep walking, or a negative errno to stop.
                     :  */
     410             : STATIC int
     411   577132862 : xrep_ibt_walk_rmap(
     412             :         struct xfs_btree_cur            *cur,
     413             :         const struct xfs_rmap_irec      *rec,
     414             :         void                            *priv)
     415             : {
     416   577132862 :         struct xrep_ibt                 *ri = priv;
     417   577132862 :         struct xfs_mount                *mp = cur->bc_mp;
     418   577132862 :         struct xfs_ino_geometry         *igeo = M_IGEO(mp);
     419   577132862 :         xfs_agblock_t                   cluster_base;
     420   577132862 :         int                             error = 0;
     421             : 
     422   577132862 :         if (xchk_should_terminate(ri->sc, &error))
     423           0 :                 return error;
     424             : 
     425   577160753 :         if (rec->rm_owner == XFS_RMAP_OWN_INOBT)
     426      103934 :                 return xrep_ibt_record_old_btree_blocks(ri, rec);
     427             : 
     428             :         /* Skip extents that are not inode chunks (OWN_INODES). */
     429   577056819 :         if (rec->rm_owner != XFS_RMAP_OWN_INODES)
     430             :                 return 0;
     431             : 
     432     7423453 :         error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
     433     3711725 :                         rec->rm_blockcount);
     434     3711728 :         if (error)
     435             :                 return error;
     436             : 
     437     3711729 :         trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
     438     3711729 :                         rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
     439     3711729 :                         rec->rm_offset, rec->rm_flags);
     440             : 
     441             :         /*
     442             :          * Record the free/hole masks for each inode cluster that could be
     443             :          * mapped by this rmap record.
                     :          * Note that cluster_base here is a block offset into the rmap
                     :          * extent, stepped by the number of blocks per cluster.
     444             :          */
     445     3711729 :         for (cluster_base = 0;
     446     9662322 :              cluster_base < rec->rm_blockcount;
     447     5950593 :              cluster_base += igeo->blocks_per_cluster) {
     448    11901183 :                 error = xrep_ibt_process_cluster(ri,
     449     5950590 :                                 rec->rm_startblock + cluster_base);
     450     5950593 :                 if (error)
     451           0 :                         return error;
     452             :         }
     453             : 
     454             :         return 0;
     455             : }
     456             : 
     457             : /*
     458             :  * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
     459             :  * btrees (OWN_INOBT).  Figure out if we have enough free space to reconstruct
     460             :  * the inode btrees.  The caller must clean up the lists if anything goes
     461             :  * wrong.
                     :  *
                     :  * NOTE(review): nothing in the body of this function checks free space;
                     :  * presumably that happens in a caller or a later step -- confirm.
                     :  *
                     :  * Returns 0 on success, or the negative errno from the rmap walk or from
                     :  * stashing the final record.
     462             :  */
     463             : STATIC int
     464       73732 : xrep_ibt_find_inodes(
     465             :         struct xrep_ibt         *ri)
     466             : {
     467       73732 :         struct xfs_scrub        *sc = ri->sc;
     468       73732 :         int                     error;
     469             :         /* Begin with no record under construction. */
     470       73732 :         ri->rie.ir_startino = NULLAGINO;
     471             : 
     472             :         /*
                     :          * Collect all reverse mappings for inode blocks.  The AG btree
                     :          * cursors are set up just for the walk and are freed again
                     :          * before the walk's return value is examined.
                     :          */
     473       73732 :         xrep_ag_btcur_init(sc, &sc->sa);
     474       73773 :         error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
     475       73775 :         xchk_ag_btcur_free(&sc->sa);
     476       73778 :         if (error)
     477             :                 return error;
     478             : 
     479             :         /* If we have a record ready to go, add it to the array. */
     480       73778 :         if (ri->rie.ir_startino == NULLAGINO)
     481             :                 return 0;
     482             : 
     483       37169 :         return xrep_ibt_stash(ri);
     484             : }
     485             : 
     486             : /*
                     :  * Update the AGI counters.
                     :  *
                     :  * agi_count is set to the number of inodes found (icount) and
                     :  * agi_freecount to icount minus iused.  Both fields are logged in the
                     :  * scrub transaction.  Returns whatever xrep_reinit_pagi() returns.
                     :  */
     487             : STATIC int
     488       73705 : xrep_ibt_reset_counters(
     489             :         struct xrep_ibt         *ri)
     490             : {
     491       73705 :         struct xfs_scrub        *sc = ri->sc;
     492       73705 :         struct xfs_agi          *agi = sc->sa.agi_bp->b_addr;
     493       73705 :         unsigned int            freecount = ri->icount - ri->iused;
     494             : 
     495             :         /* Trigger inode count recalculation */
     496       73705 :         xfs_force_summary_recalc(sc->mp);
     497             : 
     498             :         /*
     499             :          * The AGI header contains extra information related to the inode
     500             :          * btrees, so we must update those fields here.
     501             :          */
     502       73744 :         agi->agi_count = cpu_to_be32(ri->icount);
     503       73744 :         agi->agi_freecount = cpu_to_be32(freecount);
     504       73744 :         xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
     505             :                            XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
     506             : 
     507             :         /* Reinitialize with the values we just logged. */
     508       73741 :         return xrep_reinit_pagi(sc);
     509             : }
     510             : 
     511             : /*
                     :  * Retrieve finobt data for bulk load.
                     :  *
                     :  * Fills @nr_wanted record slots of @block, starting at slot @idx, from the
                     :  * inode_records array of the struct xrep_ibt passed as @priv.  Records
                     :  * whose free count is zero are skipped.  Each record is staged in the
                     :  * cursor (cur->bc_rec.i) and then formatted into the block through the
                     :  * cursor's init_rec_from_cur operation.
                     :  *
                     :  * Returns the number of records loaded, or the negative errno from
                     :  * xfarray_load().
                     :  */
     512             : STATIC int
     513       36363 : xrep_fibt_get_records(
     514             :         struct xfs_btree_cur            *cur,
     515             :         unsigned int                    idx,
     516             :         struct xfs_btree_block          *block,
     517             :         unsigned int                    nr_wanted,
     518             :         void                            *priv)
     519             : {
     520       36363 :         struct xfs_inobt_rec_incore     *irec = &cur->bc_rec.i;
     521       36363 :         struct xrep_ibt                 *ri = priv;
     522       36363 :         union xfs_btree_rec             *block_rec;
     523       36363 :         unsigned int                    loaded;
     524       36363 :         int                             error;
     525             : 
     526       86380 :         for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
     527     2937009 :                 do {    /* advance to the next record that has free inodes */
     528     2937009 :                         error = xfarray_load(ri->inode_records,
     529     2937009 :                                         ri->array_cur++, irec);
     530     2937029 :                 } while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
     531       50021 :                 if (error)
     532           0 :                         return error;
     533             : 
     534       50021 :                 block_rec = xfs_btree_rec_addr(cur, idx, block);
     535       50016 :                 cur->bc_ops->init_rec_from_cur(cur, block_rec);
     536             :         }
     537             : 
     538       36370 :         return loaded;
     539             : }
     540             : 
     541             : /* Retrieve inobt data for bulk load. */
     542             : STATIC int
     543       44260 : xrep_ibt_get_records(
     544             :         struct xfs_btree_cur            *cur,
     545             :         unsigned int                    idx,
     546             :         struct xfs_btree_block          *block,
     547             :         unsigned int                    nr_wanted,
     548             :         void                            *priv)
     549             : {
     550       44260 :         struct xfs_inobt_rec_incore     *irec = &cur->bc_rec.i;
     551       44260 :         struct xrep_ibt                 *ri = priv;
     552       44260 :         union xfs_btree_rec             *block_rec;
     553       44260 :         unsigned int                    loaded;
     554       44260 :         int                             error;
     555             : 
     556     3876680 :         for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
     557     3832417 :                 error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
     558     3832428 :                 if (error)
     559           0 :                         return error;
     560             : 
     561     3832428 :                 block_rec = xfs_btree_rec_addr(cur, idx, block);
     562     3832420 :                 cur->bc_ops->init_rec_from_cur(cur, block_rec);
     563             :         }
     564             : 
     565       44263 :         return loaded;
     566             : }
     567             : 
     568             : /* Feed one of the new inobt blocks to the bulk loader. */
     569             : STATIC int
     570       87811 : xrep_ibt_claim_block(
     571             :         struct xfs_btree_cur    *cur,
     572             :         union xfs_btree_ptr     *ptr,
     573             :         void                    *priv)
     574             : {
     575       87811 :         struct xrep_ibt         *ri = priv;
     576       87811 :         int                     error;
     577             : 
     578       87811 :         error = xrep_newbt_relog_autoreap(&ri->new_inobt);
     579       87799 :         if (error)
     580             :                 return error;
     581             : 
     582       87801 :         return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
     583             : }
     584             : 
     585             : /* Feed one of the new finobt blocks to the bulk loader. */
     586             : STATIC int
     587       73716 : xrep_fibt_claim_block(
     588             :         struct xfs_btree_cur    *cur,
     589             :         union xfs_btree_ptr     *ptr,
     590             :         void                    *priv)
     591             : {
     592       73716 :         struct xrep_ibt         *ri = priv;
     593       73716 :         int                     error;
     594             : 
     595       73716 :         error = xrep_newbt_relog_autoreap(&ri->new_finobt);
     596       73734 :         if (error)
     597             :                 return error;
     598             : 
     599       73734 :         return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
     600             : }
     601             : 
     602             : /* Make sure the records do not overlap in inumber address space. */
     603             : STATIC int
     604       73728 : xrep_ibt_check_startino(
     605             :         struct xrep_ibt                 *ri)
     606             : {
     607       73728 :         struct xfs_inobt_rec_incore     irec;
     608       73728 :         xfarray_idx_t                   cur;
     609       73728 :         xfs_agino_t                     next_agino = 0;
     610       73728 :         int                             error = 0;
     611             : 
     612     3978288 :         foreach_xfarray_idx(ri->inode_records, cur) {
     613     3904558 :                 if (xchk_should_terminate(ri->sc, &error))
     614           0 :                         return error;
     615             : 
     616     3904558 :                 error = xfarray_load(ri->inode_records, cur, &irec);
     617     3904560 :                 if (error)
     618           0 :                         return error;
     619             : 
     620     3904560 :                 if (irec.ir_startino < next_agino)
     621             :                         return -EFSCORRUPTED;
     622             : 
     623     3904560 :                 next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
     624             :         }
     625             : 
     626       73719 :         return error;
     627             : }
     628             : 
     629             : /* Build new inode btrees and dispose of the old one. */
     630             : STATIC int
     631       73754 : xrep_ibt_build_new_trees(
     632             :         struct xrep_ibt         *ri)
     633             : {
     634       73754 :         struct xfs_scrub        *sc = ri->sc;
     635       73754 :         struct xfs_btree_cur    *ino_cur;
     636       73754 :         struct xfs_btree_cur    *fino_cur = NULL;
     637       73754 :         xfs_fsblock_t           fsbno;
     638       73754 :         bool                    need_finobt;
     639       73754 :         int                     error;
     640             : 
     641       73754 :         need_finobt = xfs_has_finobt(sc->mp);
     642             : 
     643             :         /*
     644             :          * Create new btrees for staging all the inobt records we collected
     645             :          * earlier.  The records were collected in order of increasing agino,
     646             :          * so we do not have to sort them.  Ensure there are no overlapping
     647             :          * records.
     648             :          */
     649       73754 :         error = xrep_ibt_check_startino(ri);
     650       73732 :         if (error)
     651             :                 return error;
     652             : 
     653             :         /*
     654             :          * The new inode btrees will not be rooted in the AGI until we've
     655             :          * successfully rebuilt the tree.
     656             :          *
     657             :          * Start by setting up the inobt staging cursor.
     658             :          */
     659       73732 :         fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
     660             :                         XFS_IBT_BLOCK(sc->mp)),
     661       73732 :         xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
     662             :                         XFS_AG_RESV_NONE);
     663       73721 :         ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
     664       73721 :         ri->new_inobt.bload.get_records = xrep_ibt_get_records;
     665             : 
     666       73721 :         ino_cur = xfs_inobt_stage_cursor(sc->sa.pag, &ri->new_inobt.afake,
     667             :                         XFS_BTNUM_INO);
     668       73745 :         error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
     669             :                         xfarray_length(ri->inode_records));
     670       73697 :         if (error)
     671           0 :                 goto err_inocur;
     672             : 
     673             :         /* Set up finobt staging cursor. */
     674       73697 :         if (need_finobt) {
     675       73697 :                 enum xfs_ag_resv_type   resv = XFS_AG_RESV_METADATA;
     676             : 
     677       73697 :                 if (sc->mp->m_finobt_nores)
     678           0 :                         resv = XFS_AG_RESV_NONE;
     679             : 
     680       73697 :                 fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
     681             :                                 XFS_FIBT_BLOCK(sc->mp)),
     682       73697 :                 xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
     683             :                                 fsbno, resv);
     684       73753 :                 ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
     685       73753 :                 ri->new_finobt.bload.get_records = xrep_fibt_get_records;
     686             : 
     687       73753 :                 fino_cur = xfs_inobt_stage_cursor(sc->sa.pag,
     688             :                                 &ri->new_finobt.afake, XFS_BTNUM_FINO);
     689      147564 :                 error = xfs_btree_bload_compute_geometry(fino_cur,
     690       73779 :                                 &ri->new_finobt.bload, ri->finobt_recs);
     691       73785 :                 if (error)
     692           0 :                         goto err_finocur;
     693             :         }
     694             : 
     695             :         /* Last chance to abort before we start committing fixes. */
     696       73785 :         if (xchk_should_terminate(sc, &error))
     697           0 :                 goto err_finocur;
     698             : 
     699             :         /* Reserve all the space we need to build the new btrees. */
     700       73782 :         error = xrep_newbt_alloc_blocks(&ri->new_inobt,
     701             :                         ri->new_inobt.bload.nr_blocks);
     702       73731 :         if (error)
     703          44 :                 goto err_finocur;
     704             : 
     705       73687 :         if (need_finobt) {
     706       73649 :                 error = xrep_newbt_alloc_blocks(&ri->new_finobt,
     707             :                                 ri->new_finobt.bload.nr_blocks);
     708       73720 :                 if (error)
     709           0 :                         goto err_finocur;
     710             :         }
     711             : 
     712             :         /* Add all inobt records. */
     713       73758 :         ri->array_cur = XFARRAY_CURSOR_INIT;
     714       73758 :         error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
     715       73705 :         if (error)
     716           0 :                 goto err_finocur;
     717             : 
     718             :         /* Add all finobt records. */
     719       73705 :         if (need_finobt) {
     720       73699 :                 ri->array_cur = XFARRAY_CURSOR_INIT;
     721       73699 :                 error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
     722       73731 :                 if (error)
     723           0 :                         goto err_finocur;
     724             :         }
     725             : 
     726             :         /*
     727             :          * Install the new btrees in the AG header.  After this point the old
     728             :          * btrees are no longer accessible and the new trees are live.
     729             :          */
     730       73737 :         xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
     731       73718 :         xfs_btree_del_cursor(ino_cur, 0);
     732             : 
     733       73708 :         if (fino_cur) {
     734       73708 :                 xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
     735       73724 :                 xfs_btree_del_cursor(fino_cur, 0);
     736             :         }
     737             : 
     738             :         /* Reset the AGI counters now that we've changed the inode roots. */
     739       73738 :         error = xrep_ibt_reset_counters(ri);
     740       73679 :         if (error)
     741           0 :                 goto err_finobt;
     742             : 
     743             :         /* Free unused blocks and bitmap. */
     744       73679 :         if (need_finobt) {
     745       73660 :                 error = xrep_newbt_commit(&ri->new_finobt);
     746       73652 :                 if (error)
     747           0 :                         goto err_inobt;
     748             :         }
     749       73671 :         error = xrep_newbt_commit(&ri->new_inobt);
     750       73740 :         if (error)
     751             :                 return error;
     752             : 
     753       73740 :         return xrep_roll_ag_trans(sc);
     754             : 
     755          44 : err_finocur:
     756          44 :         if (need_finobt)
     757          44 :                 xfs_btree_del_cursor(fino_cur, error);
     758           0 : err_inocur:
     759          44 :         xfs_btree_del_cursor(ino_cur, error);
     760          44 : err_finobt:
     761          44 :         if (need_finobt)
     762          44 :                 xrep_newbt_cancel(&ri->new_finobt);
     763           0 : err_inobt:
     764          44 :         xrep_newbt_cancel(&ri->new_inobt);
     765          44 :         return error;
     766             : }
     767             : 
     768             : /*
     769             :  * Now that we've logged the roots of the new btrees, invalidate all of the
     770             :  * old blocks and free them.
     771             :  */
     772             : STATIC int
     773       73476 : xrep_ibt_remove_old_trees(
     774             :         struct xrep_ibt         *ri)
     775             : {
     776       73476 :         struct xfs_scrub        *sc = ri->sc;
     777       73476 :         int                     error;
     778             : 
     779             :         /*
     780             :          * Free the old inode btree blocks if they're not in use.  It's ok to
     781             :          * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
     782             :          * reservation because we reset the reservation before releasing the
     783             :          * AGI and AGF header buffer locks.
     784             :          */
     785       73476 :         error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
     786             :                         &XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
     787       73601 :         if (error)
     788             :                 return error;
     789             : 
     790             :         /*
     791             :          * If the finobt is enabled and has a per-AG reservation, make sure we
     792             :          * reinitialize the per-AG reservations.
     793             :          */
     794       73601 :         if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
     795       73516 :                 sc->flags |= XREP_RESET_PERAG_RESV;
     796             : 
     797             :         return 0;
     798             : }
     799             : 
     800             : /* Repair both inode btrees. */
     801             : int
     802      102857 : xrep_iallocbt(
     803             :         struct xfs_scrub        *sc)
     804             : {
     805      102857 :         struct xrep_ibt         *ri;
     806      102857 :         struct xfs_mount        *mp = sc->mp;
     807      102857 :         char                    *descr;
     808      102857 :         xfs_agino_t             first_agino, last_agino;
     809      102857 :         int                     error = 0;
     810             : 
     811             :         /* We require the rmapbt to rebuild anything. */
     812      102857 :         if (!xfs_has_rmapbt(mp))
     813             :                 return -EOPNOTSUPP;
     814             : 
     815       73788 :         ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
     816       73784 :         if (!ri)
     817             :                 return -ENOMEM;
     818       73784 :         ri->sc = sc;
     819             : 
     820             :         /* We rebuild both inode btrees. */
     821       73784 :         sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;
     822             : 
     823             :         /* Set up enough storage to handle an AG with nothing but inodes. */
     824       73784 :         xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
     825       73778 :         last_agino /= XFS_INODES_PER_CHUNK;
     826       73778 :         descr = xchk_xfile_ag_descr(sc, "inode index records");
     827       73758 :         error = xfarray_create(descr, last_agino,
     828             :                         sizeof(struct xfs_inobt_rec_incore),
     829             :                         &ri->inode_records);
     830       73731 :         kfree(descr);
     831       73739 :         if (error)
     832           0 :                 goto out_ri;
     833             : 
     834             :         /* Collect the inode data and find the old btree blocks. */
     835       73739 :         xagb_bitmap_init(&ri->old_iallocbt_blocks);
     836       73716 :         error = xrep_ibt_find_inodes(ri);
     837       73775 :         if (error)
     838           0 :                 goto out_bitmap;
     839             : 
     840             :         /* Rebuild the inode indexes. */
     841       73775 :         error = xrep_ibt_build_new_trees(ri);
     842       73560 :         if (error)
     843          44 :                 goto out_bitmap;
     844             : 
     845             :         /* Kill the old tree. */
     846       73516 :         error = xrep_ibt_remove_old_trees(ri);
     847             : 
     848       73665 : out_bitmap:
     849       73665 :         xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
     850       73584 :         xfarray_destroy(ri->inode_records);
     851       73659 : out_ri:
     852       73659 :         kfree(ri);
     853       73659 :         return error;
     854             : }
     855             : 
     856             : /* Make sure both btrees are ok after we've rebuilt them. */
     857             : int
     858       73616 : xrep_revalidate_iallocbt(
     859             :         struct xfs_scrub        *sc)
     860             : {
     861       73616 :         __u32                   old_type = sc->sm->sm_type;
     862       73616 :         int                     error;
     863             : 
     864             :         /*
     865             :          * We must update sm_type temporarily so that the tree-to-tree cross
     866             :          * reference checks will work in the correct direction, and also so
     867             :          * that tracing will report correctly if there are more errors.
     868             :          */
     869       73616 :         sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
     870       73616 :         error = xchk_inobt(sc);
     871       73709 :         if (error)
     872           0 :                 goto out;
     873             : 
     874       73709 :         if (xfs_has_finobt(sc->mp)) {
     875       73709 :                 sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
     876       73709 :                 error = xchk_finobt(sc);
     877             :         }
     878             : 
     879           0 : out:
     880       73731 :         sc->sm->sm_type = old_type;
     881       73731 :         return error;
     882             : }

Generated by: LCOV version 1.14