LCOV - code coverage report
Current view: top level - fs/xfs/scrub - health.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 74 90 82.2 %
Date: 2023-07-31 20:08:12 Functions: 5 5 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_trans_resv.h"
      14             : #include "xfs_mount.h"
      15             : #include "xfs_ag.h"
      16             : #include "xfs_health.h"
      17             : #include "scrub/scrub.h"
      18             : #include "scrub/health.h"
      19             : #include "scrub/common.h"
      20             : 
      21             : /*
      22             :  * Scrub and In-Core Filesystem Health Assessments
      23             :  * ===============================================
      24             :  *
      25             :  * Online scrub and repair have the time and the ability to perform stronger
      26             :  * checks than we can do from the metadata verifiers, because they can
      27             :  * cross-reference records between data structures.  Therefore, scrub is in a
      28             :  * good position to update the online filesystem health assessments to reflect
      29             :  * the good/bad state of the data structure.
      30             :  *
      31             :  * We therefore extend scrub in the following ways to achieve this:
      32             :  *
      33             :  * 1. Create a "sick_mask" field in the scrub context.  When we're setting up a
      34             :  * scrub call, set this to the default XFS_SICK_* flag(s) for the selected
      35             :  * scrub type (call it A).  Scrub and repair functions can override the default
      36             :  * sick_mask value if they choose.
      37             :  *
      38             :  * 2. If the scrubber returns a runtime error code, we exit making no changes
      39             :  * to the incore sick state.
      40             :  *
      41             :  * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore
      42             :  * sick flags before exiting.
      43             :  *
      44             :  * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore
      45             :  * sick flags.  If the user didn't want to repair then we exit, leaving the
      46             :  * metadata structure unfixed and the sick flag set.
      47             :  *
      48             :  * 5. Now we know that A is corrupt and the user wants to repair, so run the
      49             :  * repairer.  If the repairer returns an error code, we exit with that error
      50             :  * code, having made no further changes to the incore sick state.
      51             :  *
      52             :  * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean,
      53             :  * use sick_mask to clear the incore sick flags.  This should have the effect
      54             :  * that A is no longer marked sick.
      55             :  *
      56             :  * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and
      57             :  * use sick_mask to set the incore sick flags.  This should have no externally
      58             :  * visible effect since we already set them in step (4).
      59             :  *
      60             :  * There are some complications to this story, however.  For certain types of
      61             :  * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild
      62             :  * both structures at the same time.  The following principles apply to this
      63             :  * type of repair strategy:
      64             :  *
      65             :  * 8. Any repair function that rebuilds multiple structures should update
      66             :  * sick_mask to reflect whatever other structures are rebuilt, and
      67             :  * verify that all the rebuilt structures can pass a scrub check.  The outcomes
      68             :  * of 5-7 still apply, but with a sick_mask that covers everything being
      69             :  * rebuilt.
      70             :  */
      71             : 
      72             : /* Map each scrub type to its sick mask and the health group to update. */
      73             : 
      74             : enum xchk_health_group {
      75             :         XHG_FS = 1,
      76             :         XHG_RT,
      77             :         XHG_AG,
      78             :         XHG_INO,
      79             : };
      80             : 
      81             : struct xchk_health_map {
      82             :         enum xchk_health_group  group;
      83             :         unsigned int            sick_mask;
      84             : };
      85             : 
      86             : static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
      87             :         [XFS_SCRUB_TYPE_SB]             = { XHG_AG,  XFS_SICK_AG_SB },
      88             :         [XFS_SCRUB_TYPE_AGF]            = { XHG_AG,  XFS_SICK_AG_AGF },
      89             :         [XFS_SCRUB_TYPE_AGFL]           = { XHG_AG,  XFS_SICK_AG_AGFL },
      90             :         [XFS_SCRUB_TYPE_AGI]            = { XHG_AG,  XFS_SICK_AG_AGI },
      91             :         [XFS_SCRUB_TYPE_BNOBT]          = { XHG_AG,  XFS_SICK_AG_BNOBT },
      92             :         [XFS_SCRUB_TYPE_CNTBT]          = { XHG_AG,  XFS_SICK_AG_CNTBT },
      93             :         [XFS_SCRUB_TYPE_INOBT]          = { XHG_AG,  XFS_SICK_AG_INOBT },
      94             :         [XFS_SCRUB_TYPE_FINOBT]         = { XHG_AG,  XFS_SICK_AG_FINOBT },
      95             :         [XFS_SCRUB_TYPE_RMAPBT]         = { XHG_AG,  XFS_SICK_AG_RMAPBT },
      96             :         [XFS_SCRUB_TYPE_REFCNTBT]       = { XHG_AG,  XFS_SICK_AG_REFCNTBT },
      97             :         [XFS_SCRUB_TYPE_INODE]          = { XHG_INO, XFS_SICK_INO_CORE },
      98             :         [XFS_SCRUB_TYPE_BMBTD]          = { XHG_INO, XFS_SICK_INO_BMBTD },
      99             :         [XFS_SCRUB_TYPE_BMBTA]          = { XHG_INO, XFS_SICK_INO_BMBTA },
     100             :         [XFS_SCRUB_TYPE_BMBTC]          = { XHG_INO, XFS_SICK_INO_BMBTC },
     101             :         [XFS_SCRUB_TYPE_DIR]            = { XHG_INO, XFS_SICK_INO_DIR },
     102             :         [XFS_SCRUB_TYPE_XATTR]          = { XHG_INO, XFS_SICK_INO_XATTR },
     103             :         [XFS_SCRUB_TYPE_SYMLINK]        = { XHG_INO, XFS_SICK_INO_SYMLINK },
     104             :         [XFS_SCRUB_TYPE_PARENT]         = { XHG_INO, XFS_SICK_INO_PARENT },
     105             :         [XFS_SCRUB_TYPE_RTBITMAP]       = { XHG_RT,  XFS_SICK_RT_BITMAP },
     106             :         [XFS_SCRUB_TYPE_RTSUM]          = { XHG_RT,  XFS_SICK_RT_SUMMARY },
     107             :         [XFS_SCRUB_TYPE_UQUOTA]         = { XHG_FS,  XFS_SICK_FS_UQUOTA },
     108             :         [XFS_SCRUB_TYPE_GQUOTA]         = { XHG_FS,  XFS_SICK_FS_GQUOTA },
     109             :         [XFS_SCRUB_TYPE_PQUOTA]         = { XHG_FS,  XFS_SICK_FS_PQUOTA },
     110             :         [XFS_SCRUB_TYPE_FSCOUNTERS]     = { XHG_FS,  XFS_SICK_FS_COUNTERS },
     111             :         [XFS_SCRUB_TYPE_QUOTACHECK]     = { XHG_FS,  XFS_SICK_FS_QUOTACHECK },
     112             :         [XFS_SCRUB_TYPE_NLINKS]         = { XHG_FS,  XFS_SICK_FS_NLINKS },
     113             :         [XFS_SCRUB_TYPE_DIRTREE]        = { XHG_INO, XFS_SICK_INO_DIRTREE },
     114             : };
     115             : 
     116             : /* Return the health status mask for this scrub type. */
     117             : unsigned int
     118  1083774506 : xchk_health_mask_for_scrub_type(
     119             :         __u32                   scrub_type)
     120             : {
     121  1083774506 :         return type_to_health_flag[scrub_type].sick_mask;
     122             : }
     123             : 
     124             : /*
     125             :  * Scrub gave the filesystem a clean bill of health, so clear all the indirect
     126             :  * markers of past problems (at least for the fs, rt, and ags) so that we can
     127             :  * be healthy again.
     128             :  */
     129             : STATIC void
     130       25693 : xchk_mark_all_healthy(
     131             :         struct xfs_mount        *mp)
     132             : {
     133       25693 :         struct xfs_perag        *pag;
     134       25693 :         xfs_agnumber_t          agno;
     135             : 
     136       25693 :         xfs_fs_mark_healthy(mp, XFS_SICK_FS_INDIRECT);
     137       25693 :         xfs_rt_mark_healthy(mp, XFS_SICK_RT_INDIRECT);
     138      179681 :         for_each_perag(mp, agno, pag)
     139      153988 :                 xfs_ag_mark_healthy(pag, XFS_SICK_AG_INDIRECT);
     140       25693 : }
     141             : 
     142             : /*
     143             :  * Update filesystem health assessments based on what we found and did.
     144             :  *
     145             :  * If the scrubber finds errors, we mark sick whatever's mentioned in
     146             :  * sick_mask, no matter whether this is a first scan or an
     147             :  * evaluation of repair effectiveness.
     148             :  *
     149             :  * Otherwise, no direct corruption was found, so mark whatever's in
     150             :  * sick_mask as healthy.
     151             :  */
     152             : void
     153   924489844 : xchk_update_health(
     154             :         struct xfs_scrub        *sc)
     155             : {
     156   924489844 :         struct xfs_perag        *pag;
     157   924489844 :         bool                    bad;
     158             : 
     159             :         /*
     160             :          * The HEALTHY scrub type is a request from userspace to clear all the
     161             :          * indirect flags after a clean scan of the entire filesystem.  As such
     162             :          * there's no sick flag defined for it, so we branch here ahead of the
     163             :          * mask check.
     164             :          */
     165   924489844 :         if (sc->sm->sm_type == XFS_SCRUB_TYPE_HEALTHY &&
     166             :             !(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
     167       25693 :                 xchk_mark_all_healthy(sc->mp);
     168       25693 :                 return;
     169             :         }
     170             : 
     171   924464151 :         if (!sc->sick_mask)
     172             :                 return;
     173             : 
     174   924399628 :         bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
     175             :                                    XFS_SCRUB_OFLAG_XCORRUPT));
     176   924399628 :         switch (type_to_health_flag[sc->sm->sm_type].group) {
     177     4450136 :         case XHG_AG:
     178     4450136 :                 pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
     179     4451835 :                 if (bad) {
     180          99 :                         xfs_ag_mark_sick(pag, sc->sick_mask);
     181          99 :                         xfs_ag_mark_checked(pag, sc->sick_mask);
     182             :                 } else
     183     4451736 :                         xfs_ag_mark_healthy(pag, sc->sick_mask);
     184     4451539 :                 xfs_perag_put(pag);
     185     4451539 :                 break;
     186   918452451 :         case XHG_INO:
     187   918452451 :                 if (!sc->ip)
     188             :                         return;
     189   918452451 :                 if (bad) {
     190          33 :                         unsigned int    mask = sc->sick_mask;
     191             : 
     192             :                         /*
     193             :                          * If we're coming in for repairs then we don't want
     194             :                          * sickness flags to propagate to the incore health
     195             :                          * status if the inode gets inactivated before we can
     196             :                          * fix it.
     197             :                          */
     198          33 :                         if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
     199           0 :                                 mask |= XFS_SICK_INO_FORGET;
     200          33 :                         xfs_inode_mark_sick(sc->ip, mask);
     201          33 :                         xfs_inode_mark_checked(sc->ip, sc->sick_mask);
     202             :                 } else
     203   918452418 :                         xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
     204             :                 break;
     205      253256 :         case XHG_FS:
     206      253256 :                 if (bad) {
     207           0 :                         xfs_fs_mark_sick(sc->mp, sc->sick_mask);
     208           0 :                         xfs_fs_mark_checked(sc->mp, sc->sick_mask);
     209             :                 } else
     210      253256 :                         xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
     211             :                 break;
     212       78132 :         case XHG_RT:
     213       78132 :                 if (bad) {
     214           0 :                         xfs_rt_mark_sick(sc->mp, sc->sick_mask);
     215           0 :                         xfs_rt_mark_checked(sc->mp, sc->sick_mask);
     216             :                 } else
     217       78132 :                         xfs_rt_mark_healthy(sc->mp, sc->sick_mask);
     218             :                 break;
     219           0 :         default:
     220           0 :                 ASSERT(0);
     221           0 :                 break;
     222             :         }
     223             : }
     224             : 
     225             : /* Is the given per-AG btree healthy enough for scanning? */
     226             : bool
     227  2434085051 : xchk_ag_btree_healthy_enough(
     228             :         struct xfs_scrub        *sc,
     229             :         struct xfs_perag        *pag,
     230             :         xfs_btnum_t             btnum)
     231             : {
     232  2434085051 :         unsigned int            mask = 0;
     233             : 
     234             :         /*
     235             :          * We always want the cursor if it's the same type as whatever we're
     236             :          * scrubbing, even if we already know the structure is corrupt.
     237             :          *
     238             :          * Otherwise, we're only interested in the btree for cross-referencing.
     239             :          * If we know the btree is bad then don't bother, just set XFAIL.
     240             :          */
     241  2434085051 :         switch (btnum) {
     242   442366291 :         case XFS_BTNUM_BNO:
     243   442366291 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
     244             :                         return true;
     245             :                 mask = XFS_SICK_AG_BNOBT;
     246             :                 break;
     247   443052641 :         case XFS_BTNUM_CNT:
     248   443052641 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT)
     249             :                         return true;
     250             :                 mask = XFS_SICK_AG_CNTBT;
     251             :                 break;
     252   443412479 :         case XFS_BTNUM_INO:
     253   443412479 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT)
     254             :                         return true;
     255             :                 mask = XFS_SICK_AG_INOBT;
     256             :                 break;
     257   443042506 :         case XFS_BTNUM_FINO:
     258   443042506 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
     259             :                         return true;
     260             :                 mask = XFS_SICK_AG_FINOBT;
     261             :                 break;
     262   331225575 :         case XFS_BTNUM_RMAP:
     263   331225575 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT)
     264             :                         return true;
     265             :                 mask = XFS_SICK_AG_RMAPBT;
     266             :                 break;
     267   330985559 :         case XFS_BTNUM_REFC:
     268   330985559 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT)
     269             :                         return true;
     270             :                 mask = XFS_SICK_AG_REFCNTBT;
     271             :                 break;
     272           0 :         default:
     273           0 :                 ASSERT(0);
     274           0 :                 return true;
     275             :         }
     276             : 
     277             :         /*
     278             :          * If we just repaired some AG metadata, sc->sick_mask will reflect all
     279             :          * the per-AG metadata types that were repaired.  Exclude these from
     280             :          * the filesystem health query because we have not yet updated the
     281             :          * health status and we want everything to be scanned.
     282             :          */
     283  2432379587 :         if ((sc->flags & XREP_ALREADY_FIXED) &&
     284   106067785 :             type_to_health_flag[sc->sm->sm_type].group == XHG_AG)
     285     3793360 :                 mask &= ~sc->sick_mask;
     286             : 
     287  2432379653 :         if (xfs_ag_has_sickness(pag, mask)) {
     288           0 :                 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
     289           0 :                 return false;
     290             :         }
     291             : 
     292             :         return true;
     293             : }
     294             : 
     295             : /*
     296             :  * Quick scan to double-check that there isn't any evidence of lingering
     297             :  * primary health problems.  If we're still clear, then the health update will
     298             :  * take care of clearing the indirect evidence.
     299             :  */
     300             : int
     301       25693 : xchk_health_record(
     302             :         struct xfs_scrub        *sc)
     303             : {
     304       25693 :         struct xfs_mount        *mp = sc->mp;
     305       25693 :         struct xfs_perag        *pag;
     306       25693 :         xfs_agnumber_t          agno;
     307             : 
     308       25693 :         unsigned int            sick;
     309       25693 :         unsigned int            checked;
     310             : 
     311       25693 :         xfs_fs_measure_sickness(mp, &sick, &checked);
     312       25693 :         if (sick & XFS_SICK_FS_PRIMARY)
     313           0 :                 xchk_set_corrupt(sc);
     314             : 
     315       25693 :         xfs_rt_measure_sickness(mp, &sick, &checked);
     316       25693 :         if (sick & XFS_SICK_RT_PRIMARY)
     317           0 :                 xchk_set_corrupt(sc);
     318             : 
     319      179681 :         for_each_perag(mp, agno, pag) {
     320      153988 :                 xfs_ag_measure_sickness(pag, &sick, &checked);
     321      153988 :                 if (sick & XFS_SICK_AG_PRIMARY)
     322           0 :                         xchk_set_corrupt(sc);
     323             :         }
     324             : 
     325       25693 :         return 0;
     326             : }

Generated by: LCOV version 1.14