LCOV - code coverage report
Current view: top level - fs/xfs/scrub - health.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwa @ Mon Jul 31 20:08:17 PDT 2023 Lines: 42 52 80.8 %
Date: 2023-07-31 20:08:17 Functions: 3 3 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_trans_resv.h"
      14             : #include "xfs_mount.h"
      15             : #include "xfs_ag.h"
      16             : #include "xfs_health.h"
      17             : #include "scrub/scrub.h"
      18             : #include "scrub/health.h"
      19             : 
      20             : /*
      21             :  * Scrub and In-Core Filesystem Health Assessments
      22             :  * ===============================================
      23             :  *
      24             :  * Online scrub and repair have the time and the ability to perform stronger
      25             :  * checks than we can do from the metadata verifiers, because they can
      26             :  * cross-reference records between data structures.  Therefore, scrub is in a
      27             :  * good position to update the online filesystem health assessments to reflect
      28             :  * the good/bad state of the data structure.
      29             :  *
      30             :  * We therefore extend scrub in the following ways to achieve this:
      31             :  *
      32             :  * 1. Create a "sick_mask" field in the scrub context.  When we're setting up a
      33             :  * scrub call, set this to the default XFS_SICK_* flag(s) for the selected
      34             :  * scrub type (call it A).  Scrub and repair functions can override the default
      35             :  * sick_mask value if they choose.
      36             :  *
      37             :  * 2. If the scrubber returns a runtime error code, we exit making no changes
      38             :  * to the incore sick state.
      39             :  *
      40             :  * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore
      41             :  * sick flags before exiting.
      42             :  *
      43             :  * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore
      44             :  * sick flags.  If the user didn't want to repair then we exit, leaving the
      45             :  * metadata structure unfixed and the sick flag set.
      46             :  *
      47             :  * 5. Now we know that A is corrupt and the user wants to repair, so run the
      48             :  * repairer.  If the repairer returns an error code, we exit with that error
      49             :  * code, having made no further changes to the incore sick state.
      50             :  *
      51             :  * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean,
      52             :  * use sick_mask to clear the incore sick flags.  This should have the effect
      53             :  * that A is no longer marked sick.
      54             :  *
      55             :  * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and
      56             :  * use sick_mask to set the incore sick flags.  This should have no externally
      57             :  * visible effect since we already set them in step (4).
      58             :  *
      59             :  * There are some complications to this story, however.  For certain types of
      60             :  * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild
      61             :  * both structures at the same time.  The following principles apply to this
      62             :  * type of repair strategy:
      63             :  *
      64             :  * 8. Any repair function that rebuilds multiple structures should update
      65             :  * sick_mask to reflect whatever other structures are rebuilt, and
      66             :  * verify that all the rebuilt structures can pass a scrub check.  The outcomes
      67             :  * of 5-7 still apply, but with a sick_mask that covers everything being
      68             :  * rebuilt.
      69             :  */
      70             : 
      71             : /* Map our scrub type to a sick mask and a set of health update functions. */
      72             : 
      73             : enum xchk_health_group {
      74             :         XHG_FS = 1,
      75             :         XHG_RT,
      76             :         XHG_AG,
      77             :         XHG_INO,
      78             : };
      79             : 
      80             : struct xchk_health_map {
      81             :         enum xchk_health_group  group;
      82             :         unsigned int            sick_mask;
      83             : };
      84             : 
      85             : static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
      86             :         [XFS_SCRUB_TYPE_SB]             = { XHG_AG,  XFS_SICK_AG_SB },
      87             :         [XFS_SCRUB_TYPE_AGF]            = { XHG_AG,  XFS_SICK_AG_AGF },
      88             :         [XFS_SCRUB_TYPE_AGFL]           = { XHG_AG,  XFS_SICK_AG_AGFL },
      89             :         [XFS_SCRUB_TYPE_AGI]            = { XHG_AG,  XFS_SICK_AG_AGI },
      90             :         [XFS_SCRUB_TYPE_BNOBT]          = { XHG_AG,  XFS_SICK_AG_BNOBT },
      91             :         [XFS_SCRUB_TYPE_CNTBT]          = { XHG_AG,  XFS_SICK_AG_CNTBT },
      92             :         [XFS_SCRUB_TYPE_INOBT]          = { XHG_AG,  XFS_SICK_AG_INOBT },
      93             :         [XFS_SCRUB_TYPE_FINOBT]         = { XHG_AG,  XFS_SICK_AG_FINOBT },
      94             :         [XFS_SCRUB_TYPE_RMAPBT]         = { XHG_AG,  XFS_SICK_AG_RMAPBT },
      95             :         [XFS_SCRUB_TYPE_REFCNTBT]       = { XHG_AG,  XFS_SICK_AG_REFCNTBT },
      96             :         [XFS_SCRUB_TYPE_INODE]          = { XHG_INO, XFS_SICK_INO_CORE },
      97             :         [XFS_SCRUB_TYPE_BMBTD]          = { XHG_INO, XFS_SICK_INO_BMBTD },
      98             :         [XFS_SCRUB_TYPE_BMBTA]          = { XHG_INO, XFS_SICK_INO_BMBTA },
      99             :         [XFS_SCRUB_TYPE_BMBTC]          = { XHG_INO, XFS_SICK_INO_BMBTC },
     100             :         [XFS_SCRUB_TYPE_DIR]            = { XHG_INO, XFS_SICK_INO_DIR },
     101             :         [XFS_SCRUB_TYPE_XATTR]          = { XHG_INO, XFS_SICK_INO_XATTR },
     102             :         [XFS_SCRUB_TYPE_SYMLINK]        = { XHG_INO, XFS_SICK_INO_SYMLINK },
     103             :         [XFS_SCRUB_TYPE_PARENT]         = { XHG_INO, XFS_SICK_INO_PARENT },
     104             :         [XFS_SCRUB_TYPE_RTBITMAP]       = { XHG_RT,  XFS_SICK_RT_BITMAP },
     105             :         [XFS_SCRUB_TYPE_RTSUM]          = { XHG_RT,  XFS_SICK_RT_SUMMARY },
     106             :         [XFS_SCRUB_TYPE_UQUOTA]         = { XHG_FS,  XFS_SICK_FS_UQUOTA },
     107             :         [XFS_SCRUB_TYPE_GQUOTA]         = { XHG_FS,  XFS_SICK_FS_GQUOTA },
     108             :         [XFS_SCRUB_TYPE_PQUOTA]         = { XHG_FS,  XFS_SICK_FS_PQUOTA },
     109             :         [XFS_SCRUB_TYPE_FSCOUNTERS]     = { XHG_FS,  XFS_SICK_FS_COUNTERS },
     110             : };
     111             : 
     112             : /* Return the health status mask for this scrub type. */
     113             : unsigned int
     114  1873430064 : xchk_health_mask_for_scrub_type(
     115             :         __u32                   scrub_type)
     116             : {
     117  1873430064 :         return type_to_health_flag[scrub_type].sick_mask;
     118             : }
     119             : 
     120             : /*
     121             :  * Update filesystem health assessments based on what we found and did.
     122             :  *
     123             :  * If the scrubber finds errors, we mark sick whatever's mentioned in
     124             :  * sick_mask, no matter whether this is a first scan or an
     125             :  * evaluation of repair effectiveness.
     126             :  *
     127             :  * Otherwise, no direct corruption was found, so mark whatever's in
     128             :  * sick_mask as healthy.
     129             :  */
     130             : void
     131  1196649997 : xchk_update_health(
     132             :         struct xfs_scrub        *sc)
     133             : {
     134  1196649997 :         struct xfs_perag        *pag;
     135  1196649997 :         bool                    bad;
     136             : 
     137  1196649997 :         if (!sc->sick_mask)
     138             :                 return;
     139             : 
     140  1196391035 :         bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
     141             :                                    XFS_SCRUB_OFLAG_XCORRUPT));
     142  1196391035 :         switch (type_to_health_flag[sc->sm->sm_type].group) {
     143     9080537 :         case XHG_AG:
     144     9080537 :                 pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
     145     9080291 :                 if (bad)
     146           2 :                         xfs_ag_mark_sick(pag, sc->sick_mask);
     147             :                 else
     148     9080289 :                         xfs_ag_mark_healthy(pag, sc->sick_mask);
     149     9080571 :                 xfs_perag_put(pag);
     150     9080571 :                 break;
     151  1186628399 :         case XHG_INO:
     152  1186628399 :                 if (!sc->ip)
     153             :                         return;
     154  1186628399 :                 if (bad)
     155           6 :                         xfs_inode_mark_sick(sc->ip, sc->sick_mask);
     156             :                 else
     157  1186628393 :                         xfs_inode_mark_healthy(sc->ip, sc->sick_mask);
     158             :                 break;
     159      711642 :         case XHG_FS:
     160      711642 :                 if (bad)
     161           0 :                         xfs_fs_mark_sick(sc->mp, sc->sick_mask);
     162             :                 else
     163      711642 :                         xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
     164             :                 break;
     165       79603 :         case XHG_RT:
     166       79603 :                 if (bad)
     167           0 :                         xfs_rt_mark_sick(sc->mp, sc->sick_mask);
     168             :                 else
     169       79603 :                         xfs_rt_mark_healthy(sc->mp, sc->sick_mask);
     170             :                 break;
     171           0 :         default:
     172           0 :                 ASSERT(0);
     173           0 :                 break;
     174             :         }
     175             : }
     176             : 
     177             : /* Is the given per-AG btree healthy enough for scanning? */
     178             : bool
     179  2771650084 : xchk_ag_btree_healthy_enough(
     180             :         struct xfs_scrub        *sc,
     181             :         struct xfs_perag        *pag,
     182             :         xfs_btnum_t             btnum)
     183             : {
     184  2771650084 :         unsigned int            mask = 0;
     185             : 
     186             :         /*
     187             :          * We always want the cursor if it's the same type as whatever we're
     188             :          * scrubbing, even if we already know the structure is corrupt.
     189             :          *
     190             :          * Otherwise, we're only interested in the btree for cross-referencing.
     191             :          * If we know the btree is bad then don't bother, just set XFAIL.
     192             :          */
     193  2771650084 :         switch (btnum) {
     194   526949382 :         case XFS_BTNUM_BNO:
     195   526949382 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT)
     196             :                         return true;
     197             :                 mask = XFS_SICK_AG_BNOBT;
     198             :                 break;
     199   526949678 :         case XFS_BTNUM_CNT:
     200   526949678 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT)
     201             :                         return true;
     202             :                 mask = XFS_SICK_AG_CNTBT;
     203             :                 break;
     204   526950940 :         case XFS_BTNUM_INO:
     205   526950940 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT)
     206             :                         return true;
     207             :                 mask = XFS_SICK_AG_INOBT;
     208             :                 break;
     209   526950735 :         case XFS_BTNUM_FINO:
     210   526950735 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT)
     211             :                         return true;
     212             :                 mask = XFS_SICK_AG_FINOBT;
     213             :                 break;
     214   331924017 :         case XFS_BTNUM_RMAP:
     215   331924017 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT)
     216             :                         return true;
     217             :                 mask = XFS_SICK_AG_RMAPBT;
     218             :                 break;
     219   331925332 :         case XFS_BTNUM_REFC:
     220   331925332 :                 if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT)
     221             :                         return true;
     222             :                 mask = XFS_SICK_AG_REFCNTBT;
     223             :                 break;
     224           0 :         default:
     225           0 :                 ASSERT(0);
     226           0 :                 return true;
     227             :         }
     228             : 
     229  2767430594 :         if (xfs_ag_has_sickness(pag, mask)) {
     230           0 :                 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
     231           0 :                 return false;
     232             :         }
     233             : 
     234             :         return true;
     235             : }

Generated by: LCOV version 1.14