LCOV - code coverage report
Current view: top level - fs/xfs/scrub - scrub.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 244 273 89.4 %
Date: 2023-07-31 20:08:34 Functions: 9 9 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_log_format.h"
      13             : #include "xfs_trans.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_quota.h"
      16             : #include "xfs_qm.h"
      17             : #include "xfs_scrub.h"
      18             : #include "xfs_btree.h"
      19             : #include "xfs_btree_staging.h"
      20             : #include "xfs_buf_xfile.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_xchgrange.h"
      23             : #include "xfs_swapext.h"
      24             : #include "xfs_da_format.h"
      25             : #include "xfs_da_btree.h"
      26             : #include "xfs_xattr.h"
      27             : #include "xfs_dir2.h"
      28             : #include "xfs_icache.h"
      29             : #include "scrub/scrub.h"
      30             : #include "scrub/common.h"
      31             : #include "scrub/trace.h"
      32             : #include "scrub/repair.h"
      33             : #include "scrub/health.h"
      34             : #include "scrub/stats.h"
      35             : #include "scrub/xfile.h"
      36             : #include "scrub/tempfile.h"
      37             : #include "scrub/orphanage.h"
      38             : 
      39             : /*
      40             :  * Online Scrub and Repair
      41             :  *
      42             :  * Traditionally, XFS (the kernel driver) did not know how to check or
      43             :  * repair on-disk data structures.  That task was left to the xfs_check
      44             :  * and xfs_repair tools, both of which require taking the filesystem
      45             :  * offline for a thorough but time-consuming examination.  Online
      46             :  * scrub & repair, on the other hand, enables us to check the metadata
      47             :  * for obvious errors while carefully stepping around the filesystem's
      48             :  * ongoing operations, locking rules, etc.
      49             :  *
      50             :  * Given that most XFS metadata consist of records stored in a btree,
      51             :  * most of the checking functions iterate the btree blocks themselves
      52             :  * looking for irregularities.  When a record block is encountered, each
      53             :  * record can be checked for obviously bad values.  Record values can
      54             :  * also be cross-referenced against other btrees to look for potential
      55             :  * misunderstandings between pieces of metadata.
      56             :  *
      57             :  * It is expected that the checkers responsible for per-AG metadata
      58             :  * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
      59             :  * metadata structure, and perform any relevant cross-referencing before
      60             :  * unlocking the AG and returning the results to userspace.  These
      61             :  * scrubbers must not keep an AG locked for too long to avoid tying up
      62             :  * the block and inode allocators.
      63             :  *
      64             :  * Block maps and b-trees rooted in an inode present a special challenge
      65             :  * because they can involve extents from any AG.  The general scrubber
      66             :  * structure of lock -> check -> xref -> unlock still holds, but AG
      67             :  * locking order rules /must/ be obeyed to avoid deadlocks.  The
      68             :  * ordering rule, of course, is that we must lock in increasing AG
      69             :  * order.  Helper functions are provided to track which AG headers we've
      70             :  * already locked.  If we detect an imminent locking order violation, we
      71             :  * can signal a potential deadlock, in which case the scrubber can jump
      72             :  * out to the top level, lock all the AGs in order, and retry the scrub.
      73             :  *
      74             :  * To scrub file data (directories, extended attributes, symlinks), we
      75             :  * can simply lock the inode and walk the data.  For btree data
      76             :  * (directories and attributes) we follow the same btree-scrubbing
      77             :  * strategy outlined previously to check the records.
      78             :  *
      79             :  * We use a bit of trickery with transactions to avoid buffer deadlocks
      80             :  * if there is a cycle in the metadata.  The basic problem is that
      81             :  * travelling down a btree involves locking the current buffer at each
      82             :  * tree level.  If a pointer should somehow point back to a buffer that
      83             :  * we've already examined, we will deadlock due to the second buffer
      84             :  * locking attempt.  Note however that grabbing a buffer in transaction
      85             :  * context links the locked buffer to the transaction.  If we try to
      86             :  * re-grab the buffer in the context of the same transaction, we avoid
      87             :  * the second lock attempt and continue.  Between the verifier and the
      88             :  * scrubber, something will notice that something is amiss and report
      89             :  * the corruption.  Therefore, each scrubber will allocate an empty
      90             :  * transaction, attach buffers to it, and cancel the transaction at the
      91             :  * end of the scrub run.  Cancelling a non-dirty transaction simply
      92             :  * unlocks the buffers.
      93             :  *
      94             :  * There are four pieces of data that scrub can communicate to
      95             :  * userspace.  The first is the error code (errno), which can be used to
      96             :  * communicate operational errors in performing the scrub.  There are
      97             :  * also three flags that can be set in the scrub context.  If the data
      98             :  * structure itself is corrupt, the CORRUPT flag will be set.  If
      99             :  * the metadata is correct but otherwise suboptimal, the PREEN flag
     100             :  * will be set.
     101             :  *
     102             :  * We perform secondary validation of filesystem metadata by
     103             :  * cross-referencing every record with all other available metadata.
     104             :  * For example, for block mapping extents, we verify that there are no
     105             :  * records in the free space and inode btrees corresponding to that
     106             :  * space extent and that there is a corresponding entry in the reverse
     107             :  * mapping btree.  Inconsistent metadata is noted by setting the
     108             :  * XCORRUPT flag; btree query function errors are noted by setting the
     109             :  * XFAIL flag and deleting the cursor to prevent further attempts to
     110             :  * cross-reference with a defective btree.
     111             :  *
     112             :  * If a piece of metadata proves corrupt or suboptimal, the userspace
     113             :  * program can ask the kernel to apply some tender loving care (TLC) to
     114             :  * the metadata object by setting the REPAIR flag and re-calling the
     115             :  * scrub ioctl.  "Corruption" is defined by metadata violating the
     116             :  * on-disk specification; operations cannot continue if the violation is
     117             :  * left untreated.  It is possible for XFS to continue if an object is
     118             :  * "suboptimal", however performance may be degraded.  Repairs are
     119             :  * usually performed by rebuilding the metadata entirely out of
     120             :  * redundant metadata.  Optimizing, on the other hand, can sometimes be
     121             :  * done without rebuilding entire structures.
     122             :  *
     123             :  * Generally speaking, the repair code has the following code structure:
     124             :  * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
     125             :  * The first check helps us figure out if we need to rebuild or simply
     126             :  * optimize the structure so that the rebuild knows what to do.  The
     127             :  * second check evaluates the completeness of the repair; that is what
     128             :  * is reported to userspace.
     129             :  *
     130             :  * A quick note on symbol prefixes:
     131             :  * - "xfs_" are general XFS symbols.
     132             :  * - "xchk_" are symbols related to metadata checking.
     133             :  * - "xrep_" are symbols related to metadata repair.
     134             :  * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS.
     135             :  */
     136             : 
     137             : /*
     138             :  * Scrub probe -- userspace uses this to probe if we're willing to scrub
     139             :  * or repair a given mountpoint.  This will be used by xfs_scrub to
     140             :  * probe the kernel's abilities to scrub (and repair) the metadata.  We
     141             :  * do this by validating the ioctl inputs from userspace, preparing the
     142             :  * filesystem for a scrub (or a repair) operation, and immediately
     143             :  * returning to userspace.  Userspace can use the returned errno and
     144             :  * structure state to decide (in broad terms) if scrub/repair are
     145             :  * supported by the running kernel.
     146             :  */
     147             : static int  /* Returns 0, or the value left in error when xchk_should_terminate() reports true. */
     148       51449 : xchk_probe(
     149             :         struct xfs_scrub        *sc)  /* scrub context; only handed on to xchk_should_terminate() */
     150             : {
     151       51449 :         int                     error = 0;
     152             : 
     153       51449 :         if (xchk_should_terminate(sc, &error))  /* presumably fills in error when it says stop -- confirm in the helper */
     154           0 :                 return error;  /* early-exit path; hit count is 0 in this coverage run */
     155             : 
     156             :         return 0;  /* no metadata is examined here; reaching this point is the whole probe */
     157             : }
     158             : 
     159             : /* Scrub setup and teardown */
     160             : 
     161             : #define FSGATES_MASK    (XCHK_FSGATES_ALL | XREP_FSGATES_ALL)  /* union of the scrub and repair gate masks; #undef'd right after the function */
     162             : static inline void  /* For each gate bit set in sc->flags, call the matching disable/release helper, then clear those bits. */
     163   875919989 : xchk_fsgates_disable(
     164             :         struct xfs_scrub        *sc)  /* scrub context whose flags are read and then updated */
     165             : {
     166   875919989 :         if (!(sc->flags & FSGATES_MASK))  /* no gate bits set: nothing to undo */
     167             :                 return;
     168             : 
     169    33321447 :         trace_xchk_fsgates_disable(sc, sc->flags & FSGATES_MASK);  /* trace only the gate bits, not the other flags */
     170             : 
     171    33322926 :         if (sc->flags & XCHK_FSGATES_DRAIN)
     172      173576 :                 xfs_defer_drain_wait_disable();
     173             : 
     174    33322926 :         if (sc->flags & XCHK_FSGATES_QUOTA)
     175       34637 :                 xfs_dqtrx_hook_disable();
     176             : 
     177    33322926 :         if (sc->flags & XCHK_FSGATES_DIRENTS)
     178    32877526 :                 xfs_dir_hook_disable();
     179             : 
     180    33714774 :         if (sc->flags & XCHK_FSGATES_RMAP)
     181       43747 :                 xfs_rmap_hook_disable();
     182             : 
     183    33714932 :         if (sc->flags & XREP_FSGATES_ATOMIC_XCHG)  /* the two XREP_ gates take sc->mp; the XCHK_ helpers above take no argument */
     184     4951290 :                 xfs_xchg_range_rele_log_assist(sc->mp);
     185             : 
     186    33717777 :         if (sc->flags & XREP_FSGATES_LARP)
     187     5000468 :                 xfs_attr_rele_log_assist(sc->mp);
     188             : 
     189    33714730 :         sc->flags &= ~FSGATES_MASK;  /* with the bits cleared, a repeat call returns at the first check */
     190             : }
     191             : #undef FSGATES_MASK
     192             : 
     193             : /* Free all the resources and finish the transactions.  Returns @error, unless the transaction commit below replaces it. */
     194             : STATIC int
     195   877183054 : xchk_teardown(
     196             :         struct xfs_scrub        *sc,  /* scrub context whose resources are released and pointers reset to NULL */
     197             :         int                     error)  /* status so far; 0 is required for the commit branch below */
     198             : {
     199   877183054 :         xchk_ag_free(sc, &sc->sa);
     200   875991584 :         xchk_rtgroup_btcur_free(&sc->sr);
     201             : 
     202   876098688 :         if (sc->tp) {
     203   863871211 :                 if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))  /* commit only for an error-free repair request */
     204    36875721 :                         error = xfs_trans_commit(sc->tp);  /* the commit result becomes the return value */
     205             :                 else
     206   826995490 :                         xfs_trans_cancel(sc->tp);  /* any error, or no REPAIR flag: cancel instead */
     207   863537579 :                 sc->tp = NULL;
     208             :         }
     209   875765056 :         if (sc->sr.rtg)
     210      627439 :                 xchk_rtgroup_free(sc, &sc->sr);
     211             :         else  /* no rtgroup attached to sc->sr: only xchk_rt_unlock() is called */
     212   875231580 :                 xchk_rt_unlock(sc, &sc->sr);
     213   875433361 :         if (sc->ip) {
     214   856511801 :                 if (sc->ilock_flags)  /* unlock only when some ilock flags are recorded */
     215   855544034 :                         xchk_iunlock(sc, sc->ilock_flags);
     216   858139277 :                 xchk_irele(sc, sc->ip);
     217   858090098 :                 sc->ip = NULL;
     218             :         }
     219   877011658 :         if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
     220    44263813 :                 sc->flags &= ~XCHK_HAVE_FREEZE_PROT;  /* flag is cleared before the matching mnt_drop_write_file() call */
     221    44263813 :                 mnt_drop_write_file(sc->file);
     222             :         }
     223   877272435 :         if (sc->xfile_buftarg) {
     224      167841 :                 xfile_free_buftarg(sc->xfile_buftarg);
     225      167944 :                 sc->xfile_buftarg = NULL;
     226             :         }
     227   877272538 :         if (sc->xfile) {
     228      177404 :                 xfile_destroy(sc->xfile);
     229      177404 :                 sc->xfile = NULL;
     230             :         }
     231   877272538 :         if (sc->buf) {
     232   168958003 :                 if (sc->buf_cleanup)  /* optional callback, run on the buffer before it is freed */
     233   148346002 :                         sc->buf_cleanup(sc->buf);
     234   168820768 :                 kvfree(sc->buf);
     235   168907663 :                 sc->buf_cleanup = NULL;
     236   168907663 :                 sc->buf = NULL;
     237             :         }
     238             : 
     239   877222198 :         xrep_tempfile_rele(sc);
     240   875227049 :         xrep_orphanage_rele(sc);
     241   875975556 :         xchk_fsgates_disable(sc);  /* gates are dropped last, after everything above has been released */
     242   876287254 :         return error;
     243             : }
     244             : 
     245             : /* Scrubbing dispatch: one entry per XFS_SCRUB_TYPE_* value; xchk_validate_inputs() indexes this table by sm_type. */
     246             : 
     247             : static const struct xchk_meta_ops meta_scrub_ops[] = {
     248             :         [XFS_SCRUB_TYPE_PROBE] = {      /* ioctl presence test */
     249             :                 .type   = ST_NONE,  /* .type selects the sm_ino/sm_gen/sm_agno rules in xchk_validate_inputs() */
     250             :                 .setup  = xchk_setup_fs,  /* .setup and .scrub must both be set, else the type is rejected with -ENOENT */
     251             :                 .scrub  = xchk_probe,
     252             :                 .repair = xrep_probe,  /* .repair is not examined by xchk_validate_inputs() */
     253             :         },
     254             :         [XFS_SCRUB_TYPE_SB] = {         /* superblock */
     255             :                 .type   = ST_PERAG,
     256             :                 .setup  = xchk_setup_agheader,
     257             :                 .scrub  = xchk_superblock,
     258             :                 .repair = xrep_superblock,
     259             :         },
     260             :         [XFS_SCRUB_TYPE_AGF] = {        /* agf */
     261             :                 .type   = ST_PERAG,
     262             :                 .setup  = xchk_setup_agheader,
     263             :                 .scrub  = xchk_agf,
     264             :                 .repair = xrep_agf,
     265             :         },
     266             :         [XFS_SCRUB_TYPE_AGFL]= {        /* agfl */
     267             :                 .type   = ST_PERAG,
     268             :                 .setup  = xchk_setup_agheader,
     269             :                 .scrub  = xchk_agfl,
     270             :                 .repair = xrep_agfl,
     271             :         },
     272             :         [XFS_SCRUB_TYPE_AGI] = {        /* agi */
     273             :                 .type   = ST_PERAG,
     274             :                 .setup  = xchk_setup_agheader,
     275             :                 .scrub  = xchk_agi,
     276             :                 .repair = xrep_agi,
     277             :         },
     278             :         [XFS_SCRUB_TYPE_BNOBT] = {      /* bnobt */
     279             :                 .type   = ST_PERAG,
     280             :                 .setup  = xchk_setup_ag_allocbt,
     281             :                 .scrub  = xchk_bnobt,
     282             :                 .repair = xrep_allocbt,
     283             :                 .repair_eval = xrep_revalidate_allocbt,  /* optional, set on only four entries; NOTE(review): its caller is not visible in this chunk */
     284             :         },
     285             :         [XFS_SCRUB_TYPE_CNTBT] = {      /* cntbt */
     286             :                 .type   = ST_PERAG,
     287             :                 .setup  = xchk_setup_ag_allocbt,
     288             :                 .scrub  = xchk_cntbt,
     289             :                 .repair = xrep_allocbt,
     290             :                 .repair_eval = xrep_revalidate_allocbt,
     291             :         },
     292             :         [XFS_SCRUB_TYPE_INOBT] = {      /* inobt */
     293             :                 .type   = ST_PERAG,
     294             :                 .setup  = xchk_setup_ag_iallocbt,
     295             :                 .scrub  = xchk_inobt,
     296             :                 .repair = xrep_iallocbt,
     297             :                 .repair_eval = xrep_revalidate_iallocbt,
     298             :         },
     299             :         [XFS_SCRUB_TYPE_FINOBT] = {     /* finobt */
     300             :                 .type   = ST_PERAG,
     301             :                 .setup  = xchk_setup_ag_iallocbt,
     302             :                 .scrub  = xchk_finobt,
     303             :                 .has    = xfs_has_finobt,  /* optional feature test on the mount; if it returns false the type is rejected with -ENOENT */
     304             :                 .repair = xrep_iallocbt,
     305             :                 .repair_eval = xrep_revalidate_iallocbt,
     306             :         },
     307             :         [XFS_SCRUB_TYPE_RMAPBT] = {     /* rmapbt */
     308             :                 .type   = ST_PERAG,
     309             :                 .setup  = xchk_setup_ag_rmapbt,
     310             :                 .scrub  = xchk_rmapbt,
     311             :                 .has    = xfs_has_rmapbt,
     312             :                 .repair = xrep_rmapbt,
     313             :         },
     314             :         [XFS_SCRUB_TYPE_REFCNTBT] = {   /* refcountbt */
     315             :                 .type   = ST_PERAG,
     316             :                 .setup  = xchk_setup_ag_refcountbt,
     317             :                 .scrub  = xchk_refcountbt,
     318             :                 .has    = xfs_has_reflink,
     319             :                 .repair = xrep_refcountbt,
     320             :         },
     321             :         [XFS_SCRUB_TYPE_INODE] = {      /* inode record */
     322             :                 .type   = ST_INODE,
     323             :                 .setup  = xchk_setup_inode,
     324             :                 .scrub  = xchk_inode,
     325             :                 .repair = xrep_inode,
     326             :         },
     327             :         [XFS_SCRUB_TYPE_BMBTD] = {      /* inode data fork */
     328             :                 .type   = ST_INODE,
     329             :                 .setup  = xchk_setup_inode_bmap,  /* the three fork types share one setup function; scrub and repair differ */
     330             :                 .scrub  = xchk_bmap_data,
     331             :                 .repair = xrep_bmap_data,
     332             :         },
     333             :         [XFS_SCRUB_TYPE_BMBTA] = {      /* inode attr fork */
     334             :                 .type   = ST_INODE,
     335             :                 .setup  = xchk_setup_inode_bmap,
     336             :                 .scrub  = xchk_bmap_attr,
     337             :                 .repair = xrep_bmap_attr,
     338             :         },
     339             :         [XFS_SCRUB_TYPE_BMBTC] = {      /* inode CoW fork */
     340             :                 .type   = ST_INODE,
     341             :                 .setup  = xchk_setup_inode_bmap,
     342             :                 .scrub  = xchk_bmap_cow,
     343             :                 .repair = xrep_bmap_cow,
     344             :         },
     345             :         [XFS_SCRUB_TYPE_DIR] = {        /* directory */
     346             :                 .type   = ST_INODE,
     347             :                 .setup  = xchk_setup_directory,
     348             :                 .scrub  = xchk_directory,
     349             :                 .repair = xrep_directory,
     350             :         },
     351             :         [XFS_SCRUB_TYPE_XATTR] = {      /* extended attributes */
     352             :                 .type   = ST_INODE,
     353             :                 .setup  = xchk_setup_xattr,
     354             :                 .scrub  = xchk_xattr,
     355             :                 .repair = xrep_xattr,
     356             :         },
     357             :         [XFS_SCRUB_TYPE_SYMLINK] = {    /* symbolic link */
     358             :                 .type   = ST_INODE,
     359             :                 .setup  = xchk_setup_symlink,
     360             :                 .scrub  = xchk_symlink,
     361             :                 .repair = xrep_symlink,
     362             :         },
     363             :         [XFS_SCRUB_TYPE_PARENT] = {     /* parent pointers */
     364             :                 .type   = ST_INODE,
     365             :                 .setup  = xchk_setup_parent,
     366             :                 .scrub  = xchk_parent,
     367             :                 .repair = xrep_parent,
     368             :         },
     369             :         [XFS_SCRUB_TYPE_RTBITMAP] = {   /* realtime bitmap */
     370             :                 .type   = ST_FS,
     371             :                 .setup  = xchk_setup_rtbitmap,
     372             :                 .scrub  = xchk_rtbitmap,
     373             :                 .has    = xfs_has_realtime,
     374             :                 .repair = xrep_rtbitmap,
     375             :         },
     376             :         [XFS_SCRUB_TYPE_RTSUM] = {      /* realtime summary */
     377             :                 .type   = ST_FS,
     378             :                 .setup  = xchk_setup_rtsummary,
     379             :                 .scrub  = xchk_rtsummary,
     380             :                 .has    = xfs_has_realtime,
     381             :                 .repair = xrep_rtsummary,
     382             :         },
     383             :         [XFS_SCRUB_TYPE_UQUOTA] = {     /* user quota */
     384             :                 .type   = ST_FS,
     385             :                 .setup  = xchk_setup_quota,  /* user, group and project quota entries use the same three functions */
     386             :                 .scrub  = xchk_quota,
     387             :                 .repair = xrep_quota,
     388             :         },
     389             :         [XFS_SCRUB_TYPE_GQUOTA] = {     /* group quota */
     390             :                 .type   = ST_FS,
     391             :                 .setup  = xchk_setup_quota,
     392             :                 .scrub  = xchk_quota,
     393             :                 .repair = xrep_quota,
     394             :         },
     395             :         [XFS_SCRUB_TYPE_PQUOTA] = {     /* project quota */
     396             :                 .type   = ST_FS,
     397             :                 .setup  = xchk_setup_quota,
     398             :                 .scrub  = xchk_quota,
     399             :                 .repair = xrep_quota,
     400             :         },
     401             :         [XFS_SCRUB_TYPE_FSCOUNTERS] = { /* fs summary counters */
     402             :                 .type   = ST_FS,
     403             :                 .setup  = xchk_setup_fscounters,
     404             :                 .scrub  = xchk_fscounters,
     405             :                 .repair = xrep_fscounters,
     406             :         },
     407             :         [XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */
     408             :                 .type   = ST_FS,
     409             :                 .setup  = xchk_setup_quotacheck,
     410             :                 .scrub  = xchk_quotacheck,
     411             :                 .repair = xrep_quotacheck,
     412             :         },
     413             :         [XFS_SCRUB_TYPE_NLINKS] = {     /* inode link counts */
     414             :                 .type   = ST_FS,
     415             :                 .setup  = xchk_setup_nlinks,
     416             :                 .scrub  = xchk_nlinks,
     417             :                 .repair = xrep_nlinks,
     418             :         },
     419             :         [XFS_SCRUB_TYPE_HEALTHY] = {    /* fs healthy; clean all reminders */
     420             :                 .type   = ST_FS,
     421             :                 .setup  = xchk_setup_fs,
     422             :                 .scrub  = xchk_health_record,
     423             :                 .repair = xrep_notsupported,
     424             :         },
     425             :         [XFS_SCRUB_TYPE_DIRTREE] = {    /* directory tree structure */
     426             :                 .type   = ST_INODE,
     427             :                 .setup  = xchk_setup_dirtree,
     428             :                 .scrub  = xchk_dirtree,
     429             :                 .has    = xfs_has_parent,
     430             :                 .repair = xrep_dirtree,
     431             :         },
     432             :         [XFS_SCRUB_TYPE_RGSUPER] = {    /* realtime group superblock */
     433             :                 .type   = ST_RTGROUP,
     434             :                 .setup  = xchk_setup_rgsuperblock,
     435             :                 .scrub  = xchk_rgsuperblock,
     436             :                 .has    = xfs_has_rtgroups,
     437             :                 .repair = xrep_rgsuperblock,
     438             :         },
     439             :         [XFS_SCRUB_TYPE_RGBITMAP] = {   /* realtime group bitmap */
     440             :                 .type   = ST_RTGROUP,
     441             :                 .setup  = xchk_setup_rgbitmap,
     442             :                 .scrub  = xchk_rgbitmap,
     443             :                 .has    = xfs_has_rtgroups,
     444             :                 .repair = xrep_rgbitmap,
     445             :         },
     446             :         [XFS_SCRUB_TYPE_RTRMAPBT] = {   /* realtime group rmapbt */
     447             :                 .type   = ST_RTGROUP,
     448             :                 .setup  = xchk_setup_rtrmapbt,
     449             :                 .scrub  = xchk_rtrmapbt,
     450             :                 .has    = xfs_has_rtrmapbt,
     451             :                 .repair = xrep_rtrmapbt,
     452             :         },
     453             :         [XFS_SCRUB_TYPE_RTREFCBT] = {   /* realtime refcountbt */
     454             :                 .type   = ST_RTGROUP,
     455             :                 .setup  = xchk_setup_rtrefcountbt,
     456             :                 .scrub  = xchk_rtrefcountbt,
     457             :                 .has    = xfs_has_rtreflink,
     458             :                 .repair = xrep_rtrefcountbt,
     459             :         },
     460             : };
     461             : 
     462             : static int  /* Check a scrub request against the mount and the dispatch table; returns 0 if acceptable, else a negative errno. */
     463   859019243 : xchk_validate_inputs(
     464             :         struct xfs_mount                *mp,  /* mount whose superblock counts and feature tests are consulted */
     465             :         struct xfs_scrub_metadata       *sm)  /* request being checked; its sm_flags output bits are cleared here */
     466             : {
     467   859019243 :         int                             error;
     468   859019243 :         const struct xchk_meta_ops      *ops;
     469             : 
     470   859019243 :         error = -EINVAL;  /* error is preset before each group of checks so every goto out returns that group's code */
     471             :         /* Check our inputs. */
     472   859019243 :         sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;  /* output flag bits are silently cleared rather than rejected */
     473   859019243 :         if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)  /* any remaining bit outside the input mask fails with -EINVAL */
     474           0 :                 goto out;
     475             :         /* sm_reserved[] must be zero */
     476  1718408624 :         if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
     477           0 :                 goto out;
     478             : 
     479   859389381 :         error = -ENOENT;  /* unknown, unpopulated and unsupported types all report -ENOENT */
     480             :         /* Do we know about this type of metadata? */
     481   859389381 :         if (sm->sm_type >= XFS_SCRUB_TYPE_NR)  /* bounds check before indexing meta_scrub_ops[] */
     482           0 :                 goto out;
     483   859389381 :         ops = &meta_scrub_ops[sm->sm_type];
     484   859846357 :         if (ops->setup == NULL || ops->scrub == NULL)  /* a slot missing either function is treated as an unknown type */
     485           0 :                 goto out;
     486             :         /* Does this fs even support this type of metadata? */
     487   859846357 :         if (ops->has && !ops->has(mp))  /* entries without a .has test skip this check */
     488      645907 :                 goto out;
     489             : 
     490   859238341 :         error = -EINVAL;
     491             :         /* restricting fields must be appropriate for type */
     492   859238341 :         switch (ops->type) {
     493      558231 :         case ST_NONE:
     494             :         case ST_FS:
     495      558231 :                 if (sm->sm_ino || sm->sm_gen || sm->sm_agno)  /* all three restricting fields must be zero */
     496           0 :                         goto out;
     497             :                 break;
     498     4802619 :         case ST_PERAG:
     499     4802619 :                 if (sm->sm_ino || sm->sm_gen ||
     500     4802619 :                     sm->sm_agno >= mp->m_sb.sb_agcount)  /* sm_agno must be below the superblock's AG count */
     501           0 :                         goto out;
     502             :                 break;
     503   853406252 :         case ST_INODE:
     504   853406252 :                 if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))  /* sm_gen without sm_ino is rejected; both zero is accepted */
     505           0 :                         goto out;
     506             :                 break;
     507      471239 :         case ST_RTGROUP:
     508      471239 :                 if (sm->sm_ino || sm->sm_gen)
     509           0 :                         goto out;
     510      471239 :                 if (!xfs_has_rtgroups(mp) && sm->sm_agno != 0)  /* without rtgroups, sm_agno must be 0 */
     511           0 :                         goto out;
     512      471239 :                 if (xfs_has_rtgroups(mp) && sm->sm_agno >= mp->m_sb.sb_rgcount)  /* with rtgroups, sm_agno is bounded by sb_rgcount */
     513           0 :                         goto out;
     514             :                 break;
     515           0 :         default:  /* a dispatch entry with an unrecognized .type fails with -EINVAL */
     516           0 :                 goto out;
     517             :         }
     518             : 
     519             :         /* No rebuild without repair. */
     520   859238341 :         if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) &&
     521             :             !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
     522             :                 return -EINVAL;  /* direct return; error already holds -EINVAL here, so goto out would return the same value */
     523             : 
     524             :         /*
     525             :          * We only want to repair read-write v5+ filesystems.  Defer the check
     526             :          * for ops->repair until after our scrub confirms that we need to
     527             :          * perform repairs so that we avoid failing due to not supporting
     528             :          * repairing an object that doesn't need repairs.
     529             :          */
     530   859238341 :         if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
     531    26145678 :                 error = -EOPNOTSUPP;
     532    26145678 :                 if (!xfs_has_crc(mp))  /* repair on a mount failing xfs_has_crc() gets -EOPNOTSUPP */
     533           0 :                         goto out;
     534             : 
     535    26145678 :                 error = -EROFS;
     536    52291356 :                 if (xfs_is_readonly(mp))  /* repair on a read-only mount gets -EROFS */
     537        2648 :                         goto out;
     538             :         }
     539             : 
     540             :         error = 0;  /* every check passed */
     541             : out:
     542             :         return error;
     543             : }
     544             : 
     545             : #ifdef CONFIG_XFS_ONLINE_REPAIR
                       /*
                        * Repair-capable build: call xrep_failure() when the request carried
                        * XFS_SCRUB_IFLAG_REPAIR and the output flags still contain
                        * XFS_SCRUB_OFLAG_CORRUPT or XFS_SCRUB_OFLAG_XCORRUPT.  Requests
                        * without the repair flag are not reported here.
                        */
     546   711287942 : static inline void xchk_postmortem(struct xfs_scrub *sc)
     547             : {
     548             :         /*
     549             :          * Userspace asked us to repair something, we repaired it, rescanned
     550             :          * it, and the rescan says it's still broken.  Scream about this in
     551             :          * the system logs.
     552             :          */
     553   711287942 :         if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
     554    18776068 :             (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
     555             :                                  XFS_SCRUB_OFLAG_XCORRUPT)))
     556           0 :                 xrep_failure(sc->mp);
     557   711287942 : }
     558             : #else
                       /*
                        * Build without online repair: emit a ratelimited alert whenever the
                        * output flags contain XFS_SCRUB_OFLAG_CORRUPT or
                        * XFS_SCRUB_OFLAG_XCORRUPT, regardless of the input flags.
                        */
     559             : static inline void xchk_postmortem(struct xfs_scrub *sc)
     560             : {
     561             :         /*
     562             :          * Userspace asked us to scrub something, it's broken, and we have no
     563             :          * way of fixing it.  Scream in the logs.
     564             :          */
     565             :         if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
     566             :                                 XFS_SCRUB_OFLAG_XCORRUPT))
     567             :                 xfs_alert_ratelimited(sc->mp,
     568             :                                 "Corruption detected during scrub.");
     569             : }
     570             : #endif /* CONFIG_XFS_ONLINE_REPAIR */
     571             : 
                       /*
                        * Log the result of a repair attempt.
                        *
                        * @sc:    scrub context; sc->flags, sc->sm, sc->mp and sc->file are read.
                        * @error: value returned by the repair attempt.
                        *
                        * XREP_ALREADY_FIXED, -EBUSY, -EAGAIN, -ECANCELED and -EINTR each select
                        * a status string that is printed with xfs_info_ratelimited().
                        * -EOPNOTSUPP and -ENOENT are not logged at all.  Every other value is
                        * printed as a failure with xfs_info(), i.e. without ratelimiting.
                        */
     572             : static inline void
     573    18760879 : repair_outcomes(struct xfs_scrub *sc, int error)
     574             : {
     575    18760879 :         struct xfs_scrub_metadata *sm = sc->sm;
     576    18760879 :         const char *wut = NULL;
     577             : 
     578    18760879 :         if (sc->flags & XREP_ALREADY_FIXED) {
     579             :                 wut = "*** REPAIR SUCCESS";
                               /*
                                * Only the by-value parameter is cleared, so the message
                                * prints "error 0"; the caller's error is not changed.
                                */
     580             :                 error = 0;
     581      387725 :         } else if (error == -EBUSY) {
     582             :                 wut = "??? FILESYSTEM BUSY";
     583             :         } else if (error == -EAGAIN) {
     584             :                 wut = "??? REPAIR DEFERRED";
     585             :         } else if (error == -ECANCELED) {
     586             :                 wut = "??? REPAIR CANCELLED";
     587             :         } else if (error == -EINTR) {
     588             :                 wut = "??? REPAIR INTERRUPTED";
                       /*
                        * NOTE(review): error == 0 without XREP_ALREADY_FIXED would also take
                        * the branch below and be logged as a failure -- confirm the repair
                        * path never returns 0 in that state.
                        */
     589      387286 :         } else if (error != -EOPNOTSUPP && error != -ENOENT) {
     590         256 :                 wut = "!!! REPAIR FAILED";
     591         256 :                 xfs_info(sc->mp,
     592             : "%s ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags 0x%x error %d",
     593             :                                 wut, XFS_I(file_inode(sc->file))->i_ino,
     594             :                                 xchk_type_string(sm->sm_type), sm->sm_agno,
     595             :                                 sm->sm_ino, sm->sm_gen, sm->sm_flags, error);
     596         256 :                 return;
     597             :         } else {
     598             :                 return;
     599             :         }
     600             : 
                               /* Success and retry-style outcomes share one ratelimited message. */
     601    18373593 :         xfs_info_ratelimited(sc->mp,
     602             : "%s ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags 0x%x error %d",
     603             :                         wut, XFS_I(file_inode(sc->file))->i_ino,
     604             :                         xchk_type_string(sm->sm_type), sm->sm_agno, sm->sm_ino,
     605             :                         sm->sm_gen, sm->sm_flags, error);
     606             : }
     607             : 
     608             : /* Dispatch metadata scrubbing. */
                       /*
                        * Run the single scrub (and optional repair) request described by @sm
                        * against the filesystem that @file belongs to.
                        *
                        * The request is refused with -ESHUTDOWN when the mount is shut down,
                        * with -ENOTRECOVERABLE when it is mounted norecovery, and with the
                        * value of xchk_validate_inputs() when that is nonzero.  Otherwise a
                        * scrub context is allocated and the type's setup hook and then its
                        * scrub (or repair_eval) hook are called.
                        *
                        * -EDEADLOCK or -ECHRNG from either hook causes a teardown and a retry
                        * with XCHK_TRY_HARDER or XCHK_NEED_DRAIN set, provided that flag was
                        * not already set.  A repair attempt that returns -EAGAIN also tears
                        * down and restarts from retry_op.
                        *
                        * On exit, -EFSCORRUPTED and -EFSBADCRC are turned into
                        * XFS_SCRUB_OFLAG_CORRUPT in sm->sm_flags with a return value of 0.
                        * Run statistics are merged unless the final error is -ENOENT.
                        */
     609             : int
     610   860638204 : xfs_scrub_metadata(
     611             :         struct file                     *file,
     612             :         struct xfs_scrub_metadata       *sm)
     613             : {
     614   860638204 :         struct xchk_stats_run           run = { };
     615   860638204 :         struct xfs_scrub                *sc;
     616   860638204 :         struct xfs_mount                *mp = XFS_I(file_inode(file))->i_mount;
     617   860638204 :         u64                             check_start;
     618   860638204 :         int                             error = 0;
     619             : 
                               /* Compile-time check: one ops table entry per scrub type. */
     620   860638204 :         BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
     621             :                 (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
     622             : 
     623   860638204 :         trace_xchk_start(XFS_I(file_inode(file)), sm, error);
     624             : 
     625             :         /* Forbidden if we are shut down or mounted norecovery. */
     626   859090151 :         error = -ESHUTDOWN;
     627  1718180302 :         if (xfs_is_shutdown(mp))
     628           0 :                 goto out;
     629   859090151 :         error = -ENOTRECOVERABLE;
     630   859090151 :         if (xfs_has_norecovery(mp))
     631          11 :                 goto out;
     632             : 
     633   859090140 :         error = xchk_validate_inputs(mp, sm);
     634   859354717 :         if (error)
     635      647230 :                 goto out;
     636             : 
     637   858707487 :         xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
     638             :  "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
     639             : 
     640   859084159 :         sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
     641   859498740 :         if (!sc) {
     642           0 :                 error = -ENOMEM;
     643           0 :                 goto out;
     644             :         }
     645             : 
                               /*
                                * sm->sm_type indexes the ops table directly; this runs only
                                * after xchk_validate_inputs() returned 0 for this request.
                                */
     646   859498740 :         sc->mp = mp;
     647   859498740 :         sc->file = file;
     648   859498740 :         sc->sm = sm;
     649   859498740 :         sc->ops = &meta_scrub_ops[sm->sm_type];
     650   859586563 :         sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
     651             : retry_op:
     652             :         /*
     653             :          * When repairs are allowed, prevent freezing or readonly remount while
     654             :          * scrub is running with a real transaction.
     655             :          */
     656   876260546 :         if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
     657    44632921 :                 error = mnt_want_write_file(sc->file);
     658    44634175 :                 if (error)
     659           3 :                         goto out_sc;
     660             : 
     661    44634172 :                 sc->flags |= XCHK_HAVE_FREEZE_PROT;
     662             :         }
     663             : 
     664             :         /* Set up for the operation. */
     665   876261797 :         error = sc->ops->setup(sc);
     666   877325308 :         if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
     667           0 :                 goto try_harder;
     668   877325308 :         if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
     669       76285 :                 goto need_drain;
     670   877249023 :         if (error)
     671    12608464 :                 goto out_teardown;
     672             : 
     673             :         /* Scrub for errors. */
     674   864640559 :         check_start = xchk_stats_now();
                               /*
                                * Once XREP_ALREADY_FIXED is set, use the type's repair_eval
                                * hook if it has one; otherwise use the regular scrub hook.
                                */
     675   862821258 :         if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
     676      286717 :                 error = sc->ops->repair_eval(sc);
     677             :         else
     678   862534541 :                 error = sc->ops->scrub(sc);
     679   864691381 :         run.scrub_ns += xchk_stats_elapsed_ns(check_start);
     680   865711589 :         if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
     681       21256 :                 goto try_harder;
     682   865690333 :         if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
     683       79419 :                 goto need_drain;
                               /* Errors and incomplete scans skip health update and repair. */
     684   865610914 :         if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
     685   135339194 :                 goto out_teardown;
     686             : 
     687   730271720 :         xchk_update_health(sc);
     688             : 
     689  1460487182 :         if (xchk_could_repair(sc)) {
     690             :                 /*
     691             :                  * If userspace asked for a repair but it wasn't necessary,
     692             :                  * report that back to userspace.
     693             :                  */
     694    18840530 :                 if (!xrep_will_attempt(sc)) {
     695        4368 :                         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
     696        4368 :                         goto out_nofix;
     697             :                 }
     698             : 
     699             :                 /*
     700             :                  * If it's broken, userspace wants us to fix it, and we haven't
     701             :                  * already tried to fix it, then attempt a repair.
     702             :                  */
     703    18771123 :                 error = xrep_attempt(sc, &run);
     704    18765588 :                 repair_outcomes(sc, error);
     705    18767368 :                 if (error == -EAGAIN) {
     706             :                         /*
     707             :                          * Either the repair function succeeded or it couldn't
     708             :                          * get all the resources it needs; either way, we go
     709             :                          * back to the beginning and call the scrub function.
     710             :                          */
     711    18379883 :                         error = xchk_teardown(sc, 0);
     712    18371311 :                         if (error) {
     713           0 :                                 xrep_failure(mp);
     714           0 :                                 goto out_sc;
     715             :                         }
     716    18371311 :                         goto retry_op;
     717             :                 }
     718             :         }
     719             : 
                       /*
                        * Exit labels fall through in order: out_nofix -> out_teardown ->
                        * out_sc -> out.  Jumping to a later label skips the earlier steps.
                        */
     720   711790546 : out_nofix:
     721   711794914 :         xchk_postmortem(sc);
     722   858931040 : out_teardown:
     723   858931040 :         error = xchk_teardown(sc, error);
     724   857690358 : out_sc:
     725   857690358 :         kfree(sc);
     726   858758005 : out:
     727   858758005 :         trace_xchk_done(XFS_I(file_inode(file)), sm, error);
                               /* Report corruption through the output flags, not the return value. */
     728   857093851 :         if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
     729           0 :                 sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
     730           0 :                 error = 0;
     731             :         }
     732   857093851 :         if (error != -ENOENT)
     733   715985915 :                 xchk_stats_merge(mp, sm, &run);
     734   861026417 :         return error;
                       /*
                        * Retry path for -ECHRNG: tear down with a zero error, set
                        * XCHK_NEED_DRAIN, count the retry and start again at retry_op.
                        * A teardown failure frees the context and returns that error.
                        */
     735      155704 : need_drain:
     736      155704 :         error = xchk_teardown(sc, 0);
     737      155704 :         if (error)
     738           0 :                 goto out_sc;
     739      155704 :         sc->flags |= XCHK_NEED_DRAIN;
     740      155704 :         run.retries++;
     741      155704 :         goto retry_op;
     742       21256 : try_harder:
     743             :         /*
     744             :          * Scrubbers return -EDEADLOCK to mean 'try harder'.  Tear down
     745             :          * everything we hold, then set up again with preparation for
     746             :          * worst-case scenarios.
     747             :          */
     748       21256 :         error = xchk_teardown(sc, 0);
     749       21256 :         if (error)
     750           0 :                 goto out_sc;
     751       21256 :         sc->flags |= XCHK_TRY_HARDER;
     752       21256 :         run.retries++;
     753       21256 :         goto retry_op;
     754             : }
     755             : 
     756             : /* Decide if there have been any scrub failures up to this point. */
                       /*
                        * @mp:          mount; not referenced in this function body.
                        * @vhead:       vector head whose svh_vecs array is scanned from the start.
                        * @barrier_vec: the barrier entry; only entries before it are examined.
                        *
                        * Returns true if an earlier non-barrier vector has either a nonzero
                        * sv_ret other than -EBUSY, -ENOENT or -EUSERS, or an sv_flags bit that
                        * is also set in the barrier's sv_flags (masked by XFS_SCRUB_FLAGS_OUT).
                        * Returns false otherwise.
                        */
     757             : static inline bool
     758   390159558 : xfs_scrubv_previous_failures(
     759             :         struct xfs_mount                *mp,
     760             :         struct xfs_scrub_vec_head       *vhead,
     761             :         struct xfs_scrub_vec            *barrier_vec)
     762             : {
     763   390159558 :         struct xfs_scrub_vec            *v;
     764   390159558 :         __u32                           failmask;
     765             : 
                               /* Only output-flag bits on the barrier select what counts as a failure. */
     766   390159558 :         failmask = barrier_vec->sv_flags & XFS_SCRUB_FLAGS_OUT;
     767             : 
     768  1559555695 :         for (v = vhead->svh_vecs; v < barrier_vec; v++) {
                                       /* Earlier barriers carry a mask, not a result; skip them. */
     769  1169409743 :                 if (v->sv_type == XFS_SCRUB_TYPE_BARRIER)
     770   389699758 :                         continue;
     771             : 
     772             :                 /*
     773             :                  * Runtime errors count as a previous failure, except the ones
     774             :                  * used to ask userspace to retry.
     775             :                  */
     776   779709985 :                 if (v->sv_ret && v->sv_ret != -EBUSY && v->sv_ret != -ENOENT &&
     777             :                     v->sv_ret != -EUSERS)
     778             :                         return true;
     779             : 
     780             :                 /*
     781             :                  * If any of the out-flags on the scrub vector match the mask
     782             :                  * that was set on the barrier vector, that's a previous fail.
     783             :                  */
     784   779696457 :                 if (v->sv_flags & failmask)
     785             :                         return true;
     786             :         }
     787             : 
     788             :         return false;
     789             : }
     790             : 
     791             : /* Vectored scrub implementation to reduce ioctl calls. */
                       /*
                        * @file:  file the ioctl was issued on; each vector is scrubbed through it.
                        * @vhead: request header followed by svh_nr vectors in svh_vecs.
                        *
                        * Each non-barrier vector is turned into an xfs_scrub_metadata request
                        * that shares svh_ino, svh_gen and svh_agno, and is run through
                        * xfs_scrub_metadata().  The per-vector result is stored in sv_ret and
                        * the resulting flags in sv_flags.  A barrier vector stops the run, with
                        * its sv_ret set to -ECANCELED, if xfs_scrubv_previous_failures()
                        * reports a failure.
                        *
                        * Returns -EINVAL for unknown header flags, a nonzero sv_reserved, or a
                        * barrier with bits outside XFS_SCRUB_FLAGS_OUT.  Returns -EFSCORRUPTED
                        * if igrab() on the ioctl file's inode fails, and -EINTR if a fatal
                        * signal is pending after a vector completes.  Otherwise returns 0,
                        * including after a barrier cancels the run.
                        */
     792             : int
     793   166356234 : xfs_scrubv_metadata(
     794             :         struct file                     *file,
     795             :         struct xfs_scrub_vec_head       *vhead)
     796             : {
     797   166356234 :         struct xfs_inode                *ip_in = XFS_I(file_inode(file));
     798   166356234 :         struct xfs_mount                *mp = ip_in->i_mount;
     799   166356234 :         struct xfs_inode                *ip = NULL;
     800   166356234 :         struct xfs_scrub_vec            *v;
     801   166356234 :         bool                            set_dontcache = false;
     802   166356234 :         unsigned int                    i;
     803   166356234 :         int                             error = 0;
     804             : 
                               /*
                                * Compile-time checks: the two request structures must differ in
                                * size and the two ioctl numbers must differ.
                                */
     805   166356234 :         BUILD_BUG_ON(sizeof(struct xfs_scrub_vec_head) ==
     806             :                      sizeof(struct xfs_scrub_metadata));
     807   166356234 :         BUILD_BUG_ON(XFS_IOC_SCRUB_METADATA == XFS_IOC_SCRUBV_METADATA);
     808             : 
     809   166356234 :         trace_xchk_scrubv_start(ip_in, vhead);
     810             : 
                               /* Validation pass; no inode reference is held yet on these returns. */
     811   166000337 :         if (vhead->svh_flags & ~XFS_SCRUB_VEC_FLAGS_ALL)
     812             :                 return -EINVAL;
     813  1396266232 :         for (i = 0, v = vhead->svh_vecs; i < vhead->svh_nr; i++, v++) {
     814  1229548749 :                 if (v->sv_reserved)
     815             :                         return -EINVAL;
     816  1229548749 :                 if (v->sv_type == XFS_SCRUB_TYPE_BARRIER &&
     817   385327223 :                     (v->sv_flags & ~XFS_SCRUB_FLAGS_OUT))
     818             :                         return -EINVAL;
     819             : 
     820             :                 /*
     821             :                  * If we detect at least one inode-type scrub, we might
     822             :                  * consider setting dontcache at the end.
     823             :                  */
     824  1229548749 :                 if (v->sv_type < XFS_SCRUB_TYPE_NR &&
     825   845485506 :                     meta_scrub_ops[v->sv_type].type == ST_INODE)
     826   843190751 :                         set_dontcache = true;
     827             : 
     828  1229678592 :                 trace_xchk_scrubv_item(mp, vhead, v);
     829             :         }
     830             : 
     831             :         /*
     832             :          * If the caller provided us with a nonzero inode number that isn't the
     833             :          * ioctl file, try to grab a reference to it to eliminate all further
     834             :          * untrusted inode lookups.  If we can't get the inode, let each scrub
     835             :          * function try again.
     836             :          */
                               /*
                                * NOTE(review): the condition below does not test svh_ino for
                                * zero, so svh_ino == 0 also reaches xfs_iget() -- confirm that
                                * lookup fails cleanly.  The xfs_iget() return value is
                                * ignored; this code relies on ip still being NULL after a
                                * failed lookup -- presumably xfs_iget() leaves it untouched on
                                * error, TODO confirm.
                                */
     837   166717483 :         if (vhead->svh_ino != ip_in->i_ino) {
     838   110426443 :                 xfs_iget(mp, NULL, vhead->svh_ino, XFS_IGET_UNTRUSTED, 0, &ip);
                                       /*
                                        * Drop the inode again if its generation does not match
                                        * svh_gen, or if it is a metadir inode that is not a
                                        * directory.
                                        */
     839   110732239 :                 if (ip && (VFS_I(ip)->i_generation != vhead->svh_gen ||
     840      214552 :                            (xfs_is_metadir_inode(ip) &&
     841      214552 :                             !S_ISDIR(VFS_I(ip)->i_mode)))) {
     842      380704 :                         xfs_irele(ip);
     843      380707 :                         ip = NULL;
     844             :                 }
     845             :         }
                               /*
                                * Fall back to the ioctl file's own inode, taking a reference so
                                * the xfs_irele() at the end is balanced on both paths.
                                */
     846   167023282 :         if (!ip) {
     847    57933387 :                 if (!igrab(VFS_I(ip_in)))
     848             :                         return -EFSCORRUPTED;
     849    58318911 :                 ip = ip_in;
     850             :         }
     851             : 
     852             :         /* Run all the scrubbers. */
     853  1412255088 :         for (i = 0, v = vhead->svh_vecs; i < vhead->svh_nr; i++, v++) {
     854  1244935907 :                 struct xfs_scrub_metadata       sm = {
     855  1244935907 :                         .sm_type        = v->sv_type,
     856  1244935907 :                         .sm_flags       = v->sv_flags,
     857  1244935907 :                         .sm_ino         = vhead->svh_ino,
     858  1244935907 :                         .sm_gen         = vhead->svh_gen,
     859  1244935907 :                         .sm_agno        = vhead->svh_agno,
     860             :                 };
     861             : 
                                       /*
                                        * Barriers run no scrubber: a failing one cancels the rest
                                        * of the vectors, a passing one is skipped.
                                        */
     862  1244935907 :                 if (v->sv_type == XFS_SCRUB_TYPE_BARRIER) {
     863   390021894 :                         if (xfs_scrubv_previous_failures(mp, vhead, v)) {
     864       13605 :                                 v->sv_ret = -ECANCELED;
     865       13605 :                                 trace_xchk_scrubv_barrier_fail(mp, vhead, v);
     866       27531 :                                 break;
     867             :                         }
     868             : 
     869   390008289 :                         continue;
     870             :                 }
     871             : 
                                       /* The result goes into the vector, not this function's return. */
     872   854914013 :                 v->sv_ret = xfs_scrub_metadata(file, &sm);
     873   856125548 :                 v->sv_flags = sm.sm_flags;
     874             : 
     875             :                 /* Leave the inode in memory if something's wrong with it. */
     876   856125548 :                 if (xchk_needs_repair(&sm))
     877     4183356 :                         set_dontcache = false;
     878             : 
                                       /*
                                        * Optional killable sleep between vectors; svh_rest_us is
                                        * multiplied by 1000 to form the nanosecond delay.
                                        * NOTE(review): confirm the width of svh_rest_us so the
                                        * multiplication cannot wrap before ktime_add_ns().
                                        */
     879   856125548 :                 if (vhead->svh_rest_us) {
     880           0 :                         ktime_t         expires;
     881             : 
     882           0 :                         expires = ktime_add_ns(ktime_get(),
     883             :                                         vhead->svh_rest_us * 1000);
     884           0 :                         set_current_state(TASK_KILLABLE);
     885           0 :                         schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
     886             :                 }
     887   856125548 :                 if (fatal_signal_pending(current)) {
     888             :                         error = -EINTR;
     889             :                         break;
     890             :                 }
     891             :         }
     892             : 
     893             :         /*
     894             :          * If we're holding the only reference to this inode and the scan was
     895             :          * clean, mark it dontcache so that we don't pollute the cache.
     896             :          */
     897   167333107 :         if (set_dontcache && atomic_read(&VFS_I(ip)->i_count) == 1)
     898    30115883 :                 d_mark_dontcache(VFS_I(ip));
     899   167335387 :         xfs_irele(ip);
     900   167335387 :         return error;
     901             : }

Generated by: LCOV version 1.14