LCOV - code coverage report
Current view: top level - fs/xfs/scrub - common.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 398 534 74.5 %
Date: 2023-07-31 20:08:07 Functions: 44 52 84.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_btree_staging.h"
      14             : #include "xfs_log_format.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_icache.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_alloc_btree.h"
      20             : #include "xfs_ialloc.h"
      21             : #include "xfs_ialloc_btree.h"
      22             : #include "xfs_refcount_btree.h"
      23             : #include "xfs_rmap.h"
      24             : #include "xfs_rmap_btree.h"
      25             : #include "xfs_log.h"
      26             : #include "xfs_trans_priv.h"
      27             : #include "xfs_da_format.h"
      28             : #include "xfs_da_btree.h"
      29             : #include "xfs_dir2_priv.h"
      30             : #include "xfs_attr.h"
      31             : #include "xfs_reflink.h"
      32             : #include "xfs_ag.h"
      33             : #include "xfs_error.h"
      34             : #include "xfs_quota.h"
      35             : #include "xfs_swapext.h"
      36             : #include "scrub/scrub.h"
      37             : #include "scrub/common.h"
      38             : #include "scrub/trace.h"
      39             : #include "scrub/repair.h"
      40             : #include "scrub/health.h"
      41             : 
      42             : /* Common code for the metadata scrubbers. */
      43             : 
      44             : /*
      45             :  * Handling operational errors.
      46             :  *
      47             :  * The *_process_error() family of functions are used to process error return
      48             :  * codes from functions called as part of a scrub operation.
      49             :  *
      50             :  * If there's no error, we return true to tell the caller that it's ok
      51             :  * to move on to the next check in its list.
      52             :  *
      53             :  * For non-verifier errors (e.g. ENOMEM) we return false to tell the
      54             :  * caller that something bad happened, and we preserve *error so that
      55             :  * the caller can return the *error up the stack to userspace.
      56             :  *
      57             :  * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
      58             :  * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
      59             :  * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
      60             :  * not via return codes.  We return false to tell the caller that
      61             :  * something bad happened.  Since the error has been cleared, the caller
      62             :  * will (presumably) return that zero and scrubbing will move on to
      63             :  * whatever's next.
      64             :  *
      65             :  * ftrace can be used to record the precise metadata location and the
      66             :  * approximate code location of the failed operation.
      67             :  */
      68             : 
      69             : /* Check for operational errors. */
      70             : static bool
      71   121550456 : __xchk_process_error(
      72             :         struct xfs_scrub        *sc,
      73             :         xfs_agnumber_t          agno,
      74             :         xfs_agblock_t           bno,
      75             :         int                     *error,
      76             :         __u32                   errflag,
      77             :         void                    *ret_ip)
      78             : {
      79   121550456 :         switch (*error) {
      80             :         case 0:
      81             :                 return true;
      82       21135 :         case -EDEADLOCK:
      83             :         case -ECHRNG:
      84             :                 /* Used to restart an op with deadlock avoidance. */
      85       42270 :                 trace_xchk_deadlock_retry(
      86       21135 :                                 sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
      87             :                                 sc->sm, *error);
      88       21135 :                 break;
      89          24 :         case -ECANCELED:
      90             :                 /*
      91             :                  * ECANCELED here means that the caller set one of the scrub
      92             :                  * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
      93             :                  * quickly.  Set error to zero and do not continue.
      94             :                  */
      95          24 :                 trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
      96          24 :                 *error = 0;
      97          24 :                 break;
      98          18 :         case -EFSBADCRC:
      99             :         case -EFSCORRUPTED:
     100             :                 /* Note the badness but don't abort. */
     101          18 :                 sc->sm->sm_flags |= errflag;
     102          18 :                 xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d errflag 0x%x ret_ip %pS",
     103          18 :                                 xchk_type_string(sc->sm->sm_type),
     104             :                                 agno,
     105             :                                 bno,
     106             :                                 *error,
     107             :                                 errflag,
     108             :                                 ret_ip);
     109          18 :                 *error = 0;
     110          31 :                 fallthrough;
     111          31 :         default:
     112          31 :                 if (*error)
     113          13 :                         xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
     114          13 :                                         xchk_type_string(sc->sm->sm_type),
     115             :                                         agno,
     116             :                                         bno,
     117             :                                         *error,
     118             :                                         ret_ip);
     119          31 :                 trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
     120          31 :                 break;
     121             :         }
     122             :         return false;
     123             : }
     124             : 
     125             : bool
     126     2556733 : xchk_process_error(
     127             :         struct xfs_scrub        *sc,
     128             :         xfs_agnumber_t          agno,
     129             :         xfs_agblock_t           bno,
     130             :         int                     *error)
     131             : {
     132     2556733 :         return __xchk_process_error(sc, agno, bno, error,
     133             :                         XFS_SCRUB_OFLAG_CORRUPT, __return_address);
     134             : }
     135             : 
     136             : bool
     137   118993461 : xchk_xref_process_error(
     138             :         struct xfs_scrub        *sc,
     139             :         xfs_agnumber_t          agno,
     140             :         xfs_agblock_t           bno,
     141             :         int                     *error)
     142             : {
     143   118993461 :         return __xchk_process_error(sc, agno, bno, error,
     144             :                         XFS_SCRUB_OFLAG_XFAIL, __return_address);
     145             : }
     146             : 
     147             : /* Check for operational errors for a file offset. */
     148             : static bool
     149   888642169 : __xchk_fblock_process_error(
     150             :         struct xfs_scrub        *sc,
     151             :         int                     whichfork,
     152             :         xfs_fileoff_t           offset,
     153             :         int                     *error,
     154             :         __u32                   errflag,
     155             :         void                    *ret_ip)
     156             : {
     157   888642169 :         switch (*error) {
     158             :         case 0:
     159             :                 return true;
     160           0 :         case -EDEADLOCK:
     161             :         case -ECHRNG:
     162             :                 /* Used to restart an op with deadlock avoidance. */
     163           0 :                 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
     164           0 :                 break;
     165           2 :         case -ECANCELED:
     166             :                 /*
     167             :                  * ECANCELED here means that the caller set one of the scrub
     168             :                  * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
     169             :                  * quickly.  Set error to zero and do not continue.
     170             :                  */
     171           2 :                 trace_xchk_file_op_error(sc, whichfork, offset, *error,
     172             :                                 ret_ip);
     173           2 :                 *error = 0;
     174           2 :                 break;
     175           0 :         case -EFSBADCRC:
     176             :         case -EFSCORRUPTED:
     177             :                 /* Note the badness but don't abort. */
     178           0 :                 sc->sm->sm_flags |= errflag;
     179           0 :                 xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d errflag 0x%x ret_ip %pS",
     180           0 :                                 sc->ip->i_ino,
     181             :                                 whichfork,
     182           0 :                                 xchk_type_string(sc->sm->sm_type),
     183             :                                 offset,
     184             :                                 *error,
     185             :                                 errflag,
     186             :                                 ret_ip);
     187           0 :                 *error = 0;
     188           0 :                 fallthrough;
     189           0 :         default:
     190           0 :                 if (*error)
     191           0 :                         xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS",
     192           0 :                                         sc->ip->i_ino,
     193             :                                         whichfork,
     194           0 :                                         xchk_type_string(sc->sm->sm_type),
     195             :                                         offset,
     196             :                                         *error,
     197             :                                         ret_ip);
     198           0 :                 trace_xchk_file_op_error(sc, whichfork, offset, *error,
     199             :                                 ret_ip);
     200           0 :                 break;
     201             :         }
     202             :         return false;
     203             : }
     204             : 
     205             : bool
     206   535628329 : xchk_fblock_process_error(
     207             :         struct xfs_scrub        *sc,
     208             :         int                     whichfork,
     209             :         xfs_fileoff_t           offset,
     210             :         int                     *error)
     211             : {
     212   535683497 :         return __xchk_fblock_process_error(sc, whichfork, offset, error,
     213             :                         XFS_SCRUB_OFLAG_CORRUPT, __return_address);
     214             : }
     215             : 
     216             : bool
     217   353685893 : xchk_fblock_xref_process_error(
     218             :         struct xfs_scrub        *sc,
     219             :         int                     whichfork,
     220             :         xfs_fileoff_t           offset,
     221             :         int                     *error)
     222             : {
     223   353685893 :         return __xchk_fblock_process_error(sc, whichfork, offset, error,
     224             :                         XFS_SCRUB_OFLAG_XFAIL, __return_address);
     225             : }
     226             : 
     227             : /*
     228             :  * Handling scrub corruption/optimization/warning checks.
     229             :  *
     230             :  * The *_set_{corrupt,preen,warning}() family of functions are used to
     231             :  * record the presence of metadata that is incorrect (corrupt), could be
     232             :  * optimized somehow (preen), or should be flagged for administrative
     233             :  * review but is not incorrect (warn).
     234             :  *
     235             :  * ftrace can be used to record the precise metadata location and
     236             :  * approximate code location of the failed check.
     237             :  */
     238             : 
     239             : /* Record a block which could be optimized. */
     240             : void
     241      803978 : xchk_block_set_preen(
     242             :         struct xfs_scrub        *sc,
     243             :         struct xfs_buf          *bp)
     244             : {
     245      803978 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
     246      803978 :         trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
     247      803976 : }
     248             : 
     249             : /*
      250             :  * Record an inode which could be optimized.  The trace data will
      251             :  * identify the inode by number; no buffer is passed here, so the
      252             :  * location is that of the inode record itself.
     253             :  */
     254             : void
     255     2601530 : xchk_ino_set_preen(
     256             :         struct xfs_scrub        *sc,
     257             :         xfs_ino_t               ino)
     258             : {
     259     2601530 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
     260     2601530 :         trace_xchk_ino_preen(sc, ino, __return_address);
     261     2601533 : }
     262             : 
     263             : /* Record something being wrong with the filesystem primary superblock. */
     264             : void
     265           0 : xchk_set_corrupt(
     266             :         struct xfs_scrub        *sc)
     267             : {
     268           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
     269           0 :         xchk_whine(sc->mp, "type %s ret_ip %pS", xchk_type_string(sc->sm->sm_type),
     270             :                         __return_address);
     271           0 :         trace_xchk_fs_error(sc, 0, __return_address);
     272           0 : }
     273             : 
     274             : /* Record a corrupt block. */
     275             : void
     276           0 : xchk_block_set_corrupt(
     277             :         struct xfs_scrub        *sc,
     278             :         struct xfs_buf          *bp)
     279             : {
     280           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
     281           0 :         trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
     282           0 :         xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
     283           0 :                         xchk_type_string(sc->sm->sm_type),
     284             :                         xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
     285             :                         xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
     286             :                         __return_address);
     287           0 : }
     288             : 
     289             : #ifdef CONFIG_XFS_QUOTA
     290             : /* Record a corrupt quota counter. */
     291             : void
     292           0 : xchk_qcheck_set_corrupt(
     293             :         struct xfs_scrub        *sc,
     294             :         unsigned int            dqtype,
     295             :         xfs_dqid_t              id)
     296             : {
     297           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
     298           0 :         xchk_whine(sc->mp, "type %s dqtype %u id %u ret_ip %pS",
     299           0 :                         xchk_type_string(sc->sm->sm_type), dqtype, id, __return_address);
     300           0 :         trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
     301           0 : }
     302             : #endif /* CONFIG_XFS_QUOTA */
     303             : 
     304             : /* Record a corruption while cross-referencing. */
     305             : void
     306           0 : xchk_block_xref_set_corrupt(
     307             :         struct xfs_scrub        *sc,
     308             :         struct xfs_buf          *bp)
     309             : {
     310           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
     311           0 :         trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
     312           0 :         xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
     313           0 :                         xchk_type_string(sc->sm->sm_type),
     314             :                         xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
     315             :                         xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
     316             :                         __return_address);
     317           0 : }
     318             : 
     319             : /*
      320             :  * Record a corrupt inode.  The trace data will identify the inode by
      321             :  * number; no buffer is passed here, so the location is that of the
      322             :  * inode record itself.
     323             :  */
     324             : void
     325           0 : xchk_ino_set_corrupt(
     326             :         struct xfs_scrub        *sc,
     327             :         xfs_ino_t               ino)
     328             : {
     329           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
     330           0 :         xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
     331           0 :                         ino, xchk_type_string(sc->sm->sm_type), __return_address);
     332           0 :         trace_xchk_ino_error(sc, ino, __return_address);
     333           0 : }
     334             : 
     335             : /* Record a corruption while cross-referencing with an inode. */
     336             : void
     337           0 : xchk_ino_xref_set_corrupt(
     338             :         struct xfs_scrub        *sc,
     339             :         xfs_ino_t               ino)
     340             : {
     341           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
     342           0 :         xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
     343           0 :                         ino, xchk_type_string(sc->sm->sm_type), __return_address);
     344           0 :         trace_xchk_ino_error(sc, ino, __return_address);
     345           0 : }
     346             : 
     347             : /* Record corruption in a block indexed by a file fork. */
     348             : void
     349           4 : xchk_fblock_set_corrupt(
     350             :         struct xfs_scrub        *sc,
     351             :         int                     whichfork,
     352             :         xfs_fileoff_t           offset)
     353             : {
     354           4 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
     355           4 :         xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
     356           4 :                         sc->ip->i_ino,
     357             :                         whichfork,
     358           4 :                         xchk_type_string(sc->sm->sm_type),
     359             :                         offset,
     360             :                         __return_address);
     361           4 :         trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
     362           4 : }
     363             : 
     364             : /* Record a corruption while cross-referencing a fork block. */
     365             : void
     366           2 : xchk_fblock_xref_set_corrupt(
     367             :         struct xfs_scrub        *sc,
     368             :         int                     whichfork,
     369             :         xfs_fileoff_t           offset)
     370             : {
     371           2 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
     372           2 :         xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
     373           2 :                         sc->ip->i_ino,
     374             :                         whichfork,
     375           2 :                         xchk_type_string(sc->sm->sm_type),
     376             :                         offset,
     377             :                         __return_address);
     378           2 :         trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
     379           2 : }
     380             : 
     381             : /*
      382             :  * Warn about inodes that need administrative review but are not
      383             :  * incorrect.
     384             :  */
     385             : void
     386           0 : xchk_ino_set_warning(
     387             :         struct xfs_scrub        *sc,
     388             :         xfs_ino_t               ino)
     389             : {
     390           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
     391           0 :         xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
     392           0 :                         ino, xchk_type_string(sc->sm->sm_type), __return_address);
     393           0 :         trace_xchk_ino_warning(sc, ino, __return_address);
     394           0 : }
     395             : 
     396             : /* Warn about a block indexed by a file fork that needs review. */
     397             : void
     398          24 : xchk_fblock_set_warning(
     399             :         struct xfs_scrub        *sc,
     400             :         int                     whichfork,
     401             :         xfs_fileoff_t           offset)
     402             : {
     403          24 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
     404          24 :         xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
     405          24 :                         sc->ip->i_ino,
     406             :                         whichfork,
     407          24 :                         xchk_type_string(sc->sm->sm_type),
     408             :                         offset,
     409             :                         __return_address);
     410          24 :         trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
     411          24 : }
     412             : 
     413             : /* Signal an incomplete scrub. */
     414             : void
     415          53 : xchk_set_incomplete(
     416             :         struct xfs_scrub        *sc)
     417             : {
     418          53 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
     419          53 :         trace_xchk_incomplete(sc, __return_address);
     420          53 : }
     421             : 
     422             : /*
     423             :  * rmap scrubbing -- compute the number of blocks with a given owner,
     424             :  * at least according to the reverse mapping data.
     425             :  */
     426             : 
     427             : struct xchk_rmap_ownedby_info {
     428             :         const struct xfs_owner_info     *oinfo;
     429             :         xfs_filblks_t                   *blocks;
     430             : };
     431             : 
     432             : STATIC int
     433  7406605830 : xchk_count_rmap_ownedby_irec(
     434             :         struct xfs_btree_cur            *cur,
     435             :         const struct xfs_rmap_irec      *rec,
     436             :         void                            *priv)
     437             : {
     438  7406605830 :         struct xchk_rmap_ownedby_info   *sroi = priv;
     439  7406605830 :         bool                            irec_attr;
     440  7406605830 :         bool                            oinfo_attr;
     441             : 
     442  7406605830 :         irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
     443  7406605830 :         oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
     444             : 
     445  7406605830 :         if (rec->rm_owner != sroi->oinfo->oi_owner)
     446             :                 return 0;
     447             : 
     448    29427362 :         if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
     449    29427362 :                 (*sroi->blocks) += rec->rm_blockcount;
     450             : 
     451             :         return 0;
     452             : }
     453             : 
     454             : /*
     455             :  * Calculate the number of blocks the rmap thinks are owned by something.
     456             :  * The caller should pass us an rmapbt cursor.
     457             :  */
     458             : int
     459      544007 : xchk_count_rmap_ownedby_ag(
     460             :         struct xfs_scrub                *sc,
     461             :         struct xfs_btree_cur            *cur,
     462             :         const struct xfs_owner_info     *oinfo,
     463             :         xfs_filblks_t                   *blocks)
     464             : {
     465      544007 :         struct xchk_rmap_ownedby_info   sroi = {
     466             :                 .oinfo                  = oinfo,
     467             :                 .blocks                 = blocks,
     468             :         };
     469             : 
     470      544007 :         *blocks = 0;
     471      544007 :         return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
     472             :                         &sroi);
     473             : }
     474             : 
     475             : /*
     476             :  * AG scrubbing
     477             :  *
     478             :  * These helpers facilitate locking an allocation group's header
     479             :  * buffers, setting up cursors for all btrees that are present, and
     480             :  * cleaning everything up once we're through.
     481             :  */
     482             : 
     483             : /* Decide if we want to return an AG header read failure. */
     484             : static inline bool
     485             : want_ag_read_header_failure(
     486             :         struct xfs_scrub        *sc,
     487             :         unsigned int            type)
     488             : {
     489             :         /* Return all AG header read failures when scanning btrees. */
     490           0 :         if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
     491           0 :             sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
     492             :             sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
     493             :                 return true;
     494             :         /*
     495             :          * If we're scanning a given type of AG header, we only want to
     496             :          * see read failures from that specific header.  We'd like the
     497             :          * other headers to cross-check them, but this isn't required.
     498             :          */
     499           0 :         if (sc->sm->sm_type == type)
     500             :                 return true;
     501             :         return false;
     502             : }
     503             : 
     504             : /*
     505             :  * Grab the AG header buffers for the attached perag structure.
     506             :  *
     507             :  * The headers should be released by xchk_ag_free, but as a fail safe we attach
     508             :  * all the buffers we grab to the scrub transaction so they'll all be freed
     509             :  * when we cancel it.
     510             :  */
     511             : static inline int
     512   281054782 : xchk_perag_read_headers(
     513             :         struct xfs_scrub        *sc,
     514             :         struct xchk_ag          *sa)
     515             : {
     516   281054782 :         int                     error;
     517             : 
     518   281054782 :         error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp);
     519   281095105 :         if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
     520             :                 return error;
     521             : 
     522   281095105 :         error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
     523   281096071 :         if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
     524           0 :                 return error;
     525             : 
     526             :         return 0;
     527             : }
     528             : 
     529             : /*
     530             :  * Grab the AG headers for the attached perag structure and wait for pending
     531             :  * intents to drain.
     532             :  */
     533             : int
     534   281045150 : xchk_perag_drain_and_lock(
     535             :         struct xfs_scrub        *sc)
     536             : {
     537   281045150 :         struct xchk_ag          *sa = &sc->sa;
     538   281045150 :         int                     error = 0;
     539             : 
     540   281045150 :         ASSERT(sa->pag != NULL);
     541   281045150 :         ASSERT(sa->agi_bp == NULL);
     542   281045150 :         ASSERT(sa->agf_bp == NULL);
     543             : 
     544   281057467 :         do {
     545   281057467 :                 if (xchk_should_terminate(sc, &error))
     546           3 :                         return error;
     547             : 
     548   281060335 :                 error = xchk_perag_read_headers(sc, sa);
     549   281095842 :                 if (error)
     550           0 :                         return error;
     551             : 
     552             :                 /*
     553             :                  * If we've grabbed an inode for scrubbing then we assume that
     554             :                  * holding its ILOCK will suffice to coordinate with any intent
     555             :                  * chains involving this inode.
     556             :                  */
     557   281095842 :                 if (sc->ip)
     558             :                         return 0;
     559             : 
     560             :                 /*
     561             :                  * Decide if this AG is quiet enough for all metadata to be
     562             :                  * consistent with each other.  XFS allows the AG header buffer
     563             :                  * locks to cycle across transaction rolls while processing
     564             :                  * chains of deferred ops, which means that there could be
     565             :                  * other threads in the middle of processing a chain of
     566             :                  * deferred ops.  For regular operations we are careful about
     567             :                  * ordering operations to prevent collisions between threads
     568             :                  * (which is why we don't need a per-AG lock), but scrub and
     569             :                  * repair have to serialize against chained operations.
     570             :                  *
     571             :                  * We just locked all the AG headers buffers; now take a look
     572             :                  * to see if there are any intents in progress.  If there are,
     573             :                  * drop the AG headers and wait for the intents to drain.
     574             :                  * Since we hold all the AG header locks for the duration of
     575             :                  * the scrub, this is the only time we have to sample the
     576             :                  * intents counter; any threads increasing it after this point
     577             :                  * can't possibly be in the middle of a chain of AG metadata
     578             :                  * updates.
     579             :                  *
     580             :                  * Obviously, this should be slanted against scrub and in favor
     581             :                  * of runtime threads.
     582             :                  */
     583     2444635 :                 if (!xfs_perag_intent_busy(sa->pag))
     584             :                         return 0;
     585             : 
     586       46052 :                 if (sa->agf_bp) {
     587       46052 :                         xfs_trans_brelse(sc->tp, sa->agf_bp);
     588       46052 :                         sa->agf_bp = NULL;
     589             :                 }
     590             : 
     591       46052 :                 if (sa->agi_bp) {
     592       46052 :                         xfs_trans_brelse(sc->tp, sa->agi_bp);
     593       46052 :                         sa->agi_bp = NULL;
     594             :                 }
     595             : 
     596       46052 :                 if (!(sc->flags & XCHK_FSGATES_DRAIN))
     597             :                         return -ECHRNG;
     598       12317 :                 error = xfs_perag_intent_drain(sa->pag);
     599       12317 :                 if (error == -ERESTARTSYS)
     600           0 :                         error = -EINTR;
     601       12317 :         } while (!error);
     602             : 
     603             :         return error;
     604             : }
     605             : 
     606             : /*
     607             :  * Grab the per-AG structure, grab all AG header buffers, and wait until there
     608             :  * aren't any pending intents.  Returns -ENOENT if we can't grab the perag
     609             :  * structure.
     610             :  */
     611             : int
     612   281046189 : xchk_ag_read_headers(
     613             :         struct xfs_scrub        *sc,
     614             :         xfs_agnumber_t          agno,
     615             :         struct xchk_ag          *sa)
     616             : {
     617   281046189 :         struct xfs_mount        *mp = sc->mp;
     618             : 
     619   281046189 :         ASSERT(!sa->pag);
     620   281046189 :         sa->pag = xfs_perag_get(mp, agno);
     621   281033727 :         if (!sa->pag)
     622             :                 return -ENOENT;
     623             : 
     624   281033727 :         return xchk_perag_drain_and_lock(sc);
     625             : }
     626             : 
     627             : /* Release all the AG btree cursors. */
     628             : void
     629   947146497 : xchk_ag_btcur_free(
     630             :         struct xchk_ag          *sa)
     631             : {
     632   947146497 :         if (sa->refc_cur)
     633   215052449 :                 xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
     634   947181728 :         if (sa->rmap_cur)
     635   215066749 :                 xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
     636   947343689 :         if (sa->fino_cur)
     637   281336802 :                 xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
     638   947339161 :         if (sa->ino_cur)
     639   281337401 :                 xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
     640   947361189 :         if (sa->cnt_cur)
     641   281345312 :                 xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
     642   947362075 :         if (sa->bno_cur)
     643   281345044 :                 xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
     644             : 
     645   947362651 :         sa->refc_cur = NULL;
     646   947362651 :         sa->rmap_cur = NULL;
     647   947362651 :         sa->fino_cur = NULL;
     648   947362651 :         sa->ino_cur = NULL;
     649   947362651 :         sa->bno_cur = NULL;
     650   947362651 :         sa->cnt_cur = NULL;
     651   947362651 : }
     652             : 
/*
 * Initialize all the btree cursors for an AG.
 *
 * A cursor is only created when the corresponding AG header buffer is
 * attached, the filesystem advertises the feature (finobt/rmapbt/reflink),
 * and the btree is healthy enough to walk; otherwise the cursor field is
 * left untouched (presumably NULL from a prior xchk_ag_btcur_free or a
 * zeroed xchk_ag -- callers should confirm) so cross-referencing skips
 * that btree.
 */
void
xchk_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;

	if (sa->agf_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
		/* Set up a bnobt cursor for cross-referencing. */
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag, XFS_BTNUM_BNO);
	}

	if (sa->agf_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
		/* Set up a cntbt cursor for cross-referencing. */
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag, XFS_BTNUM_CNT);
	}

	/* Set up a inobt cursor for cross-referencing. */
	if (sa->agi_bp &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
		sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
				XFS_BTNUM_INO);
	}

	/* Set up a finobt cursor for cross-referencing. */
	if (sa->agi_bp && xfs_has_finobt(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
		sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
				XFS_BTNUM_FINO);
	}

	/* Set up a rmapbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_has_rmapbt(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sa->pag);
	}

	/* Set up a refcountbt cursor for cross-referencing. */
	if (sa->agf_bp && xfs_has_reflink(mp) &&
	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
				sa->agf_bp, sa->pag);
	}
}
     703             : 
     704             : /* Release the AG header context and btree cursors. */
     705             : void
     706   928416977 : xchk_ag_free(
     707             :         struct xfs_scrub        *sc,
     708             :         struct xchk_ag          *sa)
     709             : {
     710   928416977 :         xchk_ag_btcur_free(sa);
     711   928502332 :         xrep_reset_perag_resv(sc);
     712   928837009 :         if (sa->agf_bp) {
     713   281290806 :                 xfs_trans_brelse(sc->tp, sa->agf_bp);
     714   281290772 :                 sa->agf_bp = NULL;
     715             :         }
     716   928836975 :         if (sa->agi_bp) {
     717   281290789 :                 xfs_trans_brelse(sc->tp, sa->agi_bp);
     718   281289375 :                 sa->agi_bp = NULL;
     719             :         }
     720   928835561 :         if (sa->pag) {
     721   281324303 :                 xfs_perag_put(sa->pag);
     722   281324505 :                 sa->pag = NULL;
     723             :         }
     724   928835763 : }
     725             : 
     726             : /*
     727             :  * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
     728             :  * order.  Locking order requires us to get the AGI before the AGF.  We use the
     729             :  * transaction to avoid deadlocking on crosslinked metadata buffers; either the
     730             :  * caller passes one in (bmap scrub) or we have to create a transaction
     731             :  * ourselves.  Returns ENOENT if the perag struct cannot be grabbed.
     732             :  */
     733             : int
     734   279921159 : xchk_ag_init(
     735             :         struct xfs_scrub        *sc,
     736             :         xfs_agnumber_t          agno,
     737             :         struct xchk_ag          *sa)
     738             : {
     739   279921159 :         int                     error;
     740             : 
     741   279921159 :         error = xchk_ag_read_headers(sc, agno, sa);
     742   279968230 :         if (error)
     743             :                 return error;
     744             : 
     745   279953343 :         xchk_ag_btcur_init(sc, sa);
     746   279953343 :         return 0;
     747             : }
     748             : 
     749             : /* Per-scrubber setup functions */
     750             : 
/* Cancel the scrub transaction and forget about it. */
void
xchk_trans_cancel(
	struct xfs_scrub	*sc)
{
	xfs_trans_cancel(sc->tp);
	/* Clear the pointer so nothing reuses the cancelled transaction. */
	sc->tp = NULL;
}
     758             : 
/* Allocate an empty (no log reservation) transaction for this scrub. */
int
xchk_trans_alloc_empty(
	struct xfs_scrub	*sc)
{
	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}
     765             : 
     766             : /*
     767             :  * Grab an empty transaction so that we can re-grab locked buffers if
     768             :  * one of our btrees turns out to be cyclic.
     769             :  *
     770             :  * If we're going to repair something, we need to ask for the largest possible
     771             :  * log reservation so that we can handle the worst case scenario for metadata
     772             :  * updates while rebuilding a metadata item.  We also need to reserve as many
     773             :  * blocks in the head transaction as we think we're going to need to rebuild
     774             :  * the metadata object.
     775             :  */
     776             : int
     777   648015864 : xchk_trans_alloc(
     778             :         struct xfs_scrub        *sc,
     779             :         uint                    resblks)
     780             : {
     781   648015864 :         if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
     782    24736040 :                 return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
     783             :                                 resblks, 0, 0, &sc->tp);
     784             : 
     785   623279824 :         return xchk_trans_alloc_empty(sc);
     786             : }
     787             : 
     788             : /* Set us up with a transaction and an empty context. */
     789             : int
     790     2631594 : xchk_setup_fs(
     791             :         struct xfs_scrub        *sc)
     792             : {
     793     2631594 :         uint                    resblks;
     794             : 
     795     2631594 :         resblks = xrep_calc_ag_resblks(sc);
     796     2631574 :         return xchk_trans_alloc(sc, resblks);
     797             : }
     798             : 
     799             : /* Set us up with AG headers and btree cursors. */
     800             : int
     801      872308 : xchk_setup_ag_btree(
     802             :         struct xfs_scrub        *sc,
     803             :         bool                    force_log)
     804             : {
     805      872308 :         struct xfs_mount        *mp = sc->mp;
     806      872308 :         int                     error;
     807             : 
     808             :         /*
     809             :          * If the caller asks us to checkpont the log, do so.  This
     810             :          * expensive operation should be performed infrequently and only
     811             :          * as a last resort.  Any caller that sets force_log should
     812             :          * document why they need to do so.
     813             :          */
     814      872308 :         if (force_log) {
     815           0 :                 error = xchk_checkpoint_log(mp);
     816           0 :                 if (error)
     817             :                         return error;
     818             :         }
     819             : 
     820      872308 :         error = xchk_setup_fs(sc);
     821      872351 :         if (error)
     822             :                 return error;
     823             : 
     824      872340 :         return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
     825             : }
     826             : 
     827             : /* Push everything out of the log onto disk. */
     828             : int
     829           0 : xchk_checkpoint_log(
     830             :         struct xfs_mount        *mp)
     831             : {
     832           0 :         int                     error;
     833             : 
     834           0 :         error = xfs_log_force(mp, XFS_LOG_SYNC);
     835           0 :         if (error)
     836             :                 return error;
     837           0 :         xfs_ail_push_all_sync(mp->m_ail);
     838           0 :         return 0;
     839             : }
     840             : 
/*
 * Verify that an inode is allocated ondisk, then return its cached inode.
 * XFS_IGET_UNTRUSTED is used because the inumber typically comes from
 * userspace (sc->sm->sm_ino in the callers visible in this file) and must
 * be validated against ondisk metadata rather than trusted.
 */
int
xchk_iget(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{
	return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
}
     850             : 
     851             : /*
     852             :  * Try to grab an inode in a manner that avoids races with physical inode
     853             :  * allocation.  If we can't, return the locked AGI buffer so that the caller
     854             :  * can single-step the loading process to see where things went wrong.
     855             :  * Callers must have a valid scrub transaction.
     856             :  *
     857             :  * If the iget succeeds, return 0, a NULL AGI, and the inode.
     858             :  *
     859             :  * If the iget fails, return the error, the locked AGI, and a NULL inode.  This
     860             :  * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
     861             :  * no longer allocated; or any other corruption or runtime error.
     862             :  *
     863             :  * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
     864             :  *
     865             :  * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
     866             :  */
     867             : int
     868      310731 : xchk_iget_agi(
     869             :         struct xfs_scrub        *sc,
     870             :         xfs_ino_t               inum,
     871             :         struct xfs_buf          **agi_bpp,
     872             :         struct xfs_inode        **ipp)
     873             : {
     874      310731 :         struct xfs_mount        *mp = sc->mp;
     875      310731 :         struct xfs_trans        *tp = sc->tp;
     876      310731 :         struct xfs_perag        *pag;
     877      310731 :         int                     error;
     878             : 
     879      310731 :         ASSERT(sc->tp != NULL);
     880             : 
     881      310731 : again:
     882      311137 :         *agi_bpp = NULL;
     883      311137 :         *ipp = NULL;
     884      311137 :         error = 0;
     885             : 
     886      311137 :         if (xchk_should_terminate(sc, &error))
     887           0 :                 return error;
     888             : 
     889             :         /*
     890             :          * Attach the AGI buffer to the scrub transaction to avoid deadlocks
     891             :          * in the iget cache miss path.
     892             :          */
     893      311137 :         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
     894      311137 :         error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
     895      311137 :         xfs_perag_put(pag);
     896      311137 :         if (error)
     897           0 :                 return error;
     898             : 
     899      311137 :         error = xfs_iget(mp, tp, inum,
     900             :                         XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
     901      311137 :         if (error == -EAGAIN) {
     902             :                 /*
     903             :                  * The inode may be in core but temporarily unavailable and may
     904             :                  * require the AGI buffer before it can be returned.  Drop the
     905             :                  * AGI buffer and retry the lookup.
     906             :                  *
     907             :                  * Incore lookup will fail with EAGAIN on a cache hit if the
     908             :                  * inode is queued to the inactivation list.  The inactivation
     909             :                  * worker may remove the inode from the unlinked list and hence
     910             :                  * needs the AGI.
     911             :                  *
     912             :                  * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
     913             :                  * to allow inodegc to make progress and move the inode to
     914             :                  * IRECLAIMABLE state where xfs_iget will be able to return it
     915             :                  * again if it can lock the inode.
     916             :                  */
     917         406 :                 xfs_trans_brelse(tp, *agi_bpp);
     918         406 :                 delay(1);
     919         406 :                 goto again;
     920             :         }
     921      310731 :         if (error)
     922             :                 return error;
     923             : 
     924             :         /* We got the inode, so we can release the AGI. */
     925      287701 :         ASSERT(*ipp != NULL);
     926      287701 :         xfs_trans_brelse(tp, *agi_bpp);
     927      287701 :         *agi_bpp = NULL;
     928      287701 :         return 0;
     929             : }
     930             : 
     931             : #ifdef CONFIG_XFS_QUOTA
     932             : /*
     933             :  * Try to attach dquots to this inode if we think we might want to repair it.
     934             :  * Callers must not hold any ILOCKs.  If the dquots are broken and cannot be
     935             :  * attached, a quotacheck will be scheduled.
     936             :  */
     937             : int
     938   643585400 : xchk_ino_dqattach(
     939             :         struct xfs_scrub        *sc)
     940             : {
     941   643585400 :         ASSERT(sc->tp != NULL);
     942   643585400 :         ASSERT(sc->ip != NULL);
     943             : 
     944  1287170800 :         if (!xchk_could_repair(sc))
     945             :                 return 0;
     946             : 
     947    18090493 :         return xrep_ino_dqattach(sc);
     948             : }
     949             : #endif
     950             : 
     951             : /* Install an inode that we opened by handle for scrubbing. */
     952             : int
     953   490756288 : xchk_install_handle_inode(
     954             :         struct xfs_scrub        *sc,
     955             :         struct xfs_inode        *ip)
     956             : {
     957   490756288 :         if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
     958      583590 :                 xchk_irele(sc, ip);
     959      583590 :                 return -ENOENT;
     960             :         }
     961             : 
     962   490172698 :         sc->ip = ip;
     963   490172698 :         return 0;
     964             : }
     965             : 
     966             : /*
     967             :  * Install an already-referenced inode for scrubbing.  Get our own reference to
     968             :  * the inode to make disposal simpler.  The inode must not be in I_FREEING or
     969             :  * I_WILL_FREE state!
     970             :  */
     971             : int
     972   153518369 : xchk_install_live_inode(
     973             :         struct xfs_scrub        *sc,
     974             :         struct xfs_inode        *ip)
     975             : {
     976   153518369 :         if (!igrab(VFS_I(ip))) {
     977           0 :                 xchk_ino_set_corrupt(sc, ip->i_ino);
     978           0 :                 return -EFSCORRUPTED;
     979             :         }
     980             : 
     981   153517083 :         sc->ip = ip;
     982   153517083 :         return 0;
     983             : }
     984             : 
     985             : /*
     986             :  * In preparation to scrub metadata structures that hang off of an inode,
     987             :  * grab either the inode referenced in the scrub control structure or the
     988             :  * inode passed in.  If the inumber does not reference an allocated inode
     989             :  * record, the function returns ENOENT to end the scrub early.  The inode
     990             :  * is not locked.
     991             :  */
     992             : int
     993   543660263 : xchk_iget_for_scrubbing(
     994             :         struct xfs_scrub        *sc)
     995             : {
     996   543660263 :         struct xfs_imap         imap;
     997   543660263 :         struct xfs_mount        *mp = sc->mp;
     998   543660263 :         struct xfs_perag        *pag;
     999   543660263 :         struct xfs_buf          *agi_bp;
    1000   543660263 :         struct xfs_inode        *ip_in = XFS_I(file_inode(sc->file));
    1001   543660263 :         struct xfs_inode        *ip = NULL;
    1002   543660263 :         xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
    1003   543660263 :         int                     error;
    1004             : 
    1005   543660263 :         ASSERT(sc->tp == NULL);
    1006             : 
    1007             :         /* We want to scan the inode we already had opened. */
    1008   543660263 :         if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
    1009   127923997 :                 return xchk_install_live_inode(sc, ip_in);
    1010             : 
    1011             :         /* Reject internal metadata files and obviously bad inode numbers. */
    1012   415736266 :         if (xfs_internal_inum(mp, sc->sm->sm_ino))
    1013             :                 return -ENOENT;
    1014   415539144 :         if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
    1015             :                 return -ENOENT;
    1016             : 
    1017             :         /* Try a regular untrusted iget. */
    1018   415524352 :         error = xchk_iget(sc, sc->sm->sm_ino, &ip);
    1019   415528633 :         if (!error)
    1020   413055005 :                 return xchk_install_handle_inode(sc, ip);
    1021     2473628 :         if (error == -ENOENT)
    1022             :                 return error;
    1023       18909 :         if (error != -EINVAL)
    1024           0 :                 goto out_error;
    1025             : 
    1026             :         /*
    1027             :          * EINVAL with IGET_UNTRUSTED probably means one of several things:
    1028             :          * userspace gave us an inode number that doesn't correspond to fs
    1029             :          * space; the inode btree lacks a record for this inode; or there is a
    1030             :          * record, and it says this inode is free.
    1031             :          *
    1032             :          * We want to look up this inode in the inobt to distinguish two
    1033             :          * scenarios: (1) the inobt says the inode is free, in which case
    1034             :          * there's nothing to do; and (2) the inobt says the inode is
    1035             :          * allocated, but loading it failed due to corruption.
    1036             :          *
    1037             :          * Allocate a transaction and grab the AGI to prevent inobt activity
    1038             :          * in this AG.  Retry the iget in case someone allocated a new inode
    1039             :          * after the first iget failed.
    1040             :          */
    1041       18909 :         error = xchk_trans_alloc(sc, 0);
    1042       18909 :         if (error)
    1043           0 :                 goto out_error;
    1044             : 
    1045       18909 :         error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
    1046       18909 :         if (error == 0) {
    1047             :                 /* Actually got the inode, so install it. */
    1048           0 :                 xchk_trans_cancel(sc);
    1049           0 :                 return xchk_install_handle_inode(sc, ip);
    1050             :         }
    1051       18909 :         if (error == -ENOENT)
    1052           0 :                 goto out_gone;
    1053       18909 :         if (error != -EINVAL)
    1054           0 :                 goto out_cancel;
    1055             : 
    1056             :         /* Ensure that we have protected against inode allocation/freeing. */
    1057       18909 :         if (agi_bp == NULL) {
    1058           0 :                 ASSERT(agi_bp != NULL);
    1059           0 :                 error = -ECANCELED;
    1060           0 :                 goto out_cancel;
    1061             :         }
    1062             : 
    1063             :         /*
    1064             :          * Untrusted iget failed a second time.  Let's try an inobt lookup.
    1065             :          * If the inobt thinks this the inode neither can exist inside the
    1066             :          * filesystem nor is allocated, return ENOENT to signal that the check
    1067             :          * can be skipped.
    1068             :          *
    1069             :          * If the lookup returns corruption, we'll mark this inode corrupt and
    1070             :          * exit to userspace.  There's little chance of fixing anything until
    1071             :          * the inobt is straightened out, but there's nothing we can do here.
    1072             :          *
    1073             :          * If the lookup encounters any other error, exit to userspace.
    1074             :          *
    1075             :          * If the lookup succeeds, something else must be very wrong in the fs
    1076             :          * such that setting up the incore inode failed in some strange way.
    1077             :          * Treat those as corruptions.
    1078             :          */
    1079       18909 :         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
    1080       18909 :         if (!pag) {
    1081           0 :                 error = -EFSCORRUPTED;
    1082           0 :                 goto out_cancel;
    1083             :         }
    1084             : 
    1085       18909 :         error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
    1086             :                         XFS_IGET_UNTRUSTED);
    1087       18909 :         xfs_perag_put(pag);
    1088       18909 :         if (error == -EINVAL || error == -ENOENT)
    1089       18909 :                 goto out_gone;
    1090           0 :         if (!error)
    1091           0 :                 error = -EFSCORRUPTED;
    1092             : 
    1093           0 : out_cancel:
    1094           0 :         xchk_trans_cancel(sc);
    1095           0 : out_error:
    1096           0 :         xchk_whine(mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
    1097           0 :                         xchk_type_string(sc->sm->sm_type), agno,
    1098           0 :                         XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error,
    1099             :                         __return_address);
    1100           0 :         trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
    1101             :                         error, __return_address);
    1102           0 :         return error;
    1103       18909 : out_gone:
    1104             :         /* The file is gone, so there's nothing to check. */
    1105       18909 :         xchk_trans_cancel(sc);
    1106       18909 :         return -ENOENT;
    1107             : }
    1108             : 
/*
 * Release an inode, possibly dropping it in the process.
 *
 * Inside a transaction we must not let iput trigger inode eviction (which
 * can itself require a transaction), so we clear DONTCACHE and let the
 * inode age out on the LRU instead.  Outside a transaction, the last
 * reference is marked DONTCACHE so scrub doesn't pollute the inode cache.
 */
void
xchk_irele(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	/* journal_info is non-NULL iff this task holds a transaction. */
	if (current->journal_info != NULL) {
		ASSERT(current->journal_info == sc->tp);

		/*
		 * If we are in a transaction, we /cannot/ drop the inode
		 * ourselves, because the VFS will trigger writeback, which
		 * can require a transaction.  Clear DONTCACHE to force the
		 * inode to the LRU, where someone else can take care of
		 * dropping it.
		 *
		 * Note that when we grabbed our reference to the inode, it
		 * could have had an active ref and DONTCACHE set if a sysadmin
		 * is trying to coerce a change in file access mode.  icache
		 * hits do not clear DONTCACHE, so we must do it here.
		 */
		spin_lock(&VFS_I(ip)->i_lock);
		VFS_I(ip)->i_state &= ~I_DONTCACHE;
		spin_unlock(&VFS_I(ip)->i_lock);
	} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
		/*
		 * If this is the last reference to the inode and the caller
		 * permits it, set DONTCACHE to avoid thrashing.
		 */
		d_mark_dontcache(VFS_I(ip));
	}

	xfs_irele(ip);
}
    1143             : 
    1144             : /*
    1145             :  * Set us up to scrub metadata mapped by a file's fork.  Callers must not use
    1146             :  * this to operate on user-accessible regular file data because the MMAPLOCK is
    1147             :  * not taken.
    1148             :  */
    1149             : int
    1150   245352461 : xchk_setup_inode_contents(
    1151             :         struct xfs_scrub        *sc,
    1152             :         unsigned int            resblks)
    1153             : {
    1154   245352461 :         int                     error;
    1155             : 
    1156   245352461 :         error = xchk_iget_for_scrubbing(sc);
    1157   245327814 :         if (error)
    1158             :                 return error;
    1159             : 
    1160             :         /* Lock the inode so the VFS cannot touch this file. */
    1161   244012629 :         xchk_ilock(sc, XFS_IOLOCK_EXCL);
    1162             : 
    1163   244018935 :         error = xchk_trans_alloc(sc, resblks);
    1164   244008186 :         if (error)
    1165           0 :                 goto out;
    1166             : 
    1167   244008186 :         error = xchk_ino_dqattach(sc);
    1168   244009615 :         if (error)
    1169           0 :                 goto out;
    1170             : 
    1171   244009615 :         xchk_ilock(sc, XFS_ILOCK_EXCL);
    1172             : out:
    1173             :         /* scrub teardown will unlock and release the inode for us */
    1174             :         return error;
    1175             : }
    1176             : 
/* Lock the scrub target's inode and record the lock state for teardown. */
void
xchk_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	xfs_ilock(sc->ip, ilock_flags);
	/* Remember which locks we hold so they can be dropped later. */
	sc->ilock_flags |= ilock_flags;
}
    1185             : 
    1186             : bool
    1187    91367777 : xchk_ilock_nowait(
    1188             :         struct xfs_scrub        *sc,
    1189             :         unsigned int            ilock_flags)
    1190             : {
    1191    91367777 :         if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
    1192    91360507 :                 sc->ilock_flags |= ilock_flags;
    1193    91360507 :                 return true;
    1194             :         }
    1195             : 
    1196             :         return false;
    1197             : }
    1198             : 
/* Unlock the scrub target's inode and clear the recorded lock state. */
void
xchk_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	/* Drop our bookkeeping of the held flags before the actual unlock. */
	sc->ilock_flags &= ~ilock_flags;
	xfs_iunlock(sc->ip, ilock_flags);
}
    1207             : 
    1208             : /*
    1209             :  * Predicate that decides if we need to evaluate the cross-reference check.
    1210             :  * If there was an error accessing the cross-reference btree, just delete
    1211             :  * the cursor and skip the check.
    1212             :  */
    1213             : bool
    1214 11133273750 : xchk_should_check_xref(
    1215             :         struct xfs_scrub        *sc,
    1216             :         int                     *error,
    1217             :         struct xfs_btree_cur    **curpp)
    1218             : {
    1219             :         /* No point in xref if we already know we're corrupt. */
    1220 11133273750 :         if (xchk_skip_xref(sc->sm))
    1221             :                 return false;
    1222             : 
    1223 11133273750 :         if (*error == 0)
    1224             :                 return true;
    1225             : 
    1226           0 :         if (curpp) {
    1227             :                 /* If we've already given up on xref, just bail out. */
    1228           0 :                 if (!*curpp)
    1229             :                         return false;
    1230             : 
    1231             :                 /* xref error, delete cursor and bail out. */
    1232           0 :                 xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
    1233           0 :                 *curpp = NULL;
    1234             :         }
    1235             : 
    1236           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
    1237           0 :         xchk_whine(sc->mp, "type %s xref error %d ret_ip %pS",
    1238           0 :                         xchk_type_string(sc->sm->sm_type),
    1239             :                         *error,
    1240             :                         __return_address);
    1241           0 :         trace_xchk_xref_error(sc, *error, __return_address);
    1242             : 
    1243             :         /*
    1244             :          * Errors encountered during cross-referencing with another
    1245             :          * data structure should not cause this scrubber to abort.
    1246             :          */
    1247           0 :         *error = 0;
    1248           0 :         return false;
    1249             : }
    1250             : 
    1251             : /* Run the structure verifiers on in-memory buffers to detect bad memory. */
    1252             : void
    1253    76226847 : xchk_buffer_recheck(
    1254             :         struct xfs_scrub        *sc,
    1255             :         struct xfs_buf          *bp)
    1256             : {
    1257    76226847 :         xfs_failaddr_t          fa;
    1258             : 
    1259    76226847 :         if (bp->b_ops == NULL) {
    1260           0 :                 xchk_block_set_corrupt(sc, bp);
    1261           0 :                 return;
    1262             :         }
    1263    76226847 :         if (bp->b_ops->verify_struct == NULL) {
    1264           0 :                 xchk_set_incomplete(sc);
    1265           0 :                 return;
    1266             :         }
    1267    76226847 :         fa = bp->b_ops->verify_struct(bp);
    1268    76223263 :         if (!fa)
    1269             :                 return;
    1270           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
    1271           0 :         trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
    1272           0 :         xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
    1273           0 :                         xchk_type_string(sc->sm->sm_type),
    1274             :                         xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
    1275             :                         xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
    1276             :                         fa);
    1277             : }
    1278             : 
    1279             : static inline int
    1280      206370 : xchk_metadata_inode_subtype(
    1281             :         struct xfs_scrub        *sc,
    1282             :         unsigned int            scrub_type)
    1283             : {
    1284      206370 :         __u32                   smtype = sc->sm->sm_type;
    1285      206370 :         int                     error;
    1286             : 
    1287      206370 :         sc->sm->sm_type = scrub_type;
    1288             : 
    1289      206370 :         switch (scrub_type) {
    1290      103184 :         case XFS_SCRUB_TYPE_INODE:
    1291      103184 :                 error = xchk_inode(sc);
    1292      103184 :                 break;
    1293      103186 :         case XFS_SCRUB_TYPE_BMBTD:
    1294      103186 :                 error = xchk_bmap_data(sc);
    1295      103186 :                 break;
    1296           0 :         default:
    1297           0 :                 ASSERT(0);
    1298           0 :                 error = -EFSCORRUPTED;
    1299           0 :                 break;
    1300             :         }
    1301             : 
    1302      206372 :         sc->sm->sm_type = smtype;
    1303      206372 :         return error;
    1304             : }
    1305             : 
    1306             : /*
    1307             :  * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
    1308             :  * pointed to by sc->ip and the ILOCK must be held.
    1309             :  */
    1310             : int
    1311      103185 : xchk_metadata_inode_forks(
    1312             :         struct xfs_scrub        *sc)
    1313             : {
    1314      103185 :         bool                    shared;
    1315      103185 :         int                     error;
    1316             : 
    1317      103185 :         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
    1318             :                 return 0;
    1319             : 
    1320             :         /* Check the inode record. */
    1321      103181 :         error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
    1322      103186 :         if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
    1323             :                 return error;
    1324             : 
    1325             :         /* Metadata inodes don't live on the rt device. */
    1326      103186 :         if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
    1327           0 :                 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
    1328           0 :                 return 0;
    1329             :         }
    1330             : 
    1331             :         /* They should never participate in reflink. */
    1332      103186 :         if (xfs_is_reflink_inode(sc->ip)) {
    1333           0 :                 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
    1334           0 :                 return 0;
    1335             :         }
    1336             : 
    1337             :         /* They also should never have extended attributes. */
    1338      103186 :         if (xfs_inode_hasattr(sc->ip)) {
    1339           0 :                 xchk_ino_set_corrupt(sc, sc->ip->i_ino);
    1340           0 :                 return 0;
    1341             :         }
    1342             : 
    1343             :         /* Invoke the data fork scrubber. */
    1344      103186 :         error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
    1345      103186 :         if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
    1346             :                 return error;
    1347             : 
    1348             :         /* Look for incorrect shared blocks. */
    1349      103186 :         if (xfs_has_reflink(sc->mp)) {
    1350       55168 :                 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
    1351             :                                 &shared);
    1352      110336 :                 if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
    1353             :                                 &error))
    1354           0 :                         return error;
    1355       55168 :                 if (shared)
    1356           0 :                         xchk_ino_set_corrupt(sc, sc->ip->i_ino);
    1357             :         }
    1358             : 
    1359             :         return 0;
    1360             : }
    1361             : 
    1362             : /*
    1363             :  * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
    1364             :  * operation.  Callers must not hold any locks that intersect with the CPU
    1365             :  * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
    1366             :  * to change kernel code.
    1367             :  */
void
xchk_fsgates_enable(
	struct xfs_scrub	*sc,
	unsigned int		scrub_fsgates)
{
	/* Only known gates may be requested, and only ones not yet enabled. */
	ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
	ASSERT(!(sc->flags & scrub_fsgates));

	trace_xchk_fsgates_enable(sc, scrub_fsgates);

	if (scrub_fsgates & XCHK_FSGATES_DRAIN)
		xfs_drain_wait_enable();

	if (scrub_fsgates & XCHK_FSGATES_QUOTA)
		xfs_dqtrx_hook_enable();

	if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
		xfs_dir_hook_enable();

	if (scrub_fsgates & XCHK_FSGATES_RMAP)
		xfs_rmap_hook_enable();

	/* Record the enabled gates in the scrub context. */
	sc->flags |= scrub_fsgates;
}
    1392             : 
    1393             : /*
    1394             :  * Decide if this is this a cached inode that's also allocated.  The caller
    1395             :  * must hold a reference to an AG and the AGI buffer lock to prevent inodes
    1396             :  * from being allocated or freed.
    1397             :  *
    1398             :  * Look up an inode by number in the given file system.  If the inode number
    1399             :  * is invalid, return -EINVAL.  If the inode is not in cache, return -ENODATA.
    1400             :  * If the inode is being reclaimed, return -ENODATA because we know the inode
    1401             :  * cache cannot be updating the ondisk metadata.
    1402             :  *
    1403             :  * Otherwise, the incore inode is the one we want, and it is either live,
    1404             :  * somewhere in the inactivation machinery, or reclaimable.  The inode is
    1405             :  * allocated if i_mode is nonzero.  In all three cases, the cached inode will
    1406             :  * be more up to date than the ondisk inode buffer, so we must use the incore
    1407             :  * i_mode.
    1408             :  */
int
xchk_inode_is_allocated(
	struct xfs_scrub	*sc,
	xfs_agino_t		agino,
	bool			*inuse)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_ino_t		ino;
	struct xfs_inode	*ip;
	int			error;

	/* caller must hold perag reference */
	if (pag == NULL) {
		ASSERT(pag != NULL);
		return -EINVAL;
	}

	/* caller must have AGI buffer */
	if (sc->sa.agi_bp == NULL) {
		ASSERT(sc->sa.agi_bp != NULL);
		return -EINVAL;
	}

	/* reject inode numbers outside existing AGs */
	ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
	if (!xfs_verify_ino(mp, ino))
		return -EINVAL;

	/* Assume a cache miss until we find a matching incore inode. */
	error = -ENODATA;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
	if (!ip) {
		/* cache miss */
		goto out_rcu;
	}

	/*
	 * If the inode number doesn't match, the incore inode got reused
	 * during an RCU grace period and the radix tree hasn't been updated.
	 * This isn't the inode we want.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino)
		goto out_skip;

	trace_xchk_inode_is_allocated(ip);

	/*
	 * We have an incore inode that matches the inode we want, and the
	 * caller holds the perag structure and the AGI buffer.  Let's check
	 * our assumptions below:
	 */

#ifdef DEBUG
	/*
	 * (1) If the incore inode is live (i.e. referenced from the dcache),
	 * it will not be INEW, nor will it be in the inactivation or reclaim
	 * machinery.  The ondisk inode had better be allocated.  This is the
	 * most trivial case.
	 */
	if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
			     XFS_INACTIVATING))) {
		/* live inode */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}

	/*
	 * If the incore inode is INEW, there are several possibilities:
	 *
	 * (2) For a file that is being created, note that we allocate the
	 * ondisk inode before allocating, initializing, and adding the incore
	 * inode to the radix tree.
	 *
	 * (3) If the incore inode is being recycled, the inode has to be
	 * allocated because we don't allow freed inodes to be recycled.
	 * Recycling doesn't touch i_mode.
	 */
	if (ip->i_flags & XFS_INEW) {
		/* created on disk already or recycling */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}

	/*
	 * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
	 * inactivation has not started (!INACTIVATING), it is still allocated.
	 */
	if ((ip->i_flags & XFS_NEED_INACTIVE) &&
	    !(ip->i_flags & XFS_INACTIVATING)) {
		/* definitely before difree */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}
#endif

	/*
	 * If the incore inode is undergoing inactivation (INACTIVATING), there
	 * are two possibilities:
	 *
	 * (5) It is before the point where it would get freed ondisk, in which
	 * case i_mode is still nonzero.
	 *
	 * (6) It has already been freed, in which case i_mode is zero.
	 *
	 * We don't take the ILOCK here, but difree and dialloc update the AGI,
	 * and we've taken the AGI buffer lock, which prevents that from
	 * happening.
	 */

	/*
	 * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
	 * reclaim (IRECLAIMABLE) could be allocated or free.  i_mode still
	 * reflects the ondisk state.
	 */

	/*
	 * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
	 * the flush code uses i_mode to format the ondisk inode.
	 */

	/*
	 * (9) If the inode is in IRECLAIM and was reachable via the radix
	 * tree, it still has the same i_mode as it did before it entered
	 * reclaim.  The inode object is still alive because we hold the RCU
	 * read lock.
	 */

	/* Per the cases above, nonzero i_mode means the inode is allocated. */
	*inuse = VFS_I(ip)->i_mode != 0;
	error = 0;

out_skip:
	spin_unlock(&ip->i_flags_lock);
out_rcu:
	rcu_read_unlock();
	return error;
}
    1544             : 
    1545             : /* Complain about failures... */
    1546             : void
    1547          62 : xchk_whine(
    1548             :         const struct xfs_mount  *mp,
    1549             :         const char              *fmt,
    1550             :         ...)
    1551             : {
    1552          62 :         struct va_format        vaf;
    1553          62 :         va_list                 args;
    1554             : 
    1555          62 :         va_start(args, fmt);
    1556             : 
    1557          62 :         vaf.fmt = fmt;
    1558          62 :         vaf.va = &args;
    1559             : 
    1560          62 :         printk(KERN_INFO "XFS (%s) %pS: %pV\n", mp->m_super->s_id,
    1561             :                         __return_address, &vaf);
    1562          62 :         va_end(args);
    1563             : 
    1564          62 :         if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
    1565           0 :                 xfs_stack_trace();
    1566          62 : }

Generated by: LCOV version 1.14