LCOV - code coverage report
Current view: top level - fs/xfs/scrub - common.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 511 654 78.1 %
Date: 2023-07-31 20:08:27 Functions: 54 63 85.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_btree_staging.h"
      14             : #include "xfs_log_format.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_icache.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_alloc_btree.h"
      20             : #include "xfs_ialloc.h"
      21             : #include "xfs_ialloc_btree.h"
      22             : #include "xfs_refcount_btree.h"
      23             : #include "xfs_rmap.h"
      24             : #include "xfs_rmap_btree.h"
      25             : #include "xfs_log.h"
      26             : #include "xfs_trans_priv.h"
      27             : #include "xfs_da_format.h"
      28             : #include "xfs_da_btree.h"
      29             : #include "xfs_dir2_priv.h"
      30             : #include "xfs_dir2.h"
      31             : #include "xfs_attr.h"
      32             : #include "xfs_reflink.h"
      33             : #include "xfs_ag.h"
      34             : #include "xfs_error.h"
      35             : #include "xfs_quota.h"
      36             : #include "xfs_swapext.h"
      37             : #include "xfs_rtbitmap.h"
      38             : #include "xfs_rtgroup.h"
      39             : #include "xfs_rtrmap_btree.h"
      40             : #include "xfs_bmap_util.h"
      41             : #include "xfs_rtrefcount_btree.h"
      42             : #include "scrub/scrub.h"
      43             : #include "scrub/common.h"
      44             : #include "scrub/trace.h"
      45             : #include "scrub/repair.h"
      46             : #include "scrub/health.h"
      47             : 
      48             : /* Common code for the metadata scrubbers. */
      49             : 
      50             : /*
      51             :  * Handling operational errors.
      52             :  *
      53             :  * The *_process_error() family of functions are used to process error return
      54             :  * codes from functions called as part of a scrub operation.
      55             :  *
      56             :  * If there's no error, we return true to tell the caller that it's ok
      57             :  * to move on to the next check in its list.
      58             :  *
      59             :  * For non-verifier errors (e.g. ENOMEM) we return false to tell the
      60             :  * caller that something bad happened, and we preserve *error so that
      61             :  * the caller can return the *error up the stack to userspace.
      62             :  *
      63             :  * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
      64             :  * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
      65             :  * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
      66             :  * not via return codes.  We return false to tell the caller that
      67             :  * something bad happened.  Since the error has been cleared, the caller
      68             :  * will (presumably) return that zero and scrubbing will move on to
      69             :  * whatever's next.
      70             :  *
      71             :  * ftrace can be used to record the precise metadata location and the
      72             :  * approximate code location of the failed operation.
      73             :  */
      74             : 
/*
 * Check for operational errors.
 *
 * Returns true only when *error is zero, telling the caller to continue.
 * Otherwise returns false; verifier errors (EFSBADCRC/EFSCORRUPTED) and
 * ECANCELED additionally clear *error after recording the outcome in
 * sm_flags / the trace buffer, so the caller propagates zero upward.
 *
 * @sc:      scrub context
 * @agno:    AG number of the metadata being scrubbed (for tracing)
 * @bno:     AG block number of the metadata being scrubbed (for tracing)
 * @error:   in/out error code from the failed operation
 * @errflag: XFS_SCRUB_OFLAG_* bit to set on a verifier error
 * @ret_ip:  caller's return address, recorded for tracing/whining
 */
static bool
__xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(
				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
				sc->sm, *error);
		break;
	case -ECANCELED:
		/*
		 * ECANCELED here means that the caller set one of the scrub
		 * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
		 * quickly.  Set error to zero and do not continue.
		 */
		trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
		*error = 0;
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d errflag 0x%x ret_ip %pS",
				xchk_type_string(sc->sm->sm_type),
				agno,
				bno,
				*error,
				errflag,
				ret_ip);
		*error = 0;
		fallthrough;
	default:
		/*
		 * *error was zeroed above for verifier errors, so this whine
		 * only fires for unexpected error codes that landed directly
		 * in the default case.
		 */
		if (*error)
			xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
					xchk_type_string(sc->sm->sm_type),
					agno,
					bno,
					*error,
					ret_ip);
		trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
		break;
	}
	return false;
}
     130             : 
/*
 * Process an operational error against an AG location; verifier errors are
 * recorded as OFLAG_CORRUPT.  Returns true if scrubbing should continue.
 */
bool
xchk_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
     141             : 
/*
 * Process an operational error against a realtime group location; verifier
 * errors are recorded as OFLAG_CORRUPT.  Returns true if scrubbing should
 * continue.
 */
bool
xchk_process_rt_error(
	struct xfs_scrub	*sc,
	xfs_rgnumber_t		rgno,
	xfs_rgblock_t		rgbno,
	int			*error)
{
	return __xchk_process_error(sc, rgno, rgbno, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
     152             : 
/*
 * Process an operational error seen during cross-referencing; verifier
 * errors are recorded as OFLAG_XFAIL (the cross-reference failed) rather
 * than OFLAG_CORRUPT.  Returns true if scrubbing should continue.
 */
bool
xchk_xref_process_error(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno,
	xfs_agblock_t		bno,
	int			*error)
{
	return __xchk_process_error(sc, agno, bno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
     163             : 
/*
 * Process an operational error seen while cross-referencing a realtime
 * group location; verifier errors are recorded as OFLAG_XFAIL.  Returns
 * true if scrubbing should continue.
 */
bool
xchk_xref_process_rt_error(
	struct xfs_scrub	*sc,
	xfs_rgnumber_t		rgno,
	xfs_rgblock_t		rgbno,
	int			*error)
{
	return __xchk_process_error(sc, rgno, rgbno, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
     174             : 
/*
 * Check for operational errors for a file offset.
 *
 * File-fork flavor of __xchk_process_error: identical semantics, but the
 * metadata location is (inode, fork, file offset) instead of (agno, agbno).
 * Returns true only when *error is zero; verifier errors and ECANCELED
 * clear *error after recording the outcome.
 *
 * @sc:        scrub context
 * @whichfork: which inode fork (data/attr/cow) the error refers to
 * @offset:    file block offset being scrubbed (for tracing)
 * @error:     in/out error code from the failed operation
 * @errflag:   XFS_SCRUB_OFLAG_* bit to set on a verifier error
 * @ret_ip:    caller's return address, recorded for tracing/whining
 */
static bool
__xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error,
	__u32			errflag,
	void			*ret_ip)
{
	switch (*error) {
	case 0:
		return true;
	case -EDEADLOCK:
	case -ECHRNG:
		/* Used to restart an op with deadlock avoidance. */
		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
		break;
	case -ECANCELED:
		/*
		 * ECANCELED here means that the caller set one of the scrub
		 * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
		 * quickly.  Set error to zero and do not continue.
		 */
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		*error = 0;
		break;
	case -EFSBADCRC:
	case -EFSCORRUPTED:
		/* Note the badness but don't abort. */
		sc->sm->sm_flags |= errflag;
		xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d errflag 0x%x ret_ip %pS",
				sc->ip->i_ino,
				whichfork,
				xchk_type_string(sc->sm->sm_type),
				offset,
				*error,
				errflag,
				ret_ip);
		*error = 0;
		fallthrough;
	default:
		/*
		 * *error was zeroed above for verifier errors, so this whine
		 * only fires for unexpected error codes that landed directly
		 * in the default case.
		 */
		if (*error)
			xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS",
					sc->ip->i_ino,
					whichfork,
					xchk_type_string(sc->sm->sm_type),
					offset,
					*error,
					ret_ip);
		trace_xchk_file_op_error(sc, whichfork, offset, *error,
				ret_ip);
		break;
	}
	return false;
}
     232             : 
/*
 * Process an operational error against a file fork location; verifier
 * errors are recorded as OFLAG_CORRUPT.  Returns true if scrubbing should
 * continue.
 */
bool
xchk_fblock_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
}
     243             : 
/*
 * Process an operational error seen while cross-referencing a file fork
 * location; verifier errors are recorded as OFLAG_XFAIL.  Returns true if
 * scrubbing should continue.
 */
bool
xchk_fblock_xref_process_error(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset,
	int			*error)
{
	return __xchk_fblock_process_error(sc, whichfork, offset, error,
			XFS_SCRUB_OFLAG_XFAIL, __return_address);
}
     254             : 
     255             : /*
     256             :  * Handling scrub corruption/optimization/warning checks.
     257             :  *
     258             :  * The *_set_{corrupt,preen,warning}() family of functions are used to
     259             :  * record the presence of metadata that is incorrect (corrupt), could be
     260             :  * optimized somehow (preen), or should be flagged for administrative
     261             :  * review but is not incorrect (warn).
     262             :  *
     263             :  * ftrace can be used to record the precise metadata location and
     264             :  * approximate code location of the failed check.
     265             :  */
     266             : 
/*
 * Record a block which could be optimized.  Sets OFLAG_PREEN and logs the
 * buffer's disk address and the caller's return address to the trace buffer.
 */
void
xchk_block_set_preen(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
}
     276             : 
/*
 * Record an inode which could be optimized.  Sets OFLAG_PREEN and logs the
 * inode number and the caller's return address to the trace buffer.
 */
void
xchk_ino_set_preen(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
	trace_xchk_ino_preen(sc, ino, __return_address);
}
     290             : 
/*
 * Record something being wrong with the filesystem primary superblock.
 * Sets OFLAG_CORRUPT, whines to the log, and records a fs-level trace
 * event with the caller's return address.
 */
void
xchk_set_corrupt(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	xchk_whine(sc->mp, "type %s ret_ip %pS", xchk_type_string(sc->sm->sm_type),
			__return_address);
	trace_xchk_fs_error(sc, 0, __return_address);
}
     301             : 
/*
 * Record a corrupt block.  Sets OFLAG_CORRUPT, traces the buffer's disk
 * address, and whines the (agno, agbno) decoded from that address.
 */
void
xchk_block_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
	xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
			xchk_type_string(sc->sm->sm_type),
			xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
			xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
			__return_address);
}
     316             : 
#ifdef CONFIG_XFS_QUOTA
/*
 * Record a corrupt quota counter.  Sets OFLAG_CORRUPT and logs the quota
 * type and dquot id.  Only built when quota support is configured in.
 */
void
xchk_qcheck_set_corrupt(
	struct xfs_scrub	*sc,
	unsigned int		dqtype,
	xfs_dqid_t		id)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	xchk_whine(sc->mp, "type %s dqtype %u id %u ret_ip %pS",
			xchk_type_string(sc->sm->sm_type), dqtype, id, __return_address);
	trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
}
#endif /* CONFIG_XFS_QUOTA */
     331             : 
/*
 * Record a corruption while cross-referencing.  Same as
 * xchk_block_set_corrupt but sets OFLAG_XCORRUPT, marking the damage as
 * found in the cross-referenced metadata rather than the primary target.
 */
void
xchk_block_xref_set_corrupt(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
	xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
			xchk_type_string(sc->sm->sm_type),
			xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
			xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
			__return_address);
}
     346             : 
/*
 * Record a corrupt inode.  Sets OFLAG_CORRUPT and logs the inode number
 * and the caller's return address.
 */
void
xchk_ino_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
			ino, xchk_type_string(sc->sm->sm_type), __return_address);
	trace_xchk_ino_error(sc, ino, __return_address);
}
     362             : 
/*
 * Record a corruption while cross-referencing with an inode.  Same as
 * xchk_ino_set_corrupt but sets OFLAG_XCORRUPT instead of OFLAG_CORRUPT.
 */
void
xchk_ino_xref_set_corrupt(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
			ino, xchk_type_string(sc->sm->sm_type), __return_address);
	trace_xchk_ino_error(sc, ino, __return_address);
}
     374             : 
/*
 * Record corruption in a block indexed by a file fork.  Sets OFLAG_CORRUPT
 * and logs the inode, fork, and file offset of the bad block.
 */
void
xchk_fblock_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
	xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
			sc->ip->i_ino,
			whichfork,
			xchk_type_string(sc->sm->sm_type),
			offset,
			__return_address);
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}
     391             : 
/*
 * Record a corruption while cross-referencing a fork block.  Same as
 * xchk_fblock_set_corrupt but sets OFLAG_XCORRUPT instead of OFLAG_CORRUPT.
 */
void
xchk_fblock_xref_set_corrupt(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
	xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
			sc->ip->i_ino,
			whichfork,
			xchk_type_string(sc->sm->sm_type),
			offset,
			__return_address);
	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
}
     408             : 
/*
 * Warn about inodes that need administrative review but are not
 * incorrect.  Sets OFLAG_WARNING and logs the inode number.
 */
void
xchk_ino_set_warning(
	struct xfs_scrub	*sc,
	xfs_ino_t		ino)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
			ino, xchk_type_string(sc->sm->sm_type), __return_address);
	trace_xchk_ino_warning(sc, ino, __return_address);
}
     423             : 
/*
 * Warn about a block indexed by a file fork that needs review.  Sets
 * OFLAG_WARNING and logs the inode, fork, and file offset.
 */
void
xchk_fblock_set_warning(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		offset)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
	xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
			sc->ip->i_ino,
			whichfork,
			xchk_type_string(sc->sm->sm_type),
			offset,
			__return_address);
	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
}
     440             : 
/*
 * Signal an incomplete scrub.  Sets OFLAG_INCOMPLETE so userspace knows
 * the checks could not be finished (e.g. resource contention or a cancelled
 * scan), which is distinct from finding corruption.
 */
void
xchk_set_incomplete(
	struct xfs_scrub	*sc)
{
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
	trace_xchk_incomplete(sc, __return_address);
}
     449             : 
/*
 * rmap scrubbing -- compute the number of blocks with a given owner,
 * at least according to the reverse mapping data.
 */

/* Query context passed through xfs_rmap_query_all to the per-record fn. */
struct xchk_rmap_ownedby_info {
	const struct xfs_owner_info	*oinfo;		/* owner to match */
	xfs_filblks_t			*blocks;	/* running block total */
};
     459             : 
     460             : STATIC int
     461  9956809765 : xchk_count_rmap_ownedby_irec(
     462             :         struct xfs_btree_cur            *cur,
     463             :         const struct xfs_rmap_irec      *rec,
     464             :         void                            *priv)
     465             : {
     466  9956809765 :         struct xchk_rmap_ownedby_info   *sroi = priv;
     467  9956809765 :         bool                            irec_attr;
     468  9956809765 :         bool                            oinfo_attr;
     469             : 
     470  9956809765 :         irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
     471  9956809765 :         oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
     472             : 
     473  9956809765 :         if (rec->rm_owner != sroi->oinfo->oi_owner)
     474             :                 return 0;
     475             : 
     476    50950148 :         if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
     477    50950148 :                 (*sroi->blocks) += rec->rm_blockcount;
     478             : 
     479             :         return 0;
     480             : }
     481             : 
     482             : /*
     483             :  * Calculate the number of blocks the rmap thinks are owned by something.
     484             :  * The caller should pass us an rmapbt cursor.
     485             :  */
     486             : int
     487      999308 : xchk_count_rmap_ownedby_ag(
     488             :         struct xfs_scrub                *sc,
     489             :         struct xfs_btree_cur            *cur,
     490             :         const struct xfs_owner_info     *oinfo,
     491             :         xfs_filblks_t                   *blocks)
     492             : {
     493      999308 :         struct xchk_rmap_ownedby_info   sroi = {
     494             :                 .oinfo                  = oinfo,
     495             :                 .blocks                 = blocks,
     496             :         };
     497             : 
     498      999308 :         *blocks = 0;
     499      999308 :         return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
     500             :                         &sroi);
     501             : }
     502             : 
     503             : /*
     504             :  * AG scrubbing
     505             :  *
     506             :  * These helpers facilitate locking an allocation group's header
     507             :  * buffers, setting up cursors for all btrees that are present, and
     508             :  * cleaning everything up once we're through.
     509             :  */
     510             : 
     511             : /* Decide if we want to return an AG header read failure. */
     512             : static inline bool
     513             : want_ag_read_header_failure(
     514             :         struct xfs_scrub        *sc,
     515             :         unsigned int            type)
     516             : {
     517             :         /* Return all AG header read failures when scanning btrees. */
     518           0 :         if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
     519           0 :             sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
     520             :             sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
     521             :                 return true;
     522             :         /*
     523             :          * If we're scanning a given type of AG header, we only want to
     524             :          * see read failures from that specific header.  We'd like the
     525             :          * other headers to cross-check them, but this isn't required.
     526             :          */
     527           0 :         if (sc->sm->sm_type == type)
     528             :                 return true;
     529             :         return false;
     530             : }
     531             : 
/*
 * Grab the AG header buffers for the attached perag structure.
 *
 * The headers should be released by xchk_ag_free, but as a fail safe we attach
 * all the buffers we grab to the scrub transaction so they'll all be freed
 * when we cancel it.
 *
 * The AGI is read before the AGF; a read failure is only propagated when
 * want_ag_read_header_failure() says the caller cares about that header.
 * NOTE(review): AGI-before-AGF presumably matches the fs-wide header
 * locking order -- confirm against the rest of the AG locking code.
 */
static inline int
xchk_perag_read_headers(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	int			error;

	error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
		return error;

	error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
		return error;

	return 0;
}
     556             : 
/*
 * Grab the AG headers for the attached perag structure and wait for pending
 * intents to drain.
 */
int
xchk_perag_drain_and_lock(
	struct xfs_scrub	*sc)
{
	struct xchk_ag		*sa = &sc->sa;
	int			error = 0;

	/* Caller must have attached the perag but no AG headers yet. */
	ASSERT(sa->pag != NULL);
	ASSERT(sa->agi_bp == NULL);
	ASSERT(sa->agf_bp == NULL);

	do {
		/* Bail out of the retry loop if a fatal signal is pending. */
		if (xchk_should_terminate(sc, &error))
			return error;

		error = xchk_perag_read_headers(sc, sa);
		if (error)
			return error;

		/*
		 * If we've grabbed an inode for scrubbing then we assume that
		 * holding its ILOCK will suffice to coordinate with any intent
		 * chains involving this inode.
		 */
		if (sc->ip)
			return 0;

		/*
		 * Decide if this AG is quiet enough for all metadata to be
		 * consistent with each other.  XFS allows the AG header buffer
		 * locks to cycle across transaction rolls while processing
		 * chains of deferred ops, which means that there could be
		 * other threads in the middle of processing a chain of
		 * deferred ops.  For regular operations we are careful about
		 * ordering operations to prevent collisions between threads
		 * (which is why we don't need a per-AG lock), but scrub and
		 * repair have to serialize against chained operations.
		 *
		 * We just locked all the AG header buffers; now take a look
		 * to see if there are any intents in progress.  If there are,
		 * drop the AG headers and wait for the intents to drain.
		 * Since we hold all the AG header locks for the duration of
		 * the scrub, this is the only time we have to sample the
		 * intents counter; any threads increasing it after this point
		 * can't possibly be in the middle of a chain of AG metadata
		 * updates.
		 *
		 * Obviously, this should be slanted against scrub and in favor
		 * of runtime threads.
		 */
		if (!xfs_perag_intent_busy(sa->pag))
			return 0;

		/* Intents are running: drop both header buffers... */
		if (sa->agf_bp) {
			xfs_trans_brelse(sc->tp, sa->agf_bp);
			sa->agf_bp = NULL;
		}

		if (sa->agi_bp) {
			xfs_trans_brelse(sc->tp, sa->agi_bp);
			sa->agi_bp = NULL;
		}

		/* ...and wait, but only if the drain hooks are enabled. */
		if (!(sc->flags & XCHK_FSGATES_DRAIN))
			return -ECHRNG;
		error = xfs_perag_intent_drain(sa->pag);
		if (error == -ERESTARTSYS)
			error = -EINTR;	/* ERESTARTSYS is kernel-internal */
	} while (!error);

	return error;
}
     633             : 
     634             : /*
     635             :  * Grab the per-AG structure, grab all AG header buffers, and wait until there
     636             :  * aren't any pending intents.  Returns -ENOENT if we can't grab the perag
     637             :  * structure.
     638             :  */
     639             : int
     640   587119515 : xchk_ag_read_headers(
     641             :         struct xfs_scrub        *sc,
     642             :         xfs_agnumber_t          agno,
     643             :         struct xchk_ag          *sa)
     644             : {
     645   587119515 :         struct xfs_mount        *mp = sc->mp;
     646             : 
     647   587119515 :         ASSERT(!sa->pag);
     648   587119515 :         sa->pag = xfs_perag_get(mp, agno);
     649   587124820 :         if (!sa->pag)
     650             :                 return -ENOENT;
     651             : 
     652   587124820 :         return xchk_perag_drain_and_lock(sc);
     653             : }
     654             : 
     655             : /* Release all the AG btree cursors. */
     656             : void
     657  1093012748 : xchk_ag_btcur_free(
     658             :         struct xchk_ag          *sa)
     659             : {
     660  1093012748 :         if (sa->refc_cur)
     661   587858373 :                 xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
     662  1093032068 :         if (sa->rmap_cur)
     663   587898049 :                 xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
     664  1093033976 :         if (sa->fino_cur)
     665   587868804 :                 xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
     666  1093039321 :         if (sa->ino_cur)
     667   587868882 :                 xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
     668  1093029069 :         if (sa->cnt_cur)
     669   587837696 :                 xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
     670  1093027724 :         if (sa->bno_cur)
     671   587837462 :                 xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
     672             : 
     673  1093028423 :         sa->refc_cur = NULL;
     674  1093028423 :         sa->rmap_cur = NULL;
     675  1093028423 :         sa->fino_cur = NULL;
     676  1093028423 :         sa->ino_cur = NULL;
     677  1093028423 :         sa->bno_cur = NULL;
     678  1093028423 :         sa->cnt_cur = NULL;
     679  1093028423 : }
     680             : 
     681             : /* Initialize all the btree cursors for an AG. */
     682             : void
     683   587103321 : xchk_ag_btcur_init(
     684             :         struct xfs_scrub        *sc,
     685             :         struct xchk_ag          *sa)
     686             : {
     687   587103321 :         struct xfs_mount        *mp = sc->mp;
     688             : 
     689  1174206318 :         if (sa->agf_bp &&
     690   587103149 :             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
     691             :                 /* Set up a bnobt cursor for cross-referencing. */
     692   587102924 :                 sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
     693             :                                 sa->pag, XFS_BTNUM_BNO);
     694             :         }
     695             : 
     696  1174206056 :         if (sa->agf_bp &&
     697   587102621 :             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
     698             :                 /* Set up a cntbt cursor for cross-referencing. */
     699   587103328 :                 sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
     700             :                                 sa->pag, XFS_BTNUM_CNT);
     701             :         }
     702             : 
     703             :         /* Set up a inobt cursor for cross-referencing. */
     704  1174205387 :         if (sa->agi_bp &&
     705   587101992 :             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
     706   587103333 :                 sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
     707             :                                 XFS_BTNUM_INO);
     708             :         }
     709             : 
     710             :         /* Set up a finobt cursor for cross-referencing. */
     711  1174205045 :         if (sa->agi_bp && xfs_has_finobt(mp) &&
     712   587102581 :             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
     713   587102568 :                 sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
     714             :                                 XFS_BTNUM_FINO);
     715             :         }
     716             : 
     717             :         /* Set up a rmapbt cursor for cross-referencing. */
     718  1174196485 :         if (sa->agf_bp && xfs_has_rmapbt(mp) &&
     719   587094034 :             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
     720   587094860 :                 sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
     721             :                                 sa->pag);
     722             :         }
     723             : 
     724             :         /* Set up a refcountbt cursor for cross-referencing. */
     725  1174191330 :         if (sa->agf_bp && xfs_has_reflink(mp) &&
     726   587089436 :             xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
     727   587089998 :                 sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
     728             :                                 sa->agf_bp, sa->pag);
     729             :         }
     730   587101663 : }
     731             : 
     732             : /* Release the AG header context and btree cursors. */
     733             : void
     734  1090834620 : xchk_ag_free(
     735             :         struct xfs_scrub        *sc,
     736             :         struct xchk_ag          *sa)
     737             : {
     738  1090834620 :         xchk_ag_btcur_free(sa);
     739  1090794694 :         xrep_reset_perag_resv(sc);
     740  1091183909 :         if (sa->agf_bp) {
     741   587712252 :                 xfs_trans_brelse(sc->tp, sa->agf_bp);
     742   587712111 :                 sa->agf_bp = NULL;
     743             :         }
     744  1091183768 :         if (sa->agi_bp) {
     745   587712162 :                 xfs_trans_brelse(sc->tp, sa->agi_bp);
     746   587712013 :                 sa->agi_bp = NULL;
     747             :         }
     748  1091183619 :         if (sa->pag) {
     749   587747795 :                 xfs_perag_put(sa->pag);
     750   587748564 :                 sa->pag = NULL;
     751             :         }
     752  1091184388 : }
     753             : 
     754             : /*
     755             :  * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
     756             :  * order.  Locking order requires us to get the AGI before the AGF.  We use the
     757             :  * transaction to avoid deadlocking on crosslinked metadata buffers; either the
     758             :  * caller passes one in (bmap scrub) or we have to create a transaction
     759             :  * ourselves.  Returns ENOENT if the perag struct cannot be grabbed.
     760             :  */
     761             : int
     762   585756072 : xchk_ag_init(
     763             :         struct xfs_scrub        *sc,
     764             :         xfs_agnumber_t          agno,
     765             :         struct xchk_ag          *sa)
     766             : {
     767   585756072 :         int                     error;
     768             : 
     769   585756072 :         error = xchk_ag_read_headers(sc, agno, sa);
     770   585775149 :         if (error)
     771             :                 return error;
     772             : 
     773   585758992 :         xchk_ag_btcur_init(sc, sa);
     774   585758992 :         return 0;
     775             : }
     776             : 
     777             : /*
     778             :  * For scrubbing a realtime file, grab all the in-core resources we'll need to
     779             :  * check the realtime metadata, which means taking the ILOCK of the realtime
     780             :  * metadata inodes.  Callers must not join these inodes to the transaction
     781             :  * with non-zero lockflags or concurrency problems will result.  The
     782             :  * @rtlock_flags argument takes XCHK_RTLOCK_* flags because scrub has somewhat
     783             :  * unusual locking requirements.
     784             :  */
     785             : void
     786       95153 : xchk_rt_init(
     787             :         struct xfs_scrub        *sc,
     788             :         struct xchk_rt          *sr,
     789             :         unsigned int            rtlock_flags)
     790             : {
     791       95153 :         ASSERT(!(rtlock_flags & ~XCHK_RTLOCK_ALL));
     792      190306 :         ASSERT(hweight32(rtlock_flags & (XCHK_RTLOCK_BITMAP |
     793             :                                          XCHK_RTLOCK_BITMAP_SHARED)) < 2);
     794      190306 :         ASSERT(hweight32(rtlock_flags & (XCHK_RTLOCK_SUMMARY |
     795             :                                          XCHK_RTLOCK_SUMMARY_SHARED)) < 2);
     796       95153 :         ASSERT(sr->rtg == NULL);
     797             : 
     798       95153 :         if (rtlock_flags & XCHK_RTLOCK_BITMAP)
     799       27631 :                 xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_EXCL);
     800       67522 :         else if (rtlock_flags & XCHK_RTLOCK_BITMAP_SHARED)
     801       67522 :                 xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED);
     802             : 
     803       95152 :         if (rtlock_flags & XCHK_RTLOCK_SUMMARY)
     804       67521 :                 xfs_ilock(sc->mp->m_rsumip, XFS_ILOCK_EXCL);
     805       27631 :         else if (rtlock_flags & XCHK_RTLOCK_SUMMARY_SHARED)
     806           0 :                 xfs_ilock(sc->mp->m_rsumip, XFS_ILOCK_SHARED);
     807             : 
     808       95153 :         sr->rtlock_flags = rtlock_flags;
     809       95153 : }
     810             : 
     811             : /*
     812             :  * Unlock the realtime metadata inodes.  This must be done /after/ committing
     813             :  * (or cancelling) the scrub transaction.
     814             :  */
     815             : void
     816   506145577 : xchk_rt_unlock(
     817             :         struct xfs_scrub        *sc,
     818             :         struct xchk_rt          *sr)
     819             : {
     820   506145577 :         ASSERT(sr->rtg == NULL);
     821             : 
     822   506145577 :         if (!sr->rtlock_flags)
     823             :                 return;
     824             : 
     825       95153 :         if (sr->rtlock_flags & XCHK_RTLOCK_SUMMARY)
     826       67522 :                 xfs_iunlock(sc->mp->m_rsumip, XFS_ILOCK_EXCL);
     827             :         else if (sr->rtlock_flags & XCHK_RTLOCK_SUMMARY)
     828             :                 xfs_iunlock(sc->mp->m_rsumip, XFS_ILOCK_SHARED);
     829             : 
     830       95153 :         if (sr->rtlock_flags & XCHK_RTLOCK_BITMAP)
     831       27631 :                 xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_EXCL);
     832       67522 :         else if (sr->rtlock_flags & XCHK_RTLOCK_BITMAP_SHARED)
     833       67522 :                 xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED);
     834             : 
     835       95153 :         sr->rtlock_flags = 0;
     836             : }
     837             : 
     838             : #ifdef CONFIG_XFS_RT
/* Lock all the rt group metadata inode ILOCKs and wait for intents. */
int
xchk_rtgroup_drain_and_lock(
	struct xfs_scrub	*sc,
	struct xchk_rt		*sr,
	unsigned int		rtglock_flags)
{
	int			error = 0;

	/* Caller must have attached the rtgroup already. */
	ASSERT(sr->rtg != NULL);

	/*
	 * If we're /only/ locking the rtbitmap in shared mode, then we're
	 * obviously not trying to compare records in two metadata inodes.
	 * There's no need to drain intents here because the caller (most
	 * likely the rgsuper scanner) doesn't need that level of consistency.
	 */
	if (rtglock_flags == XFS_RTGLOCK_BITMAP_SHARED) {
		xfs_rtgroup_lock(NULL, sr->rtg, rtglock_flags);
		sr->rtlock_flags = rtglock_flags;
		return 0;
	}

	do {
		/* Bail out of the retry loop if a fatal signal is pending. */
		if (xchk_should_terminate(sc, &error))
			return error;

		xfs_rtgroup_lock(NULL, sr->rtg, rtglock_flags);

		/*
		 * If we've grabbed a non-metadata file for scrubbing, we
		 * assume that holding its ILOCK will suffice to coordinate
		 * with any rt intent chains involving this inode.
		 */
		if (sc->ip && !xfs_is_metadata_inode(sc->ip)) {
			sr->rtlock_flags = rtglock_flags;
			return 0;
		}

		/*
		 * Decide if the rt group is quiet enough for all metadata to
		 * be consistent with each other.  Regular file IO doesn't get
		 * to lock all the rt inodes at the same time, which means that
		 * there could be other threads in the middle of processing a
		 * chain of deferred ops.
		 *
		 * We just locked all the metadata inodes for this rt group;
		 * now take a look to see if there are any intents in progress.
		 * If there are, drop the rt group inode locks and wait for the
		 * intents to drain.  Since we hold the rt group inode locks
		 * for the duration of the scrub, this is the only time we have
		 * to sample the intents counter; any threads increasing it
		 * after this point can't possibly be in the middle of a chain
		 * of rt metadata updates.
		 *
		 * Obviously, this should be slanted against scrub and in favor
		 * of runtime threads.
		 */
		if (!xfs_rtgroup_intent_busy(sr->rtg)) {
			sr->rtlock_flags = rtglock_flags;
			return 0;
		}

		/* Intents are running: drop the locks and wait them out... */
		xfs_rtgroup_unlock(sr->rtg, rtglock_flags);

		/* ...but only if the drain hooks are enabled. */
		if (!(sc->flags & XCHK_FSGATES_DRAIN))
			return -ECHRNG;
		error = xfs_rtgroup_intent_drain(sr->rtg);
		if (error == -ERESTARTSYS)
			error = -EINTR;	/* ERESTARTSYS is kernel-internal */
	} while (!error);

	return error;
}
     913             : 
     914             : /*
     915             :  * For scrubbing a realtime group, grab all the in-core resources we'll need to
     916             :  * check the metadata, which means taking the ILOCK of the realtime group's
     917             :  * metadata inodes and draining any running intent chains.  Callers must not
     918             :  * join these inodes to the transaction with non-zero lockflags or concurrency
     919             :  * problems will result.  The @rtglock_flags argument takes XFS_RTGLOCK_*
     920             :  * flags.
     921             :  */
     922             : int
     923    28992056 : xchk_rtgroup_init(
     924             :         struct xfs_scrub        *sc,
     925             :         xfs_rgnumber_t          rgno,
     926             :         struct xchk_rt          *sr,
     927             :         unsigned int            rtglock_flags)
     928             : {
     929    28992056 :         int                     error;
     930             : 
     931    28992056 :         ASSERT(sr->rtg == NULL);
     932    28992056 :         ASSERT(sr->rtlock_flags == 0);
     933             : 
     934    28992056 :         sr->rtg = xfs_rtgroup_get(sc->mp, rgno);
     935    28992657 :         if (!sr->rtg)
     936             :                 return -ENOENT;
     937             : 
     938    28992657 :         error = xchk_rtgroup_drain_and_lock(sc, sr, rtglock_flags);
     939    28993878 :         if (error)
     940             :                 return error;
     941             : 
     942    28968057 :         if (xfs_has_rtrmapbt(sc->mp) && (rtglock_flags & XFS_RTGLOCK_RMAP))
     943    28954872 :                 sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp,
     944             :                                 sr->rtg, sr->rtg->rtg_rmapip);
     945             : 
     946    28965343 :         if (xfs_has_rtreflink(sc->mp) && (rtglock_flags & XFS_RTGLOCK_REFCOUNT))
     947    28953497 :                 sr->refc_cur = xfs_rtrefcountbt_init_cursor(sc->mp, sc->tp,
     948             :                                 sr->rtg, sr->rtg->rtg_refcountip);
     949             : 
     950             :         return 0;
     951             : }
     952             : 
     953             : /*
     954             :  * Free all the btree cursors and other incore data relating to the realtime
     955             :  * group.  This has to be done /before/ committing (or cancelling) the scrub
     956             :  * transaction.
     957             :  */
     958             : void
     959   536965262 : xchk_rtgroup_btcur_free(
     960             :         struct xchk_rt          *sr)
     961             : {
     962   536965262 :         if (sr->rmap_cur)
     963    29248281 :                 xfs_btree_del_cursor(sr->rmap_cur, XFS_BTREE_ERROR);
     964   536965327 :         if (sr->refc_cur)
     965    29241284 :                 xfs_btree_del_cursor(sr->refc_cur, XFS_BTREE_ERROR);
     966             : 
     967   536965260 :         sr->refc_cur = NULL;
     968   536965260 :         sr->rmap_cur = NULL;
     969   536965260 : }
     970             : 
     971             : /*
     972             :  * Unlock the realtime group.  This must be done /after/ committing (or
     973             :  * cancelling) the scrub transaction.
     974             :  */
     975             : void
     976    29247533 : xchk_rtgroup_unlock(
     977             :         struct xfs_scrub        *sc,
     978             :         struct xchk_rt          *sr)
     979             : {
     980    29247533 :         ASSERT(sr->rtg != NULL);
     981             : 
     982    29247533 :         if (sr->rtlock_flags) {
     983    29221658 :                 xfs_rtgroup_unlock(sr->rtg, sr->rtlock_flags);
     984    29221624 :                 sr->rtlock_flags = 0;
     985             :         }
     986    29247499 : }
     987             : 
     988             : /*
     989             :  * Unlock the realtime group and release its resources.  This must be done
     990             :  * /after/ committing (or cancelling) the scrub transaction.
     991             :  */
     992             : void
     993    29244284 : xchk_rtgroup_free(
     994             :         struct xfs_scrub        *sc,
     995             :         struct xchk_rt          *sr)
     996             : {
     997    29244284 :         ASSERT(sr->rtg != NULL);
     998             : 
     999    29244284 :         xchk_rtgroup_unlock(sc, sr);
    1000             : 
    1001    29244256 :         xfs_rtgroup_put(sr->rtg);
    1002    29244251 :         sr->rtg = NULL;
    1003    29244251 : }
    1004             : #endif /* CONFIG_XFS_RT */
    1005             : 
    1006             : /* Per-scrubber setup functions */
    1007             : 
    1008             : void
    1009   179922296 : xchk_trans_cancel(
    1010             :         struct xfs_scrub        *sc)
    1011             : {
    1012   179922296 :         xfs_trans_cancel(sc->tp);
    1013   179929588 :         sc->tp = NULL;
    1014           0 : }
    1015             : 
    1016             : int
    1017    88816108 : xchk_trans_alloc_empty(
    1018             :         struct xfs_scrub        *sc)
    1019             : {
    1020    88816108 :         return xfs_trans_alloc_empty(sc->mp, &sc->tp);
    1021             : }
    1022             : 
    1023             : /*
    1024             :  * Grab an empty transaction so that we can re-grab locked buffers if
    1025             :  * one of our btrees turns out to be cyclic.
    1026             :  *
    1027             :  * If we're going to repair something, we need to ask for the largest possible
    1028             :  * log reservation so that we can handle the worst case scenario for metadata
    1029             :  * updates while rebuilding a metadata item.  We also need to reserve as many
    1030             :  * blocks in the head transaction as we think we're going to need to rebuild
    1031             :  * the metadata object.
    1032             :  */
    1033             : int
    1034   506112772 : xchk_trans_alloc(
    1035             :         struct xfs_scrub        *sc,
    1036             :         uint                    resblks)
    1037             : {
    1038   506112772 :         if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
    1039     6430142 :                 return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
    1040             :                                 resblks, 0, 0, &sc->tp);
    1041             : 
    1042   499682630 :         return xchk_trans_alloc_empty(sc);
    1043             : }
    1044             : 
    1045             : /* Set us up with a transaction and an empty context. */
    1046             : int
    1047     3186072 : xchk_setup_fs(
    1048             :         struct xfs_scrub        *sc)
    1049             : {
    1050     3186072 :         uint                    resblks;
    1051             : 
    1052     3186072 :         resblks = xrep_calc_ag_resblks(sc);
    1053     3186092 :         return xchk_trans_alloc(sc, resblks);
    1054             : }
    1055             : 
    1056             : /* Set us up with a transaction and an empty context to repair rt metadata. */
    1057             : int
    1058      139784 : xchk_setup_rt(
    1059             :         struct xfs_scrub        *sc)
    1060             : {
    1061      139784 :         uint                    resblks;
    1062             : 
    1063      139784 :         resblks = xrep_calc_rtgroup_resblks(sc);
    1064      139786 :         return xchk_trans_alloc(sc, resblks);
    1065             : }
    1066             : 
    1067             : /* Set us up with AG headers and btree cursors. */
    1068             : int
    1069     1122792 : xchk_setup_ag_btree(
    1070             :         struct xfs_scrub        *sc,
    1071             :         bool                    force_log)
    1072             : {
    1073     1122792 :         struct xfs_mount        *mp = sc->mp;
    1074     1122792 :         int                     error;
    1075             : 
    1076             :         /*
    1077             :          * If the caller asks us to checkpont the log, do so.  This
    1078             :          * expensive operation should be performed infrequently and only
    1079             :          * as a last resort.  Any caller that sets force_log should
    1080             :          * document why they need to do so.
    1081             :          */
    1082     1122792 :         if (force_log) {
    1083           0 :                 error = xchk_checkpoint_log(mp);
    1084           0 :                 if (error)
    1085             :                         return error;
    1086             :         }
    1087             : 
    1088     1122792 :         error = xchk_setup_fs(sc);
    1089     1122837 :         if (error)
    1090             :                 return error;
    1091             : 
    1092     1122837 :         return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
    1093             : }
    1094             : 
    1095             : /* Push everything out of the log onto disk. */
    1096             : int
    1097           0 : xchk_checkpoint_log(
    1098             :         struct xfs_mount        *mp)
    1099             : {
    1100           0 :         int                     error;
    1101             : 
    1102           0 :         error = xfs_log_force(mp, XFS_LOG_SYNC);
    1103           0 :         if (error)
    1104             :                 return error;
    1105           0 :         xfs_ail_push_all_sync(mp->m_ail);
    1106           0 :         return 0;
    1107             : }
    1108             : 
    1109             : /* Verify that an inode is allocated ondisk, then return its cached inode. */
    1110             : int
    1111  1629418075 : xchk_iget(
    1112             :         struct xfs_scrub        *sc,
    1113             :         xfs_ino_t               inum,
    1114             :         struct xfs_inode        **ipp)
    1115             : {
    1116  1629418075 :         return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
    1117             : }
    1118             : 
/*
 * Try to grab an inode in a manner that avoids races with physical inode
 * allocation.  If we can't, return the locked AGI buffer so that the caller
 * can single-step the loading process to see where things went wrong.
 * Callers must have a valid scrub transaction.
 *
 * If the iget succeeds, return 0, a NULL AGI, and the inode.
 *
 * If the iget fails, return the error, the locked AGI, and a NULL inode.  This
 * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
 * no longer allocated; or any other corruption or runtime error.
 *
 * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
 *
 * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
 */
int
xchk_iget_agi(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_buf		**agi_bpp,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	struct xfs_perag	*pag;
	int			error;

	ASSERT(sc->tp != NULL);

again:
	/* Reset the outparams on every trip so a bailout returns NULLs. */
	*agi_bpp = NULL;
	*ipp = NULL;
	error = 0;

	/* Give fatal signals a chance to break this retry loop (-EINTR). */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Attach the AGI buffer to the scrub transaction to avoid deadlocks
	 * in the iget cache miss path.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
	error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
	xfs_perag_put(pag);
	if (error)
		return error;

	/* NORETRY means an incore EAGAIN comes straight back to us here. */
	error = xfs_iget(mp, tp, inum,
			XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
	if (error == -EAGAIN) {
		/*
		 * The inode may be in core but temporarily unavailable and may
		 * require the AGI buffer before it can be returned.  Drop the
		 * AGI buffer and retry the lookup.
		 *
		 * Incore lookup will fail with EAGAIN on a cache hit if the
		 * inode is queued to the inactivation list.  The inactivation
		 * worker may remove the inode from the unlinked list and hence
		 * needs the AGI.
		 *
		 * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
		 * to allow inodegc to make progress and move the inode to
		 * IRECLAIMABLE state where xfs_iget will be able to return it
		 * again if it can lock the inode.
		 */
		xfs_trans_brelse(tp, *agi_bpp);
		delay(1);
		goto again;
	}
	if (error)
		return error;

	/* We got the inode, so we can release the AGI. */
	ASSERT(*ipp != NULL);
	xfs_trans_brelse(tp, *agi_bpp);
	*agi_bpp = NULL;
	return 0;
}
    1198             : 
    1199             : #ifdef CONFIG_XFS_QUOTA
    1200             : /*
    1201             :  * Try to attach dquots to this inode if we think we might want to repair it.
    1202             :  * Callers must not hold any ILOCKs.  If the dquots are broken and cannot be
    1203             :  * attached, a quotacheck will be scheduled.
    1204             :  */
    1205             : int
    1206   500055008 : xchk_ino_dqattach(
    1207             :         struct xfs_scrub        *sc)
    1208             : {
    1209   500055008 :         ASSERT(sc->tp != NULL);
    1210   500055008 :         ASSERT(sc->ip != NULL);
    1211             : 
    1212  1000110016 :         if (!xchk_could_repair(sc))
    1213             :                 return 0;
    1214             : 
    1215      966946 :         return xrep_ino_dqattach(sc);
    1216             : }
    1217             : #endif
    1218             : 
    1219             : /* Install an inode that we opened by handle for scrubbing. */
    1220             : int
    1221   366902581 : xchk_install_handle_inode(
    1222             :         struct xfs_scrub        *sc,
    1223             :         struct xfs_inode        *ip)
    1224             : {
    1225             :         /*
    1226             :          * Only the directories in the metadata directory tree can be scrubbed
    1227             :          * by handle -- files must be checked through an explicit scrub type.
    1228             :          */
    1229   366902581 :         if ((xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) ||
    1230   366729109 :             VFS_I(ip)->i_generation != sc->sm->sm_gen) {
    1231      505594 :                 xchk_irele(sc, ip);
    1232      505594 :                 return -ENOENT;
    1233             :         }
    1234             : 
    1235   366396987 :         sc->ip = ip;
    1236   366396987 :         return 0;
    1237             : }
    1238             : 
    1239             : /*
    1240             :  * Install an already-referenced inode for scrubbing.  Get our own reference to
    1241             :  * the inode to make disposal simpler.  The inode must not be in I_FREEING or
    1242             :  * I_WILL_FREE state!
    1243             :  */
    1244             : int
    1245   133818807 : xchk_install_live_inode(
    1246             :         struct xfs_scrub        *sc,
    1247             :         struct xfs_inode        *ip)
    1248             : {
    1249   133818807 :         if (!igrab(VFS_I(ip))) {
    1250           0 :                 xchk_ino_set_corrupt(sc, ip->i_ino);
    1251           0 :                 return -EFSCORRUPTED;
    1252             :         }
    1253             : 
    1254   133798840 :         sc->ip = ip;
    1255   133798840 :         return 0;
    1256             : }
    1257             : 
/*
 * In preparation to scrub metadata structures that hang off of an inode,
 * grab either the inode referenced in the scrub control structure or the
 * inode passed in.  If the inumber does not reference an allocated inode
 * record, the function returns ENOENT to end the scrub early.  The inode
 * is not locked.
 */
int
xchk_iget_for_scrubbing(
	struct xfs_scrub	*sc)
{
	struct xfs_imap		imap;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag;
	struct xfs_buf		*agi_bp;
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));
	struct xfs_inode	*ip = NULL;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
	int			error;

	ASSERT(sc->tp == NULL);

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
		return xchk_install_live_inode(sc, ip_in);

	/* Reject internal metadata files and obviously bad inode numbers. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
		return -ENOENT;

	/* Try a regular untrusted iget. */
	error = xchk_iget(sc, sc->sm->sm_ino, &ip);
	if (!error)
		return xchk_install_handle_inode(sc, ip);
	if (error == -ENOENT)
		return error;
	if (error != -EINVAL)
		goto out_error;

	/*
	 * EINVAL with IGET_UNTRUSTED probably means one of several things:
	 * userspace gave us an inode number that doesn't correspond to fs
	 * space; the inode btree lacks a record for this inode; or there is a
	 * record, and it says this inode is free.
	 *
	 * We want to look up this inode in the inobt to distinguish two
	 * scenarios: (1) the inobt says the inode is free, in which case
	 * there's nothing to do; and (2) the inobt says the inode is
	 * allocated, but loading it failed due to corruption.
	 *
	 * Allocate a transaction and grab the AGI to prevent inobt activity
	 * in this AG.  Retry the iget in case someone allocated a new inode
	 * after the first iget failed.
	 */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out_error;

	error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
	if (error == 0) {
		/* Actually got the inode, so install it. */
		xchk_trans_cancel(sc);
		return xchk_install_handle_inode(sc, ip);
	}
	if (error == -ENOENT)
		goto out_gone;
	if (error != -EINVAL)
		goto out_cancel;

	/* Ensure that we have protected against inode allocation/freeing. */
	if (agi_bp == NULL) {
		ASSERT(agi_bp != NULL);
		error = -ECANCELED;
		goto out_cancel;
	}

	/*
	 * Untrusted iget failed a second time.  Let's try an inobt lookup.
	 * If the inobt thinks this the inode neither can exist inside the
	 * filesystem nor is allocated, return ENOENT to signal that the check
	 * can be skipped.
	 *
	 * If the lookup returns corruption, we'll mark this inode corrupt and
	 * exit to userspace.  There's little chance of fixing anything until
	 * the inobt is straightened out, but there's nothing we can do here.
	 *
	 * If the lookup encounters any other error, exit to userspace.
	 *
	 * If the lookup succeeds, something else must be very wrong in the fs
	 * such that setting up the incore inode failed in some strange way.
	 * Treat those as corruptions.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
	if (!pag) {
		error = -EFSCORRUPTED;
		goto out_cancel;
	}

	error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
			XFS_IGET_UNTRUSTED);
	xfs_perag_put(pag);
	if (error == -EINVAL || error == -ENOENT)
		goto out_gone;
	if (!error)
		error = -EFSCORRUPTED;	/* imap ok but iget failed twice */

out_cancel:
	xchk_trans_cancel(sc);
out_error:
	/* Operational error: log it and return it to userspace. */
	xchk_whine(mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
			xchk_type_string(sc->sm->sm_type), agno,
			XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error,
			__return_address);
	trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
			error, __return_address);
	return error;
out_gone:
	/* The file is gone, so there's nothing to check. */
	xchk_trans_cancel(sc);
	return -ENOENT;
}
    1381             : 
/* Release an inode, possibly dropping it in the process. */
void
xchk_irele(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	/* current->journal_info is non-NULL iff this task holds a transaction. */
	if (current->journal_info != NULL) {
		ASSERT(current->journal_info == sc->tp);

		/*
		 * If we are in a transaction, we /cannot/ drop the inode
		 * ourselves, because the VFS will trigger writeback, which
		 * can require a transaction.  Clear DONTCACHE to force the
		 * inode to the LRU, where someone else can take care of
		 * dropping it.
		 *
		 * Note that when we grabbed our reference to the inode, it
		 * could have had an active ref and DONTCACHE set if a sysadmin
		 * is trying to coerce a change in file access mode.  icache
		 * hits do not clear DONTCACHE, so we must do it here.
		 */
		spin_lock(&VFS_I(ip)->i_lock);
		VFS_I(ip)->i_state &= ~I_DONTCACHE;
		spin_unlock(&VFS_I(ip)->i_lock);
	} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
		/*
		 * If this is the last reference to the inode and the caller
		 * permits it, set DONTCACHE to avoid thrashing.
		 */
		d_mark_dontcache(VFS_I(ip));
	}

	/* Drop our reference; this may be the final iput. */
	xfs_irele(ip);
}
    1416             : 
    1417             : /*
    1418             :  * Set us up to scrub metadata mapped by a file's fork.  Callers must not use
    1419             :  * this to operate on user-accessible regular file data because the MMAPLOCK is
    1420             :  * not taken.
    1421             :  */
    1422             : int
    1423   190402534 : xchk_setup_inode_contents(
    1424             :         struct xfs_scrub        *sc,
    1425             :         unsigned int            resblks)
    1426             : {
    1427   190402534 :         int                     error;
    1428             : 
    1429   190402534 :         error = xchk_iget_for_scrubbing(sc);
    1430   190408778 :         if (error)
    1431             :                 return error;
    1432             : 
    1433             :         /* Lock the inode so the VFS cannot touch this file. */
    1434   189332460 :         xchk_ilock(sc, XFS_IOLOCK_EXCL);
    1435             : 
    1436   189332953 :         error = xchk_trans_alloc(sc, resblks);
    1437   189333707 :         if (error)
    1438           0 :                 goto out;
    1439             : 
    1440   189333707 :         error = xchk_ino_dqattach(sc);
    1441   189334956 :         if (error)
    1442           0 :                 goto out;
    1443             : 
    1444   189334956 :         xchk_ilock(sc, XFS_ILOCK_EXCL);
    1445             : out:
    1446             :         /* scrub teardown will unlock and release the inode for us */
    1447             :         return error;
    1448             : }
    1449             : 
/* Take inode locks and record them so that teardown can release them. */
void
xchk_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	xfs_ilock(sc->ip, ilock_flags);
	/* Record held locks only after acquisition succeeds. */
	sc->ilock_flags |= ilock_flags;
}
    1458             : 
    1459             : bool
    1460    88500271 : xchk_ilock_nowait(
    1461             :         struct xfs_scrub        *sc,
    1462             :         unsigned int            ilock_flags)
    1463             : {
    1464    88500271 :         if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
    1465    88496714 :                 sc->ilock_flags |= ilock_flags;
    1466    88496714 :                 return true;
    1467             :         }
    1468             : 
    1469             :         return false;
    1470             : }
    1471             : 
/* Drop inode locks taken via xchk_ilock and clear the bookkeeping. */
void
xchk_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	/* Clear the record before dropping the lock itself. */
	sc->ilock_flags &= ~ilock_flags;
	xfs_iunlock(sc->ip, ilock_flags);
}
    1480             : 
/*
 * Predicate that decides if we need to evaluate the cross-reference check.
 * If there was an error accessing the cross-reference btree, just delete
 * the cursor and skip the check.
 */
bool
xchk_should_check_xref(
	struct xfs_scrub	*sc,
	int			*error,
	struct xfs_btree_cur	**curpp)
{
	/* No point in xref if we already know we're corrupt. */
	if (xchk_skip_xref(sc->sm))
		return false;

	if (*error == 0)
		return true;

	/* An error happened; curpp may be NULL if the caller has no cursor. */
	if (curpp) {
		/* If we've already given up on xref, just bail out. */
		if (!*curpp)
			return false;

		/* xref error, delete cursor and bail out. */
		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
		*curpp = NULL;
	}

	/* Flag the failed cross-reference, but don't fail the scrub. */
	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
	xchk_whine(sc->mp, "type %s xref error %d ret_ip %pS",
			xchk_type_string(sc->sm->sm_type),
			*error,
			__return_address);
	trace_xchk_xref_error(sc, *error, __return_address);

	/*
	 * Errors encountered during cross-referencing with another
	 * data structure should not cause this scrubber to abort.
	 */
	*error = 0;
	return false;
}
    1523             : 
    1524             : /* Run the structure verifiers on in-memory buffers to detect bad memory. */
    1525             : void
    1526    91668028 : xchk_buffer_recheck(
    1527             :         struct xfs_scrub        *sc,
    1528             :         struct xfs_buf          *bp)
    1529             : {
    1530    91668028 :         xfs_failaddr_t          fa;
    1531             : 
    1532    91668028 :         if (bp->b_ops == NULL) {
    1533           0 :                 xchk_block_set_corrupt(sc, bp);
    1534           0 :                 return;
    1535             :         }
    1536    91668028 :         if (bp->b_ops->verify_struct == NULL) {
    1537           0 :                 xchk_set_incomplete(sc);
    1538           0 :                 return;
    1539             :         }
    1540    91668028 :         fa = bp->b_ops->verify_struct(bp);
    1541    91665434 :         if (!fa)
    1542             :                 return;
    1543           0 :         sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
    1544           0 :         trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
    1545           0 :         xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
    1546           0 :                         xchk_type_string(sc->sm->sm_type),
    1547             :                         xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
    1548             :                         xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
    1549             :                         fa);
    1550             : }
    1551             : 
    1552             : static inline int
    1553      721039 : xchk_metadata_inode_subtype(
    1554             :         struct xfs_scrub        *sc,
    1555             :         unsigned int            scrub_type)
    1556             : {
    1557      721039 :         __u32                   smtype = sc->sm->sm_type;
    1558      721039 :         int                     error;
    1559             : 
    1560      721039 :         sc->sm->sm_type = scrub_type;
    1561             : 
    1562      721039 :         switch (scrub_type) {
    1563      360513 :         case XFS_SCRUB_TYPE_INODE:
    1564      360513 :                 error = xchk_inode(sc);
    1565      360513 :                 break;
    1566      360526 :         case XFS_SCRUB_TYPE_BMBTD:
    1567      360526 :                 error = xchk_bmap_data(sc);
    1568      360526 :                 break;
    1569           0 :         default:
    1570           0 :                 ASSERT(0);
    1571           0 :                 error = -EFSCORRUPTED;
    1572           0 :                 break;
    1573             :         }
    1574             : 
    1575      721052 :         sc->sm->sm_type = smtype;
    1576      721052 :         return error;
    1577             : }
    1578             : 
/*
 * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
 * pointed to by sc->ip and the ILOCK must be held.
 *
 * Returns 0 if the forks are clean or a corruption flag was set; returns a
 * negative errno only for operational errors in the sub-scrubbers.
 */
int
xchk_metadata_inode_forks(
	struct xfs_scrub	*sc)
{
	bool			shared;
	int			error;

	/* Someone already found corruption; nothing more to learn here. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Check the inode record. */
	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Metadata inodes don't live on the rt device. */
	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* They should never participate in reflink. */
	if (xfs_is_reflink_inode(sc->ip)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/*
	 * Metadata files can only have extended attributes if parent pointers
	 * and the metadata directory tree are enabled.
	 */
	if (xfs_inode_hasattr(sc->ip) &&
	    !(xfs_has_parent(sc->mp) && xfs_has_metadir(sc->mp))) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	/* Invoke the data fork scrubber. */
	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
		return error;

	/* Look for incorrect shared blocks. */
	if (xfs_has_reflink(sc->mp)) {
		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
				&shared);
		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
				&error))
			return error;
		/* Shared extents on a metadata file are corruption. */
		if (shared)
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
	}

	return 0;
}
    1638             : 
/*
 * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
 * operation.  Callers must not hold any locks that intersect with the CPU
 * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
 * to change kernel code.
 */
void
xchk_fsgates_enable(
	struct xfs_scrub	*sc,
	unsigned int		scrub_fsgates)
{
	/* Callers must pass known gate bits, and only ones not yet enabled. */
	ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
	ASSERT(!(sc->flags & scrub_fsgates));

	trace_xchk_fsgates_enable(sc, scrub_fsgates);

	if (scrub_fsgates & XCHK_FSGATES_DRAIN)
		xfs_defer_drain_wait_enable();

	if (scrub_fsgates & XCHK_FSGATES_QUOTA)
		xfs_dqtrx_hook_enable();

	if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
		xfs_dir_hook_enable();

	if (scrub_fsgates & XCHK_FSGATES_RMAP)
		xfs_rmap_hook_enable();

	/* Remember what we turned on so that teardown can turn it off. */
	sc->flags |= scrub_fsgates;
}
    1669             : 
    1670             : /*
    1671             :  * Decide if this is this a cached inode that's also allocated.  The caller
    1672             :  * must hold a reference to an AG and the AGI buffer lock to prevent inodes
    1673             :  * from being allocated or freed.
    1674             :  *
    1675             :  * Look up an inode by number in the given file system.  If the inode number
    1676             :  * is invalid, return -EINVAL.  If the inode is not in cache, return -ENODATA.
    1677             :  * If the inode is being reclaimed, return -ENODATA because we know the inode
    1678             :  * cache cannot be updating the ondisk metadata.
    1679             :  *
    1680             :  * Otherwise, the incore inode is the one we want, and it is either live,
    1681             :  * somewhere in the inactivation machinery, or reclaimable.  The inode is
    1682             :  * allocated if i_mode is nonzero.  In all three cases, the cached inode will
    1683             :  * be more up to date than the ondisk inode buffer, so we must use the incore
    1684             :  * i_mode.
    1685             :  */
    1686             : int
    1687  3316719117 : xchk_inode_is_allocated(
    1688             :         struct xfs_scrub        *sc,
    1689             :         xfs_agino_t             agino,
    1690             :         bool                    *inuse)
    1691             : {
    1692  3316719117 :         struct xfs_mount        *mp = sc->mp;
    1693  3316719117 :         struct xfs_perag        *pag = sc->sa.pag;
    1694  3316719117 :         xfs_ino_t               ino;
    1695  3316719117 :         struct xfs_inode        *ip;
    1696  3316719117 :         int                     error;
    1697             : 
    1698             :         /* caller must hold perag reference */
    1699  3316719117 :         if (pag == NULL) {
    1700           0 :                 ASSERT(pag != NULL);
    1701           0 :                 return -EINVAL;
    1702             :         }
    1703             : 
    1704             :         /* caller must have AGI buffer */
    1705  3316719117 :         if (sc->sa.agi_bp == NULL) {
    1706           0 :                 ASSERT(sc->sa.agi_bp != NULL);
    1707           0 :                 return -EINVAL;
    1708             :         }
    1709             : 
    1710             :         /* reject inode numbers outside existing AGs */
    1711  3316719117 :         ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
    1712  3316719117 :         if (!xfs_verify_ino(mp, ino))
    1713             :                 return -EINVAL;
    1714             : 
    1715  3317104305 :         error = -ENODATA;
    1716  3317104305 :         rcu_read_lock();
    1717  3317078597 :         ip = radix_tree_lookup(&pag->pag_ici_root, agino);
    1718  3316895345 :         if (!ip) {
    1719             :                 /* cache miss */
    1720    24086778 :                 goto out_rcu;
    1721             :         }
    1722             : 
    1723             :         /*
    1724             :          * If the inode number doesn't match, the incore inode got reused
    1725             :          * during an RCU grace period and the radix tree hasn't been updated.
    1726             :          * This isn't the inode we want.
    1727             :          */
    1728  3292808567 :         spin_lock(&ip->i_flags_lock);
    1729  3292824189 :         if (ip->i_ino != ino)
    1730           4 :                 goto out_skip;
    1731             : 
    1732  3292824185 :         trace_xchk_inode_is_allocated(ip);
    1733             : 
    1734             :         /*
    1735             :          * We have an incore inode that matches the inode we want, and the
    1736             :          * caller holds the perag structure and the AGI buffer.  Let's check
    1737             :          * our assumptions below:
    1738             :          */
    1739             : 
    1740             : #ifdef DEBUG
    1741             :         /*
    1742             :          * (1) If the incore inode is live (i.e. referenced from the dcache),
    1743             :          * it will not be INEW, nor will it be in the inactivation or reclaim
    1744             :          * machinery.  The ondisk inode had better be allocated.  This is the
    1745             :          * most trivial case.
    1746             :          */
    1747  3292823317 :         if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
    1748             :                              XFS_INACTIVATING))) {
    1749             :                 /* live inode */
    1750  3285616602 :                 ASSERT(VFS_I(ip)->i_mode != 0);
    1751             :         }
    1752             : 
    1753             :         /*
    1754             :          * If the incore inode is INEW, there are several possibilities:
    1755             :          *
    1756             :          * (2) For a file that is being created, note that we allocate the
    1757             :          * ondisk inode before allocating, initializing, and adding the incore
    1758             :          * inode to the radix tree.
    1759             :          *
    1760             :          * (3) If the incore inode is being recycled, the inode has to be
    1761             :          * allocated because we don't allow freed inodes to be recycled.
    1762             :          * Recycling doesn't touch i_mode.
    1763             :          */
    1764  3292823317 :         if (ip->i_flags & XFS_INEW) {
    1765             :                 /* created on disk already or recycling */
    1766        2263 :                 ASSERT(VFS_I(ip)->i_mode != 0);
    1767             :         }
    1768             : 
    1769             :         /*
    1770             :          * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
    1771             :          * inactivation has not started (!INACTIVATING), it is still allocated.
    1772             :          */
    1773  3292823317 :         if ((ip->i_flags & XFS_NEED_INACTIVE) &&
    1774             :             !(ip->i_flags & XFS_INACTIVATING)) {
    1775             :                 /* definitely before difree */
    1776       16686 :                 ASSERT(VFS_I(ip)->i_mode != 0);
    1777             :         }
    1778             : #endif
    1779             : 
    1780             :         /*
    1781             :          * If the incore inode is undergoing inactivation (INACTIVATING), there
    1782             :          * are two possibilities:
    1783             :          *
    1784             :          * (5) It is before the point where it would get freed ondisk, in which
    1785             :          * case i_mode is still nonzero.
    1786             :          *
    1787             :          * (6) It has already been freed, in which case i_mode is zero.
    1788             :          *
    1789             :          * We don't take the ILOCK here, but difree and dialloc update the AGI,
    1790             :          * and we've taken the AGI buffer lock, which prevents that from
    1791             :          * happening.
    1792             :          */
    1793             : 
    1794             :         /*
    1795             :          * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
    1796             :          * reclaim (IRECLAIMABLE) could be allocated or free.  i_mode still
    1797             :          * reflects the ondisk state.
    1798             :          */
    1799             : 
    1800             :         /*
    1801             :          * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
    1802             :          * the flush code uses i_mode to format the ondisk inode.
    1803             :          */
    1804             : 
    1805             :         /*
    1806             :          * (9) If the inode is in IRECLAIM and was reachable via the radix
    1807             :          * tree, it still has the same i_mode as it did before it entered
    1808             :          * reclaim.  The inode object is still alive because we hold the RCU
    1809             :          * read lock.
    1810             :          */
    1811             : 
    1812  3292823317 :         *inuse = VFS_I(ip)->i_mode != 0;
    1813  3292823317 :         error = 0;
    1814             : 
    1815  3292823321 : out_skip:
    1816  3292823321 :         spin_unlock(&ip->i_flags_lock);
    1817  3316829934 : out_rcu:
    1818  3316829934 :         rcu_read_unlock();
    1819  3316829934 :         return error;
    1820             : }
    1821             : 
    1822             : /* Count the blocks used by a file, even if it's a metadata inode. */
    1823             : int
    1824   156278521 : xchk_inode_count_blocks(
    1825             :         struct xfs_scrub        *sc,
    1826             :         int                     whichfork,
    1827             :         xfs_extnum_t            *nextents,
    1828             :         xfs_filblks_t           *count)
    1829             : {
    1830   156278521 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(sc->ip, whichfork);
    1831   156279052 :         struct xfs_btree_cur    *cur;
    1832   156279052 :         xfs_extlen_t            btblocks;
    1833   156279052 :         int                     error;
    1834             : 
    1835   156279052 :         if (!ifp) {
    1836       31823 :                 *nextents = 0;
    1837       31823 :                 *count = 0;
    1838       31823 :                 return 0;
    1839             :         }
    1840             : 
    1841   156247229 :         switch (ifp->if_format) {
    1842       45077 :         case XFS_DINODE_FMT_RMAP:
    1843       45077 :                 if (!sc->sr.rtg) {
    1844           0 :                         ASSERT(0);
    1845           0 :                         return -EFSCORRUPTED;
    1846             :                 }
    1847       45077 :                 cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
    1848             :                                 sc->ip);
    1849       45077 :                 goto meta_btree;
    1850      116291 :         case XFS_DINODE_FMT_REFCOUNT:
    1851      116291 :                 if (!sc->sr.rtg) {
    1852           0 :                         ASSERT(0);
    1853           0 :                         return -EFSCORRUPTED;
    1854             :                 }
    1855      116291 :                 cur = xfs_rtrefcountbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
    1856             :                                 sc->ip);
    1857      116291 :                 goto meta_btree;
    1858             :         }
    1859             : 
    1860   156085861 :         return xfs_bmap_count_blocks(sc->tp, sc->ip, whichfork, nextents,
    1861             :                         count);
    1862      161368 : meta_btree:
    1863      161368 :         error = xfs_btree_count_blocks(cur, &btblocks);
    1864      161368 :         xfs_btree_del_cursor(cur, error);
    1865      161368 :         if (error)
    1866             :                 return error;
    1867             : 
    1868      161368 :         *nextents = 0;
    1869      161368 :         *count = btblocks - 1;
    1870      161368 :         return 0;
    1871             : }
    1872             : 
    1873             : /* Complain about failures... */
    1874             : void
    1875          64 : xchk_whine(
    1876             :         const struct xfs_mount  *mp,
    1877             :         const char              *fmt,
    1878             :         ...)
    1879             : {
    1880          64 :         struct va_format        vaf;
    1881          64 :         va_list                 args;
    1882             : 
    1883          64 :         va_start(args, fmt);
    1884             : 
    1885          64 :         vaf.fmt = fmt;
    1886          64 :         vaf.va = &args;
    1887             : 
    1888          64 :         printk(KERN_INFO "XFS (%s) %pS: %pV\n", mp->m_super->s_id,
    1889             :                         __return_address, &vaf);
    1890          64 :         va_end(args);
    1891             : 
    1892          64 :         if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
    1893           0 :                 xfs_stack_trace();
    1894          64 : }

Generated by: LCOV version 1.14