LCOV - code coverage report
Current view: top level - fs/xfs/scrub - nlinks.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 322 405 79.5 %
Date: 2023-07-31 20:08:07 Functions: 17 17 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2021-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_log_format.h"
      13             : #include "xfs_trans.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_icache.h"
      16             : #include "xfs_iwalk.h"
      17             : #include "xfs_ialloc.h"
      18             : #include "xfs_dir2.h"
      19             : #include "xfs_dir2_priv.h"
      20             : #include "xfs_ag.h"
      21             : #include "xfs_parent.h"
      22             : #include "scrub/scrub.h"
      23             : #include "scrub/common.h"
      24             : #include "scrub/repair.h"
      25             : #include "scrub/xfile.h"
      26             : #include "scrub/xfarray.h"
      27             : #include "scrub/iscan.h"
      28             : #include "scrub/orphanage.h"
      29             : #include "scrub/nlinks.h"
      30             : #include "scrub/trace.h"
      31             : #include "scrub/readdir.h"
      32             : #include "scrub/tempfile.h"
      33             : #include "scrub/listxattr.h"
      34             : 
      35             : /*
      36             :  * Live Inode Link Count Checking
      37             :  * ==============================
      38             :  *
      39             :  * Inode link counts are "summary" metadata, in the sense that they are
      40             :  * computed as the number of directory entries referencing each file on the
      41             :  * filesystem.  Therefore, we compute the correct link counts by creating a
      42             :  * shadow link count structure and walking every inode.
      43             :  */
      44             : 
      45             : /* Set us up to scrub inode link counts. */
      46             : int
      47       12684 : xchk_setup_nlinks(
      48             :         struct xfs_scrub        *sc)
      49             : {
      50       12684 :         int                     error;
      51             : 
      52       12684 :         xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
      53             : 
      54       25368 :         if (xchk_could_repair(sc)) {
      55        2007 :                 error = xrep_setup_nlinks(sc);
      56        2007 :                 if (error)
      57             :                         return error;
      58             :         }
      59             : 
      60       12684 :         sc->buf = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
      61       12684 :         if (!sc->buf)
      62             :                 return -ENOMEM;
      63             : 
      64       12684 :         return xchk_setup_fs(sc);
      65             : }
      66             : 
      67             : /*
      68             :  * Part 1: Collecting file link counts.  For each file, we create a shadow link
      69             :  * counting structure, then walk the entire directory tree, incrementing parent
      70             :  * and child link counts for each directory entry seen.
      71             :  *
      72             :  * To avoid false corruption reports in part 2, any failure in this part must
      73             :  * set the INCOMPLETE flag even when a negative errno is returned.  This care
      74             :  * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
      75             :  * ECANCELED) that are absorbed into a scrub state flag update by
      76             :  * xchk_*_process_error.  Scrub and repair share the same incore data
      77             :  * structures, so the INCOMPLETE flag is critical to prevent a repair based on
      78             :  * insufficient information.
      79             :  *
      80             :  * Because we are scanning a live filesystem, it's possible that another thread
      81             :  * will try to update the link counts for an inode that we've already scanned.
      82             :  * This will cause our counts to be incorrect.  Therefore, we hook all
      83             :  * directory entry updates because that is when link count updates occur.  By
      84             :  * shadowing transaction updates in this manner, live nlink check can ensure by
      85             :  * locking the inode and the shadow structure that its own copies are not out
      86             :  * of date.  Because the hook code runs in a different process context from the
      87             :  * scrub code and the scrub state flags are not accessed atomically, failures
      88             :  * in the hook code must abort the iscan and the scrubber must notice the
      89             :  * aborted scan and set the incomplete flag.
      90             :  *
      91             :  * Note that we use jump labels and srcu notifier hooks to minimize the
      92             :  * overhead when live nlinks is /not/ running.  Locking order for nlink
      93             :  * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
      94             :  */
      95             : 
      96             : /*
      97             :  * Add a delta to an nlink counter, being careful about integer overflow.
      98             :  * Clamp the value to XFS_NLINK_PINNED because the ondisk format does not
      99             :  * handle link counts any higher.
     100             :  */
     101             : static inline void
     102             : careful_add(
     103             :         xfs_nlink_t     *nlinkp,
     104             :         int             delta)
     105             : {
     106   929815857 :         uint64_t        new_value = (uint64_t)(*nlinkp) + delta;
     107             : 
     108   929815857 :         *nlinkp = min_t(uint64_t, new_value, XFS_NLINK_PINNED);
     109             : }
     110             : 
     111             : /* Update incore link count information.  Caller must hold the nlinks lock. */
     112             : STATIC int
     113   929815857 : xchk_nlinks_update_incore(
     114             :         struct xchk_nlink_ctrs  *xnc,
     115             :         xfs_ino_t               ino,
     116             :         int                     parents_delta,
     117             :         int                     backrefs_delta,
     118             :         int                     children_delta)
     119             : {
     120   929815857 :         struct xchk_nlink       nl;
     121   929815857 :         int                     error;
     122             : 
     123   929815857 :         if (!xnc->nlinks)
     124             :                 return 0;
     125             : 
     126   929815857 :         error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
     127   929815857 :         if (error)
     128             :                 return error;
     129             : 
     130   929815857 :         trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
     131             :                         backrefs_delta, children_delta);
     132             : 
     133   929815857 :         careful_add(&nl.parents, parents_delta);
     134   929815857 :         careful_add(&nl.backrefs, backrefs_delta);
     135   929815857 :         careful_add(&nl.children, children_delta);
     136             : 
     137   929815857 :         nl.flags |= XCHK_NLINK_WRITTEN;
     138   929815857 :         error = xfarray_store(xnc->nlinks, ino, &nl);
     139   929815857 :         if (error == -EFBIG) {
     140             :                 /*
     141             :                  * EFBIG means we tried to store data at too high a byte offset
     142             :                  * in the sparse array.  IOWs, we cannot complete the check and
     143             :                  * must notify userspace that the check was incomplete.
     144             :                  */
     145           0 :                 error = -ECANCELED;
     146             :         }
     147             :         return error;
     148             : }
     149             : 
     150             : /*
     151             :  * Apply a link count change from the regular filesystem into our shadow link
     152             :  * count structure based on a directory update in progress.
     153             :  */
     154             : STATIC int
     155    10242072 : xchk_nlinks_live_update(
     156             :         struct notifier_block           *nb,
     157             :         unsigned long                   action,
     158             :         void                            *data)
     159             : {
     160    10242072 :         struct xfs_dir_update_params    *p = data;
     161    10242072 :         struct xchk_nlink_ctrs          *xnc;
     162    10242072 :         int                             error;
     163             : 
     164    10242072 :         xnc = container_of(nb, struct xchk_nlink_ctrs, hooks.dirent_hook.nb);
     165             : 
     166             :         /*
     167             :          * Ignore temporary directories being used to stage dir repairs, since
     168             :          * we don't bump the link counts of the children.
     169             :          */
     170    10242072 :         if (xrep_is_tempfile(p->dp))
     171             :                 return NOTIFY_DONE;
     172             : 
     173    10242544 :         trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
     174    10242544 :                         p->delta, p->name->name, p->name->len);
     175             : 
     176             :         /*
     177             :          * If we've already scanned @dp, update the number of parents that link
     178             :          * to @ip.  If @ip is a subdirectory, update the number of child links
     179             :          * going out of @dp.
     180             :          */
     181    10242032 :         if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
     182     7266584 :                 mutex_lock(&xnc->lock);
     183     7266640 :                 error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
     184             :                                 0, 0);
     185     7266640 :                 if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
     186     1799191 :                         error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
     187             :                                         0, p->delta);
     188     7266640 :                 mutex_unlock(&xnc->lock);
     189     7266631 :                 if (error)
     190           0 :                         goto out_abort;
     191             :         }
     192             : 
     193             :         /*
     194             :          * If @ip is a subdirectory and we've already scanned it, update the
     195             :          * number of backrefs pointing to @dp.
     196             :          */
     197    12782668 :         if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
     198     2539226 :             xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
     199     1693737 :                 mutex_lock(&xnc->lock);
     200     1693737 :                 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
     201             :                                 p->delta, 0);
     202     1693737 :                 mutex_unlock(&xnc->lock);
     203     1693737 :                 if (error)
     204           0 :                         goto out_abort;
     205             :         }
     206             : 
     207             :         return NOTIFY_DONE;
     208             : 
     209           0 : out_abort:
     210           0 :         xchk_iscan_abort(&xnc->collect_iscan);
     211           0 :         return NOTIFY_DONE;
     212             : }
     213             : 
     214             : /* Bump the observed link count for the inode referenced by this entry. */
     215             : STATIC int
     216   919059776 : xchk_nlinks_collect_dirent(
     217             :         struct xfs_scrub        *sc,
     218             :         struct xfs_inode        *dp,
     219             :         xfs_dir2_dataptr_t      dapos,
     220             :         const struct xfs_name   *name,
     221             :         xfs_ino_t               ino,
     222             :         void                    *priv)
     223             : {
     224   919059776 :         struct xchk_nlink_ctrs  *xnc = priv;
     225   919059776 :         bool                    dot = false, dotdot = false;
     226   919059776 :         int                     error;
     227             : 
     228             :         /* Does this name make sense? */
     229   919059776 :         if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
     230          22 :                 error = -ECANCELED;
     231          22 :                 goto out_abort;
     232             :         }
     233             : 
     234   919059754 :         if (name->len == 1 && name->name[0] == '.')
     235             :                 dot = true;
     236   750085803 :         else if (name->len == 2 && name->name[0] == '.' &&
     237   168973951 :                                    name->name[1] == '.')
     238             :                 dotdot = true;
     239             : 
     240             :         /* Don't accept a '.' entry that points somewhere else. */
     241   168973951 :         if (dot && ino != dp->i_ino) {
     242           0 :                 error = -ECANCELED;
     243           0 :                 goto out_abort;
     244             :         }
     245             : 
     246             :         /* Don't accept an invalid inode number. */
     247   919059754 :         if (!xfs_verify_ino(sc->mp, ino)) {
     248           0 :                 error = -ECANCELED;
     249           0 :                 goto out_abort;
     250             :         }
     251             : 
     252             :         /* Update the shadow link counts if we haven't already failed. */
     253             : 
     254   919059754 :         if (xchk_iscan_aborted(&xnc->collect_iscan)) {
     255           0 :                 error = -ECANCELED;
     256           0 :                 goto out_incomplete;
     257             :         }
     258             : 
     259   919059754 :         trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
     260             : 
     261   919059754 :         mutex_lock(&xnc->lock);
     262             : 
     263             :         /*
     264             :          * If this is a dotdot entry, it is a back link from dp to ino.  How
     265             :          * we handle this depends on whether or not dp is the root directory.
     266             :          *
     267             :          * The root directory is its own parent, so we pretend the dotdot entry
     268             :          * establishes the "parent" of the root directory.  Increment the
     269             :          * number of parents of the root directory.
     270             :          *
     271             :          * Otherwise, increment the number of backrefs pointing back to ino.
     272             :          *
     273             :          * If the filesystem has parent pointers, we walk the pptrs to
     274             :          * determine the backref count.
     275             :          */
     276   919059754 :         if (dotdot) {
     277   168973951 :                 if (dp == sc->mp->m_rootip)
     278       12682 :                         error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
     279   168961269 :                 else if (!xfs_has_parent(sc->mp))
     280        1270 :                         error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
     281             :                 else
     282             :                         error = 0;
     283       13952 :                 if (error)
     284           0 :                         goto out_unlock;
     285             :         }
     286             : 
     287             :         /*
     288             :          * If this dirent is a forward link from dp to ino, increment the
     289             :          * number of parents linking into ino.
     290             :          */
     291   919059754 :         if (!dot && !dotdot) {
     292   581111852 :                 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
     293   581111852 :                 if (error)
     294           0 :                         goto out_unlock;
     295             :         }
     296             : 
     297             :         /*
     298             :          * If this dirent is a forward link to a subdirectory, increment the
     299             :          * number of child links of dp.
     300             :          */
     301   919059754 :         if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
     302   168922903 :                 error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
     303   168922903 :                 if (error)
     304           0 :                         goto out_unlock;
     305             :         }
     306             : 
     307   919059754 :         mutex_unlock(&xnc->lock);
     308   919059754 :         return 0;
     309             : 
     310           0 : out_unlock:
     311           0 :         mutex_unlock(&xnc->lock);
     312          22 : out_abort:
     313          22 :         xchk_iscan_abort(&xnc->collect_iscan);
     314          22 : out_incomplete:
     315          22 :         xchk_set_incomplete(sc);
     316          22 :         return error;
     317             : }
     318             : 
     319             : /* Bump the backref count for the inode referenced by this parent pointer. */
     320             : STATIC int
     321   168959999 : xchk_nlinks_collect_pptr(
     322             :         struct xfs_scrub        *sc,
     323             :         struct xfs_inode        *ip,
     324             :         const struct xfs_parent_name_irec *pptr,
     325             :         void                    *priv)
     326             : {
     327   168959999 :         struct xchk_nlink_ctrs  *xnc = priv;
     328   168959999 :         int                     error;
     329             : 
     330             :         /* Don't accept an invalid inode number. */
     331   168959999 :         if (!xfs_verify_ino(sc->mp, pptr->p_ino)) {
     332           0 :                 error = -EFSCORRUPTED;
     333           0 :                 goto out_abort;
     334             :         }
     335             : 
     336             :         /* Update the shadow link counts if we haven't already failed. */
     337             : 
     338   168959999 :         if (xchk_iscan_aborted(&xnc->collect_iscan)) {
     339           0 :                 error = -ECANCELED;
     340           0 :                 goto out_incomplete;
     341             :         }
     342             : 
     343   168959999 :         trace_xchk_nlinks_collect_pptr(sc->mp, ip, pptr);
     344             : 
     345   168959999 :         mutex_lock(&xnc->lock);
     346             : 
     347   168959999 :         error = xchk_nlinks_update_incore(xnc, pptr->p_ino, 0, 1, 0);
     348   168959999 :         if (error)
     349           0 :                 goto out_unlock;
     350             : 
     351   168959999 :         mutex_unlock(&xnc->lock);
     352   168959999 :         return 0;
     353             : 
     354             : out_unlock:
     355           0 :         mutex_unlock(&xnc->lock);
     356           0 : out_abort:
     357           0 :         xchk_iscan_abort(&xnc->collect_iscan);
     358           0 : out_incomplete:
     359           0 :         xchk_set_incomplete(sc);
     360           0 :         return error;
     361             : }
     362             : 
     363             : /* Walk a directory to bump the observed link counts of the children. */
     364             : STATIC int
     365   168976261 : xchk_nlinks_collect_dir(
     366             :         struct xchk_nlink_ctrs  *xnc,
     367             :         struct xfs_inode        *dp)
     368             : {
     369   168976261 :         struct xfs_scrub        *sc = xnc->sc;
     370   168976261 :         unsigned int            lock_mode;
     371   168976261 :         int                     error = 0;
     372             : 
     373             :         /*
     374             :          * Ignore temporary directories being used to stage dir repairs, since
     375             :          * we don't bump the link counts of the children.
     376             :          */
     377   168976261 :         if (xrep_is_tempfile(dp))
     378             :                 return 0;
     379             : 
     380             :         /* Prevent anyone from changing this directory while we walk it. */
     381   168976261 :         xfs_ilock(dp, XFS_IOLOCK_SHARED);
     382   168976261 :         lock_mode = xfs_ilock_data_map_shared(dp);
     383             : 
     384             :         /*
     385             :          * The dotdot entry of an unlinked directory still points to the last
     386             :          * parent, but the parent no longer links to this directory.  Skip the
     387             :          * directory to avoid overcounting.
     388             :          */
     389   168976261 :         if (VFS_I(dp)->i_nlink == 0)
     390        2310 :                 goto out_unlock;
     391             : 
     392             :         /*
     393             :          * We cannot count file links if the directory looks as though it has
     394             :          * been zapped by the inode record repair code.
     395             :          */
     396   168973951 :         if (xchk_dir_looks_zapped(dp))
     397           0 :                 goto out_abort;
     398             : 
     399   168973951 :         error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
     400   168973951 :         if (error == -ECANCELED) {
     401          22 :                 error = 0;
     402          22 :                 goto out_unlock;
     403             :         }
     404   168973929 :         if (error)
     405           0 :                 goto out_abort;
     406             : 
     407             :         /* Walk the parent pointers to get real backref counts. */
     408   168973929 :         if (xfs_has_parent(sc->mp)) {
     409   168972627 :                 error = xchk_pptr_walk(sc, dp, xchk_nlinks_collect_pptr,
     410             :                                 &xnc->pptr, xnc);
     411   168972627 :                 if (error == -ECANCELED) {
     412           0 :                         error = 0;
     413           0 :                         goto out_unlock;
     414             :                 }
     415   168972627 :                 if (error)
     416           0 :                         goto out_abort;
     417             :         }
     418             : 
     419   168973929 :         xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
     420   168973929 :         goto out_unlock;
     421             : 
     422           0 : out_abort:
     423           0 :         xchk_set_incomplete(sc);
     424           0 :         xchk_iscan_abort(&xnc->collect_iscan);
     425   168976261 : out_unlock:
     426   168976261 :         xfs_iunlock(dp, lock_mode);
     427   168976261 :         xfs_iunlock(dp, XFS_IOLOCK_SHARED);
     428   168976261 :         return error;
     429             : }
     430             : 
     431             : /* If this looks like a valid pointer, count it. */
     432             : static inline int
     433       63420 : xchk_nlinks_collect_metafile(
     434             :         struct xchk_nlink_ctrs  *xnc,
     435             :         xfs_ino_t               ino)
     436             : {
     437       63420 :         if (!xfs_verify_ino(xnc->sc->mp, ino))
     438             :                 return 0;
     439             : 
     440       47583 :         trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
     441       47583 :         return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
     442             : }
     443             : 
     444             : /* Bump the link counts of metadata files rooted in the superblock. */
     445             : STATIC int
     446       12684 : xchk_nlinks_collect_metafiles(
     447             :         struct xchk_nlink_ctrs  *xnc)
     448             : {
     449       12684 :         struct xfs_mount        *mp = xnc->sc->mp;
     450       12684 :         int                     error = -ECANCELED;
     451             : 
     452             : 
     453       12684 :         if (xchk_iscan_aborted(&xnc->collect_iscan))
     454           0 :                 goto out_incomplete;
     455             : 
     456       12684 :         mutex_lock(&xnc->lock);
     457       12684 :         error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
     458       12684 :         if (error)
     459           0 :                 goto out_abort;
     460             : 
     461       12684 :         error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
     462       12684 :         if (error)
     463           0 :                 goto out_abort;
     464             : 
     465       12684 :         error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
     466       12684 :         if (error)
     467           0 :                 goto out_abort;
     468             : 
     469       12684 :         error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
     470       12684 :         if (error)
     471           0 :                 goto out_abort;
     472             : 
     473       12684 :         error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
     474       12684 :         if (error)
     475           0 :                 goto out_abort;
     476       12684 :         mutex_unlock(&xnc->lock);
     477             : 
     478       12684 :         return 0;
     479             : 
     480           0 : out_abort:
     481           0 :         mutex_unlock(&xnc->lock);
     482           0 :         xchk_iscan_abort(&xnc->collect_iscan);
     483           0 : out_incomplete:
     484           0 :         xchk_set_incomplete(xnc->sc);
     485           0 :         return error;
     486             : }
     487             : 
     488             : /* Advance the collection scan cursor for this non-directory file. */
     489             : static inline int
     490   310266878 : xchk_nlinks_collect_file(
     491             :         struct xchk_nlink_ctrs  *xnc,
     492             :         struct xfs_inode        *ip)
     493             : {
     494   310266878 :         xfs_ilock(ip, XFS_IOLOCK_SHARED);
     495   310266878 :         xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
     496   310266878 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     497   310266878 :         return 0;
     498             : }
     499             : 
     500             : /* Walk all directories and count inode links. */
     501             : STATIC int
     502       12684 : xchk_nlinks_collect(
     503             :         struct xchk_nlink_ctrs  *xnc)
     504             : {
     505       12684 :         struct xfs_scrub        *sc = xnc->sc;
     506       12684 :         struct xfs_inode        *ip;
     507       12684 :         int                     error;
     508             : 
     509             :         /* Count the rt and quota files that are rooted in the superblock. */
     510       12684 :         error = xchk_nlinks_collect_metafiles(xnc);
     511       12684 :         if (error)
     512             :                 return error;
     513             : 
     514             :         /*
     515             :          * Set up for a potentially lengthy filesystem scan by reducing our
     516             :          * transaction resource usage for the duration.  Specifically:
     517             :          *
     518             :          * Cancel the transaction to release the log grant space while we scan
     519             :          * the filesystem.
     520             :          *
     521             :          * Create a new empty transaction to eliminate the possibility of the
     522             :          * inode scan deadlocking on cyclical metadata.
     523             :          *
     524             :          * We pass the empty transaction to the file scanning function to avoid
     525             :          * repeatedly cycling empty transactions.  This can be done even though
     526             :          * we take the IOLOCK to quiesce the file because empty transactions
     527             :          * do not take sb_internal.
     528             :          */
     529       12684 :         xchk_trans_cancel(sc);
     530       12684 :         error = xchk_trans_alloc_empty(sc);
     531       12684 :         if (error)
     532             :                 return error;
     533             : 
     534   479255821 :         while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
     535   479243139 :                 if (S_ISDIR(VFS_I(ip)->i_mode))
     536   168976261 :                         error = xchk_nlinks_collect_dir(xnc, ip);
     537             :                 else
     538   310266878 :                         error = xchk_nlinks_collect_file(xnc, ip);
     539   479243139 :                 xchk_irele(sc, ip);
     540   479243139 :                 if (error)
     541             :                         break;
     542             : 
     543   479243139 :                 if (xchk_should_terminate(sc, &error))
     544             :                         break;
     545             :         }
     546       12684 :         xchk_iscan_iter_finish(&xnc->collect_iscan);
     547       12684 :         if (error) {
     548          26 :                 xchk_set_incomplete(sc);
     549             :                 /*
     550             :                  * If we couldn't grab an inode that was busy with a state
     551             :                  * change, change the error code so that we exit to userspace
     552             :                  * as quickly as possible.
     553             :                  */
     554          26 :                 if (error == -EBUSY)
     555             :                         return -ECANCELED;
     556          26 :                 return error;
     557             :         }
     558             : 
     559             :         /*
     560             :          * Switch out for a real transaction in preparation for building a new
     561             :          * tree.
     562             :          */
     563       12658 :         xchk_trans_cancel(sc);
     564       12658 :         return xchk_setup_fs(sc);
     565             : }
     566             : 
     567             : /*
     568             :  * Part 2: Comparing file link counters.  Walk each inode and compare the link
     569             :  * counts against our shadow information; and then walk each shadow link count
     570             :  * structure (that wasn't covered in the first part), comparing it against the
     571             :  * file.
     572             :  */
     573             : 
     574             : /*
                     :  * Read the observed link count for comparison with the actual inode.
                     :  *
                     :  * @xnc: link count scan context; the record comes from xnc->nlinks.
                     :  * @ino: inode number used as the index into xnc->nlinks.
                     :  * @obs: filled with the parents, backrefs and children counters of the
                     :  *       record; obs->flags is always set to zero.
                     :  *
                     :  * The record is written back with XCHK_NLINK_COMPARE_SCANNED and
                     :  * XCHK_NLINK_WRITTEN set before the counters are copied out.  Both callers
                     :  * in this file hold xnc->lock around this call.
                     :  *
                     :  * Returns 0 on success.  An -EFBIG store failure marks the scrub incomplete
                     :  * and is reported as -ECANCELED; any other load or store error is returned
                     :  * unchanged.
                     :  *
                     :  * NOTE(review): presumably xfarray_load_sparse() yields an all-zero record
                     :  * for an index that was never stored -- confirm against the xfarray code.
                     :  */
     575             : STATIC int
     576   479411814 : xchk_nlinks_comparison_read(
     577             :         struct xchk_nlink_ctrs  *xnc,
     578             :         xfs_ino_t               ino,
     579             :         struct xchk_nlink       *obs)
     580             : {
     581   479411814 :         struct xchk_nlink       nl;
     582   479411814 :         int                     error;
     583             : 
     584   479411814 :         error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
     585   479411814 :         if (error)
     586             :                 return error;
     587             :         /* Records with COMPARE_SCANNED set are skipped by the array walk in xchk_nlinks_compare(). */
     588   479411814 :         nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
     589             : 
     590   479411814 :         error = xfarray_store(xnc->nlinks, ino, &nl);
     591   479411814 :         if (error == -EFBIG) {
     592             :                 /*
     593             :                  * EFBIG means we tried to store data at too high a byte offset
     594             :                  * in the sparse array.  IOWs, we cannot complete the check and
     595             :                  * must notify userspace that the check was incomplete.  This
     596             :                  * shouldn't really happen outside of the collection phase.
     597             :                  */
     598           0 :                 xchk_set_incomplete(xnc->sc);
     599           0 :                 return -ECANCELED;
     600             :         }
     601   479411814 :         if (error)
     602             :                 return error;
     603             : 
     604             :         /* Copy the counters, but do not expose the internal state. */
     605   479411814 :         obs->parents = nl.parents;
     606   479411814 :         obs->backrefs = nl.backrefs;
     607   479411814 :         obs->children = nl.children;
     608   479411814 :         obs->flags = 0;
     609   479411814 :         return 0;
     610             : }
     611             : 
     612             : /*
                     :  * Check our link count against an inode.
                     :  *
                     :  * @xnc: link count scan context; xnc->lock is taken and dropped here.
                     :  * @ip: inode to check; its ILOCK is taken shared and dropped here.
                     :  *
                     :  * Repair tempfiles are skipped.  Otherwise the observed counters for
                     :  * ip->i_ino are read and checked against the inode's i_nlink and against
                     :  * each other, and problems are reported through xchk_ino_set_corrupt(),
                     :  * xchk_ino_set_warning() and xchk_ino_xref_set_corrupt().
                     :  *
                     :  * Returns 0 for a tempfile.  Returns -ECANCELED if the collection scan was
                     :  * aborted (the scrub is also marked incomplete) or if
                     :  * XFS_SCRUB_OFLAG_CORRUPT is set in sc->sm->sm_flags once the checks are
                     :  * done.  Otherwise returns the result of xchk_nlinks_comparison_read().
                     :  */
     613             : STATIC int
     614   479411468 : xchk_nlinks_compare_inode(
     615             :         struct xchk_nlink_ctrs  *xnc,
     616             :         struct xfs_inode        *ip)
     617             : {
     618   479411468 :         struct xchk_nlink       obs;
     619   479411468 :         struct xfs_scrub        *sc = xnc->sc;
     620   479411468 :         uint64_t                total_links;
     621   479411468 :         unsigned int            actual_nlink;
     622   479411468 :         int                     error;
     623             : 
     624             :         /*
     625             :          * Ignore temporary files being used to stage repairs, since we assume
     626             :          * they're correct for non-directories, and the directory repair code
     627             :          * doesn't bump the link counts for the children.
     628             :          */
     629   479411468 :         if (xrep_is_tempfile(ip))
     630             :                 return 0;
     631             :         /* Lock order: ILOCK first, then the scan lock; out_scanlock undoes both in reverse. */
     632   479411468 :         xfs_ilock(ip, XFS_ILOCK_SHARED);
     633   479411468 :         mutex_lock(&xnc->lock);
     634             : 
     635   479411468 :         if (xchk_iscan_aborted(&xnc->collect_iscan)) {
     636           0 :                 xchk_set_incomplete(xnc->sc);
     637           0 :                 error = -ECANCELED;
     638           0 :                 goto out_scanlock;
     639             :         }
     640             : 
     641   479411468 :         error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
     642   479411468 :         if (error)
     643           0 :                 goto out_scanlock;
     644             : 
     645             :         /*
     646             :          * If we don't have ftype to get an accurate count of the subdirectory
     647             :          * entries in this directory, take advantage of the fact that on a
     648             :          * consistent ftype=0 filesystem, the number of subdirectory
     649             :          * backreferences (dotdot entries) pointing towards this directory
     650             :          * should be equal to the number of subdirectory entries in the
     651             :          * directory.
     652             :          */
     653   479411468 :         if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
     654           0 :                 obs.children = obs.backrefs;
     655             : 
     656   479411468 :         total_links = xchk_nlink_total(ip, &obs);
     657   479411468 :         actual_nlink = VFS_I(ip)->i_nlink;
     658             : 
     659   479411468 :         trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
     660             : 
     661             :         /*
     662             :          * If we found so many parents that we'd overflow i_nlink, we must
     663             :          * flag this as a corruption.  If we found enough parents to exceed
     664             :          * the somewhat lower XFS_MAXLINK, warn the system administrator about
     665             :          * this.  The VFS won't let users increase the link count, but it will
     666             :          * let them decrease it.
     667             :          */
     668   479411468 :         if (total_links > XFS_NLINK_PINNED)
     669           0 :                 xchk_ino_set_corrupt(sc, ip->i_ino);
     670   479411468 :         else if (total_links > XFS_MAXLINK)
     671           0 :                 xchk_ino_set_warning(sc, ip->i_ino);
     672             : 
     673             :         /* Link counts should match. */
     674   479411468 :         if (total_links != actual_nlink)
     675           0 :                 xchk_ino_set_corrupt(sc, ip->i_ino);
     676             : 
     677   479411468 :         if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
     678             :                 /*
     679             :                  * The collection phase ignores directories with zero link
     680             :                  * count, so we ignore them here too.
     681             :                  *
     682             :                  * The number of subdirectory backreferences (dotdot entries)
     683             :                  * pointing towards this directory should be equal to the
     684             :                  * number of subdirectory entries in the directory.
     685             :                  */
     686   169031896 :                 if (obs.children != obs.backrefs)
     687           0 :                         xchk_ino_xref_set_corrupt(sc, ip->i_ino);
     688             :         } else {
     689             :                 /*
     690             :                  * Non-directories and unlinked directories should not have
     691             :                  * back references.
     692             :                  */
     693   310379572 :                 if (obs.backrefs != 0)
     694           0 :                         xchk_ino_set_corrupt(sc, ip->i_ino);
     695             : 
     696             :                 /*
     697             :                  * Non-directories and unlinked directories should not have
     698             :                  * children.
     699             :                  */
     700   310379572 :                 if (obs.children != 0)
     701           0 :                         xchk_ino_set_corrupt(sc, ip->i_ino);
     702             :         }
     703             : 
     704   479411468 :         if (ip == sc->mp->m_rootip) {
     705             :                 /*
     706             :                  * For the root of a directory tree, both the '.' and '..'
     707             :                  * entries should point to the root directory.  The dotdot
     708             :                  * entry is counted as a parent of the root /and/ a backref of
     709             :                  * the root directory.
     710             :                  */
     711       12658 :                 if (obs.parents != 1)
     712           0 :                         xchk_ino_set_corrupt(sc, ip->i_ino);
     713   479398810 :         } else if (actual_nlink > 0) {
     714             :                 /*
     715             :                  * Linked files that are not the root directory should have at
     716             :                  * least one parent.
     717             :                  */
     718   479396275 :                 if (obs.parents == 0)
     719           0 :                         xchk_ino_set_corrupt(sc, ip->i_ino);
     720             :         }
     721             : 
     722   479411468 :         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
     723           0 :                 error = -ECANCELED;
     724             : 
     725   479411468 : out_scanlock:
     726   479411468 :         mutex_unlock(&xnc->lock);
     727   479411468 :         xfs_iunlock(ip, XFS_ILOCK_SHARED);
     728   479411468 :         return error;
     729             : }
     730             : 
     731             : /*
     732             :  * Check our link count against an inode that wasn't checked previously.  This
     733             :  * is intended to catch directories with dangling links, though we could be
     734             :  * racing with inode allocation in other threads.
                     :  *
                     :  * @xnc: link count scan context.
                     :  * @ino: inode number whose observation record has not been compared yet.
                     :  *
                     :  * If xchk_iget_agi() hands back an inode, the regular inode comparison is
                     :  * used and the inode is released afterwards.  If it fails with -ENOENT or
                     :  * -EINVAL, the observed counters for @ino must add up to zero links;
                     :  * otherwise the inode number is flagged corrupt.
                     :  *
                     :  * Returns 0 on success.  Returns -ECANCELED if no AGI buffer was handed
                     :  * back, if the collection scan was aborted (both mark the scrub incomplete),
                     :  * or if a nonzero link total was observed.  Other errors from the helpers
                     :  * are returned unchanged.  An AGI buffer that is still held on the way out
                     :  * is released against the transaction pointer sampled on entry.
     735             :  */
     736             : STATIC int
     737      288047 : xchk_nlinks_compare_inum(
     738             :         struct xchk_nlink_ctrs  *xnc,
     739             :         xfs_ino_t               ino)
     740             : {
     741      288047 :         struct xchk_nlink       obs;
     742      288047 :         struct xfs_mount        *mp = xnc->sc->mp;
     743      288047 :         struct xfs_trans        *tp = xnc->sc->tp;
     744      288047 :         struct xfs_buf          *agi_bp;
     745      288047 :         struct xfs_inode        *ip;
     746      288047 :         int                     error;
     747             : 
     748             :         /*
     749             :          * The first iget failed, so try again with the variant that returns
     750             :          * either an incore inode or the AGI buffer.  If the function returns
     751             :          * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
     752             :          * can guarantee that the inode won't be allocated while we check for
     753             :          * a zero link count in the observed link count data.
     754             :          */
     755      288047 :         error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
     756      288047 :         if (!error) {
     757             :                 /* Actually got an inode, so use the inode compare. */
     758      287701 :                 error = xchk_nlinks_compare_inode(xnc, ip);
     759      287701 :                 xchk_irele(xnc->sc, ip);
     760      287701 :                 return error;
     761             :         }
     762         346 :         if (error == -ENOENT || error == -EINVAL) {
     763             :                 /* No inode was found.  Check for zero link count below. */
     764         346 :                 error = 0;
     765             :         }
     766         346 :         if (error)
     767           0 :                 goto out_agi;
     768             : 
     769             :         /* Ensure that we have protected against inode allocation/freeing. */
     770         346 :         if (agi_bp == NULL) {
     771           0 :                 ASSERT(agi_bp != NULL);
     772           0 :                 xchk_set_incomplete(xnc->sc);
     773           0 :                 return -ECANCELED;
     774             :         }
     775             : 
     776         346 :         if (xchk_iscan_aborted(&xnc->collect_iscan)) {
     777           0 :                 xchk_set_incomplete(xnc->sc);
     778           0 :                 error = -ECANCELED;
     779           0 :                 goto out_agi;
     780             :         }
     781             : 
     782         346 :         mutex_lock(&xnc->lock);
     783         346 :         error = xchk_nlinks_comparison_read(xnc, ino, &obs);
     784         346 :         if (error)
     785           0 :                 goto out_scanlock;
     786             : 
     787         346 :         trace_xchk_nlinks_check_zero(mp, ino, &obs);
     788             : 
     789             :         /*
     790             :          * If we can't grab the inode, the link count had better be zero.  We
     791             :          * still hold the AGI to prevent inode allocation/freeing.
     792             :          */
     793         346 :         if (xchk_nlink_total(NULL, &obs) != 0) {
     794           0 :                 xchk_ino_set_corrupt(xnc->sc, ino);
     795           0 :                 error = -ECANCELED;
     796             :         }
     797             : 
     798         346 : out_scanlock:
     799         346 :         mutex_unlock(&xnc->lock);
     800         346 : out_agi:
     801         346 :         if (agi_bp)
     802         346 :                 xfs_trans_brelse(tp, agi_bp);
     803             :         return error;
     804             : }
     805             : 
     806             : /*
     807             :  * Try to visit every inode in the filesystem to compare the link count.  Move
     808             :  * on if we can't grab an inode, since we'll revisit unchecked nlink records in
     809             :  * the second part.
                     :  *
                     :  * @xnc: link count scan context; the comparison iscan is advanced.
                     :  * @ipp: passed through to xchk_iscan_iter().
                     :  *
                     :  * Calls xchk_iscan_iter() again for as long as it reports -EBUSY and returns
                     :  * the first result that is anything else.  The caller in this file treats a
                     :  * return value of 1 as "an inode was stored in @ipp".
     810             :  */
     811             : static int
     812             : xchk_nlinks_compare_iter(
     813             :         struct xchk_nlink_ctrs  *xnc,
     814             :         struct xfs_inode        **ipp)
     815             : {
     816   479138813 :         int                     error;
     817             : 
     818   479138813 :         do {
     819   479138813 :                 error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
     820   479138813 :         } while (error == -EBUSY);
     821             : 
     822   479136423 :         return error;
     823             : }
     824             : 
     825             : /*
                     :  * Compare the link counts we observed against the live information.
                     :  *
                     :  * @xnc: link count scan context holding the observations in xnc->nlinks.
                     :  *
                     :  * Pass one walks the inodes found by the comparison iscan and checks each
                     :  * one with xchk_nlinks_compare_inode().  Pass two walks the records in
                     :  * xnc->nlinks and calls xchk_nlinks_compare_inum() for every record that
                     :  * does not have XCHK_NLINK_COMPARE_SCANNED set.
                     :  *
                     :  * Returns 0 right away, before either pass, if the scrub is already flagged
                     :  * XFS_SCRUB_OFLAG_CORRUPT, and returns 0 after pass one if that flag has
                     :  * been set by then.  Otherwise returns the first error seen, or the value
                     :  * that ended the array walk.  xnc->lock is held while iterating the array
                     :  * but is dropped around each xchk_nlinks_compare_inum() call, so the early
                     :  * returns inside that loop leave with the lock released.
                     :  */
     826             : STATIC int
     827       12658 : xchk_nlinks_compare(
     828             :         struct xchk_nlink_ctrs  *xnc)
     829             : {
     830       12658 :         struct xchk_nlink       nl;
     831       12658 :         struct xfs_scrub        *sc = xnc->sc;
     832       12658 :         struct xfs_inode        *ip;
     833       12658 :         xfarray_idx_t           cur = XFARRAY_CURSOR_INIT;
     834       12658 :         int                     error;
     835             : 
     836       12658 :         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
     837             :                 return 0;
     838             : 
     839             :         /*
     840             :          * Create a new empty transaction so that we can advance the iscan
     841             :          * cursor without deadlocking if the inobt has a cycle and push on the
     842             :          * inactivation workqueue.
     843             :          */
     844       12658 :         xchk_trans_cancel(sc);
     845       12658 :         error = xchk_trans_alloc_empty(sc);
     846       12658 :         if (error)
     847             :                 return error;
     848             : 
     849             :         /*
     850             :          * Use the inobt to walk all allocated inodes to compare the link
     851             :          * counts.  Inodes skipped by _compare_iter will be tried again in the
     852             :          * next phase of the scan.
     853             :          */
     854       12658 :         xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
     855   958272846 :         while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
     856   479123767 :                 error = xchk_nlinks_compare_inode(xnc, ip);
     857   479123767 :                 xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
     858   479123767 :                 xchk_irele(sc, ip);
     859   479123767 :                 if (error)
     860             :                         break;
     861             : 
     862   479123767 :                 if (xchk_should_terminate(sc, &error))
     863             :                         break;
     864             :         }
     865       12658 :         xchk_iscan_iter_finish(&xnc->compare_iscan);
     866       12658 :         xchk_iscan_teardown(&xnc->compare_iscan);
     867       12658 :         if (error)
     868             :                 return error;
     869             : 
     870       12656 :         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
     871             :                 return 0;
     872             : 
     873             :         /*
     874             :          * Walk all the non-null nlink observations that weren't checked in the
     875             :          * previous step.
                     :          *
                     :          * NOTE(review): the inode number is taken as cur - 1, which
                     :          * presumes xfarray_iter() leaves the cursor one past the record
                     :          * it just returned -- confirm against the xfarray code.
     876             :          */
     877       12656 :         mutex_lock(&xnc->lock);
     878   479022719 :         while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
     879   479010063 :                 xfs_ino_t       ino = cur - 1;
     880             : 
     881   479010063 :                 if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
     882   478722016 :                         continue;
     883             :                 /* Drop the scan lock; xchk_nlinks_compare_inum() takes it again itself. */
     884      288047 :                 mutex_unlock(&xnc->lock);
     885             : 
     886      288047 :                 error = xchk_nlinks_compare_inum(xnc, ino);
     887      288047 :                 if (error)
     888           0 :                         return error;
     889             : 
     890      288047 :                 if (xchk_should_terminate(xnc->sc, &error))
     891           0 :                         return error;
     892             : 
     893      288047 :                 mutex_lock(&xnc->lock);
     894             :         }
     895       12656 :         mutex_unlock(&xnc->lock);
     896             : 
     897       12656 :         return error;
     898             : }
     899             : 
     900             : /*
                     :  * Tear down everything associated with a nlinks check.
                     :  *
                     :  * @priv: the struct xchk_nlink_ctrs of the scan being torn down.
                     :  *
                     :  * Installed as sc->buf_cleanup by xchk_nlinks_setup_scan(), which also calls
                     :  * it directly on its own error path.  Aborts the collection iscan, removes
                     :  * the directory hook, destroys the observation array, tears down the
                     :  * collection iscan, destroys the lock, and finally clears xnc->sc.
                     :  *
                     :  * NOTE(review): on the setup error path xnc->nlinks may not have been
                     :  * created and the dirent hook may not have been added yet; this presumes
                     :  * that xfarray_destroy() and xfs_dir_hook_del() cope with that -- confirm.
                     :  */
     901             : static void
     902       12684 : xchk_nlinks_teardown_scan(
     903             :         void                    *priv)
     904             : {
     905       12684 :         struct xchk_nlink_ctrs  *xnc = priv;
     906             : 
     907             :         /* Discourage any hook functions that might be running. */
     908       12684 :         xchk_iscan_abort(&xnc->collect_iscan);
     909             : 
     910       12684 :         xfs_dir_hook_del(xnc->sc->mp, &xnc->hooks);
     911             : 
     912       12684 :         xfarray_destroy(xnc->nlinks);
     913       12684 :         xnc->nlinks = NULL;
     914             : 
     915       12684 :         xchk_iscan_teardown(&xnc->collect_iscan);
     916       12684 :         mutex_destroy(&xnc->lock);
     917       12684 :         xnc->sc = NULL;
     918       12684 : }
     919             : 
     920             : /*
     921             :  * Scan all inodes in the entire filesystem to generate link count data.  If
     922             :  * the scan is successful, the counts will be left alive for a repair.  If any
     923             :  * error occurs, we'll tear everything down.
                     :  *
                     :  * @sc: scrub context; recorded in xnc->sc, which must be NULL on entry.
                     :  * @xnc: link count scan context to initialise.
                     :  *
                     :  * Initialises the lock, starts the collection iscan, creates the
                     :  * observation array and adds the directory update hook.  On success,
                     :  * xchk_nlinks_teardown_scan() is installed as sc->buf_cleanup instead of
                     :  * being called here.
                     :  *
                     :  * Returns 0 on success, or the error from xfarray_create() or
                     :  * xfs_dir_hook_add() after calling xchk_nlinks_teardown_scan().
     924             :  */
     925             : STATIC int
     926       12684 : xchk_nlinks_setup_scan(
     927             :         struct xfs_scrub        *sc,
     928             :         struct xchk_nlink_ctrs  *xnc)
     929             : {
     930       12684 :         struct xfs_mount        *mp = sc->mp;
     931       12684 :         char                    *descr;
     932       12684 :         unsigned long long      max_inos;
     933       12684 :         xfs_agnumber_t          last_agno = mp->m_sb.sb_agcount - 1;
     934       12684 :         xfs_agino_t             first_agino, last_agino;
     935       12684 :         int                     error;
     936             : 
     937       12684 :         ASSERT(xnc->sc == NULL);
     938       12684 :         xnc->sc = sc;
     939             : 
     940       12684 :         mutex_init(&xnc->lock);
     941             : 
     942             :         /* Retry iget every tenth of a second for up to 30 seconds. */
     943       12684 :         xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
     944             : 
     945             :         /*
     946             :          * Set up enough space to store an nlink record for the highest
     947             :          * possible inode number in this system.
                     :          *
                     :          * The record count is one more than the last inode number of the
                     :          * last AG, capped at XFS_MAXINUMBER + 1.  The description string
                     :          * is freed right after xfarray_create() returns.
     948             :          */
     949       12684 :         xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
     950       12684 :         max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
     951       12684 :         descr = xchk_xfile_descr(sc, "file link counts");
     952       12684 :         error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
     953             :                         sizeof(struct xchk_nlink), &xnc->nlinks);
     954       12684 :         kfree(descr);
     955       12684 :         if (error)
     956           0 :                 goto out_teardown;
     957             : 
     958             :         /*
     959             :          * Hook into the directory entry code so that we can capture updates to
     960             :          * file link counts.  The hook only triggers for inodes that were
     961             :          * already scanned, and the scanner thread takes each inode's ILOCK,
     962             :          * which means that any in-progress inode updates will finish before we
     963             :          * can scan the inode.
     964             :          */
     965       12684 :         ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
     966       12684 :         xfs_hook_setup(&xnc->hooks.dirent_hook, xchk_nlinks_live_update);
     967       12684 :         error = xfs_dir_hook_add(mp, &xnc->hooks);
     968       12684 :         if (error)
     969           0 :                 goto out_teardown;
     970             : 
     971             :         /* Use deferred cleanup to pass the inode link count data to repair. */
     972       12684 :         sc->buf_cleanup = xchk_nlinks_teardown_scan;
     973       12684 :         return 0;
     974             : 
     975           0 : out_teardown:
     976           0 :         xchk_nlinks_teardown_scan(xnc);
     977           0 :         return error;
     978             : }
     979             : 
     980             : /*
                     :  * Scrub the link count of all inodes on the filesystem.
                     :  *
                     :  * @sc: scrub context; sc->buf is used as the struct xchk_nlink_ctrs.
                     :  *
                     :  * Sets up the live scan, collects link count observations, and then
                     :  * compares them against the inodes.  The comparison is skipped, and 0 is
                     :  * returned, if the scrub is marked XFS_SCRUB_OFLAG_INCOMPLETE once
                     :  * collection is done.
                     :  *
                     :  * Returns the setup error if setup fails.  When xchk_xref_process_error()
                     :  * returns false for the collect or compare phase, whatever it left in the
                     :  * local error variable is returned.  Otherwise returns 0.
                     :  */
     981             : int
     982       12684 : xchk_nlinks(
     983             :         struct xfs_scrub        *sc)
     984             : {
     985       12684 :         struct xchk_nlink_ctrs  *xnc = sc->buf;
     986       12684 :         int                     error = 0;
     987             : 
     988             :         /* Set ourselves up to check link counts on the live filesystem. */
     989       12684 :         error = xchk_nlinks_setup_scan(sc, xnc);
     990       12684 :         if (error)
     991             :                 return error;
     992             : 
     993             :         /* Walk all inodes, picking up link count information. */
     994       12684 :         error = xchk_nlinks_collect(xnc);
     995       12684 :         if (!xchk_xref_process_error(sc, 0, 0, &error))
     996          26 :                 return error;
     997             : 
     998             :         /* Fail fast if we're not playing with a full dataset. */
     999       12658 :         if (xchk_iscan_aborted(&xnc->collect_iscan))
    1000           0 :                 xchk_set_incomplete(sc);
    1001       12658 :         if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
    1002             :                 return 0;
    1003             : 
    1004             :         /* Compare link counts. */
    1005       12658 :         error = xchk_nlinks_compare(xnc);
    1006       12658 :         if (!xchk_xref_process_error(sc, 0, 0, &error))
    1007           2 :                 return error;
    1008             : 
    1009             :         /* Check one last time for an incomplete dataset. */
    1010       12656 :         if (xchk_iscan_aborted(&xnc->collect_iscan))
    1011           0 :                 xchk_set_incomplete(sc);
    1012             : 
    1013             :         return 0;
    1014             : }

Generated by: LCOV version 1.14