LCOV - code coverage report
Current view: top level - fs/xfs/scrub - dir_repair.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 560 740 75.7 %
Date: 2023-07-31 20:08:34 Functions: 39 42 92.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_bit.h"
      14             : #include "xfs_log_format.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_sb.h"
      17             : #include "xfs_inode.h"
      18             : #include "xfs_icache.h"
      19             : #include "xfs_da_format.h"
      20             : #include "xfs_da_btree.h"
      21             : #include "xfs_dir2.h"
      22             : #include "xfs_dir2_priv.h"
      23             : #include "xfs_bmap.h"
      24             : #include "xfs_quota.h"
      25             : #include "xfs_bmap_btree.h"
      26             : #include "xfs_trans_space.h"
      27             : #include "xfs_bmap_util.h"
      28             : #include "xfs_swapext.h"
      29             : #include "xfs_xchgrange.h"
      30             : #include "xfs_ag.h"
      31             : #include "xfs_parent.h"
      32             : #include "scrub/xfs_scrub.h"
      33             : #include "scrub/scrub.h"
      34             : #include "scrub/common.h"
      35             : #include "scrub/trace.h"
      36             : #include "scrub/repair.h"
      37             : #include "scrub/tempfile.h"
      38             : #include "scrub/tempswap.h"
      39             : #include "scrub/xfile.h"
      40             : #include "scrub/xfarray.h"
      41             : #include "scrub/xfblob.h"
      42             : #include "scrub/iscan.h"
      43             : #include "scrub/readdir.h"
      44             : #include "scrub/reap.h"
      45             : #include "scrub/findparent.h"
      46             : #include "scrub/orphanage.h"
      47             : #include "scrub/listxattr.h"
      48             : 
      49             : /*
      50             :  * Directory Repair
      51             :  * ================
      52             :  *
      53             :  * We repair directories by reading the directory data blocks looking for
      54             :  * directory entries that look salvageable (name passes verifiers, entry points
      55             :  * to a valid allocated inode, etc).  Each entry worth salvaging is stashed in
      56             :  * memory, and the stashed entries are periodically replayed into a temporary
      57             :  * directory to constrain memory use.  Batching the construction of the
      58             :  * temporary directory in this fashion reduces lock cycling of the directory
      59             :  * being repaired and the temporary directory, and will later become important
      60             :  * for parent pointer scanning.
      61             :  *
      62             :  * If parent pointers are enabled on this filesystem, we instead reconstruct
      63             :  * the directory by visiting each parent pointer of each file in the filesystem
      64             :  * and translating the relevant parent pointer records into dirents.  In this
      65             :  * case, it is advantageous to stash all directory entries created from parent
      66             :  * pointers for a single child file before replaying them into the temporary
      67             :  * directory.  To save memory, the live filesystem scan reuses the findparent
      68             :  * fields.  Directory repair chooses either parent pointer scanning or
      69             :  * directory entry salvaging, but not both.
      70             :  *
      71             :  * Directory entries added to the temporary directory do not elevate the link
      72             :  * counts of the inodes found.  When salvaging completes, the remaining stashed
      73             :  * entries are replayed to the temporary directory.  An atomic extent swap is
      74             :  * used to commit the new directory blocks to the directory being repaired.
      75             :  * This will disrupt readdir cursors.
      76             :  *
      77             :  * Legacy Locking Issues
      78             :  * ---------------------
      79             :  *
      80             :  * Prior to Linux 6.5, if /a, /a/b, and /c were all directories, the VFS would
      81             :  * not take i_rwsem on /a/b for a "mv /a/b /c/" operation.  This meant that
      82             :  * only b's ILOCK protected b's dotdot update.  b's IOLOCK was not taken,
      83             :  * unlike every other dotdot update (link, remove, mkdir).  If the repair code
      84             :  * dropped the ILOCK, it was required either to revalidate the dotdot entry
      85             :  * or to use dirent hooks to capture updates from other threads.
      86             :  */
      87             : 
      88             : /* Create a dirent in the tempdir. */
      89             : #define XREP_DIRENT_ADD         (1)
      90             : 
      91             : /* Remove a dirent from the tempdir. */
      92             : #define XREP_DIRENT_REMOVE      (2)
      93             : 
      94             : /* Directory entry to be restored in the new directory. */
      95             : struct xrep_dirent {
      96             :         /* Cookie for retrieval of the dirent name. */
      97             :         xfblob_cookie           name_cookie;
      98             : 
      99             :         /* Target inode number. */
     100             :         xfs_ino_t               ino;
     101             : 
     102             :         /* Length of the dirent name. */
     103             :         uint8_t                 namelen;
     104             : 
     105             :         /* File type of the dirent. */
     106             :         uint8_t                 ftype;
     107             : 
     108             :         /* XREP_DIRENT_{ADD,REMOVE} */
     109             :         uint8_t                 action;
     110             : };
     111             : 
     112             : /*
     113             :  * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
     114             :  * before we write them to the temp dir.
     115             :  */
     116             : #define XREP_DIR_MAX_STASH_BYTES        (PAGE_SIZE * 8)
     117             : 
     118             : struct xrep_dir {
     119             :         struct xfs_scrub        *sc;
     120             : 
     121             :         /* Fixed-size array of xrep_dirent structures. */
     122             :         struct xfarray          *dir_entries;
     123             : 
     124             :         /* Blobs containing directory entry names. */
     125             :         struct xfblob           *dir_names;
     126             : 
     127             :         /* Information for swapping data forks at the end. */
     128             :         struct xrep_tempswap    tx;
     129             : 
     130             :         /* Preallocated args struct for performing dir operations */
     131             :         struct xfs_da_args      args;
     132             : 
     133             :         /*
     134             :          * Information used to scan the filesystem to find the inumber of the
     135             :          * dotdot entry for this directory.  For directory salvaging when
     136             :          * parent pointers are not enabled, we use the findparent_* functions
     137             :          * on this object and access only the parent_ino field directly.
     138             :          *
     139             :          * When parent pointers are enabled, however, the pptr scanner uses the
     140             :          * iscan, hooks, lock, and parent_ino fields of this object directly.
     141             :          * @pscan.lock coordinates access to dir_entries, dir_names,
     142             :          * parent_ino, subdirs, dirents, and args.  This reduces the memory
     143             :          * requirements of this structure.
     144             :          */
     145             :         struct xrep_parent_scan_info pscan;
     146             : 
     147             :         /*
     148             :          * Context information for attaching this directory to the lost+found
     149             :          * if this directory does not have a parent.
     150             :          */
     151             :         struct xrep_adoption    adoption;
     152             : 
     153             :         /* How many subdirectories did we find? */
     154             :         uint64_t                subdirs;
     155             : 
     156             :         /* How many dirents did we find? */
     157             :         unsigned int            dirents;
     158             : 
     159             :         /* Should we move this directory to the orphanage? */
     160             :         bool                    needs_adoption;
     161             : 
     162             :         /*
     163             :          * Scratch buffer for reading parent pointers from child files.  The
     164             :          * p_name field is used to flush stashed dirents into the temporary
     165             :          * directory in between parent pointers.  At the very end of the
     166             :          * repair, it can also be used to compute the lost+found filename
     167             :          * if we need to reparent the directory.
     168             :          */
     169             :         struct xfs_parent_name_irec pptr;
     170             : };
     171             : 
     172             : /* Tear down all the incore stuff we created. */
     173             : static void
     174      197896 : xrep_dir_teardown(
     175             :         struct xfs_scrub        *sc)
     176             : {
     177      197896 :         struct xrep_dir         *rd = sc->buf;
     178             : 
     179      197896 :         xrep_findparent_scan_teardown(&rd->pscan);
     180      197901 :         xfblob_destroy(rd->dir_names);
     181      197896 :         xfarray_destroy(rd->dir_entries);
     182      197894 : }
     183             : 
     184             : /* Set up for a directory repair. */
     185             : int
     186      242643 : xrep_setup_directory(
     187             :         struct xfs_scrub        *sc)
     188             : {
     189      242643 :         struct xrep_dir         *rd;
     190      242643 :         int                     error;
     191             : 
     192      242643 :         xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
     193             : 
     194      242638 :         error = xrep_orphanage_try_create(sc);
     195      242643 :         if (error)
     196             :                 return error;
     197             : 
     198      242643 :         error = xrep_tempfile_create(sc, S_IFDIR);
     199      242599 :         if (error)
     200             :                 return error;
     201             : 
     202      242117 :         rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
     203      242130 :         if (!rd)
     204             :                 return -ENOMEM;
     205      242130 :         rd->sc = sc;
     206      242130 :         sc->buf = rd;
     207             : 
     208      242130 :         return 0;
     209             : }
     210             : 
     211             : /*
     212             :  * If we're the root of a directory tree, we are our own parent.  If we're an
     213             :  * unlinked directory, the parent /won't/ have a link to us.  Set the parent
     214             :  * directory to the root for both cases.  Returns NULLFSINO if we don't know
     215             :  * what to do.
     216             :  */
     217             : static inline xfs_ino_t
     218             : xrep_dir_self_parent(
     219             :         struct xrep_dir         *rd)
     220             : {
     221             :         struct xfs_scrub        *sc = rd->sc;
     222             : 
     223             :         if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
     224             :                 return sc->mp->m_sb.sb_rootino;
     225             : 
     226             :         if (VFS_I(sc->ip)->i_nlink == 0)
     227             :                 return sc->mp->m_sb.sb_rootino;
     228             : 
     229             :         return NULLFSINO;
     230             : }
     231             : 
     232             : /*
     233             :  * Look up the dotdot entry and confirm that it's really the parent.
     234             :  * Returns NULLFSINO if we don't know what to do.
     235             :  */
     236             : static inline xfs_ino_t
     237           4 : xrep_dir_lookup_parent(
     238             :         struct xrep_dir         *rd)
     239             : {
     240           4 :         struct xfs_scrub        *sc = rd->sc;
     241           4 :         xfs_ino_t               ino;
     242           4 :         int                     error;
     243             : 
     244           4 :         error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
     245           4 :         if (error)
     246             :                 return NULLFSINO;
     247           4 :         if (!xfs_verify_dir_ino(sc->mp, ino))
     248             :                 return NULLFSINO;
     249             : 
     250           4 :         error = xrep_findparent_confirm(sc, &ino);
     251           4 :         if (error)
     252             :                 return NULLFSINO;
     253             : 
     254           4 :         return ino;
     255             : }
     256             : 
     257             : /*
     258             :  * Look up '..' in the dentry cache and confirm that it's really the parent.
     259             :  * Returns NULLFSINO if the dcache misses or if the hit is implausible.
     260             :  */
     261             : static inline xfs_ino_t
     262         980 : xrep_dir_dcache_parent(
     263             :         struct xrep_dir         *rd)
     264             : {
     265         980 :         struct xfs_scrub        *sc = rd->sc;
     266         980 :         xfs_ino_t               parent_ino;
     267         980 :         int                     error;
     268             : 
     269         980 :         parent_ino = xrep_findparent_from_dcache(sc);
     270         982 :         if (parent_ino == NULLFSINO)
     271             :                 return parent_ino;
     272             : 
     273         978 :         error = xrep_findparent_confirm(sc, &parent_ino);
     274         978 :         if (error)
     275             :                 return NULLFSINO;
     276             : 
     277         978 :         return parent_ino;
     278             : }
     279             : 
     280             : /* Try to find the parent of the directory being repaired. */
     281             : STATIC int
     282         992 : xrep_dir_find_parent(
     283             :         struct xrep_dir         *rd)
     284             : {
     285         992 :         xfs_ino_t               ino;
     286             : 
     287         992 :         ino = xrep_findparent_self_reference(rd->sc);
     288         990 :         if (ino != NULLFSINO) {
     289          11 :                 xrep_findparent_scan_finish_early(&rd->pscan, ino);
     290          11 :                 return 0;
     291             :         }
     292             : 
     293         979 :         ino = xrep_dir_dcache_parent(rd);
     294         981 :         if (ino != NULLFSINO) {
     295         977 :                 xrep_findparent_scan_finish_early(&rd->pscan, ino);
     296         977 :                 return 0;
     297             :         }
     298             : 
     299           4 :         ino = xrep_dir_lookup_parent(rd);
     300           4 :         if (ino != NULLFSINO) {
     301           4 :                 xrep_findparent_scan_finish_early(&rd->pscan, ino);
     302           4 :                 return 0;
     303             :         }
     304             : 
     305             :         /*
     306             :          * A full filesystem scan is the last resort.  On a busy filesystem,
     307             :          * the scan can fail with -EBUSY if we cannot grab IOLOCKs.  That means
     308             :          * that we don't know who the parent is, so we should return to
     309             :          * userspace.
     310             :          */
     311           0 :         return xrep_findparent_scan(&rd->pscan);
     312             : }
     313             : 
     314             : /*
     315             :  * Decide if we want to salvage this entry.  We don't bother with oversized
     316             :  * names or the dot entry.
     317             :  */
     318             : STATIC int
     319        7195 : xrep_dir_want_salvage(
     320             :         struct xrep_dir         *rd,
     321             :         const char              *name,
     322             :         int                     namelen,
     323             :         xfs_ino_t               ino)
     324             : {
     325        7195 :         struct xfs_mount        *mp = rd->sc->mp;
     326             : 
     327             :         /* No pointers to ourselves or to garbage. */
     328        7195 :         if (ino == rd->sc->ip->i_ino)
     329             :                 return false;
     330        7189 :         if (!xfs_verify_dir_ino(mp, ino))
     331             :                 return false;
     332             : 
     333             :         /* No weird looking names or dot entries. */
     334        7187 :         if (namelen >= MAXNAMELEN || namelen <= 0)
     335             :                 return false;
     336        7187 :         if (namelen == 1 && name[0] == '.')
     337           0 :                 return false;
     338             : 
     339             :         return true;
     340             : }
     341             : 
     342             : /*
     343             :  * Remember that we want to create a dirent in the tempdir.  These stashed
     344             :  * actions will be replayed later.
     345             :  */
     346             : STATIC int
     347     2490000 : xrep_dir_stash_createname(
     348             :         struct xrep_dir         *rd,
     349             :         const struct xfs_name   *name,
     350             :         xfs_ino_t               ino)
     351             : {
     352     2490000 :         struct xrep_dirent      dirent = {
     353             :                 .action         = XREP_DIRENT_ADD,
     354             :                 .ino            = ino,
     355     2490000 :                 .namelen        = name->len,
     356     2490000 :                 .ftype          = name->type,
     357             :         };
     358     2490000 :         int                     error;
     359             : 
     360     2490000 :         trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
     361             : 
     362     2489996 :         error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
     363     2489996 :                         name->len);
     364     2489983 :         if (error)
     365             :                 return error;
     366             : 
     367     2489983 :         return xfarray_append(rd->dir_entries, &dirent);
     368             : }
     369             : 
     370             : /*
     371             :  * Remember that we want to remove a dirent from the tempdir.  These stashed
     372             :  * actions will be replayed later.
     373             :  */
     374             : STATIC int
     375           0 : xrep_dir_stash_removename(
     376             :         struct xrep_dir         *rd,
     377             :         const struct xfs_name   *name,
     378             :         xfs_ino_t               ino)
     379             : {
     380           0 :         struct xrep_dirent      dirent = {
     381             :                 .action         = XREP_DIRENT_REMOVE,
     382             :                 .ino            = ino,
     383           0 :                 .namelen        = name->len,
     384           0 :                 .ftype          = name->type,
     385             :         };
     386           0 :         int                     error;
     387             : 
     388           0 :         trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
     389             : 
     390           0 :         error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
     391           0 :                         name->len);
     392           0 :         if (error)
     393             :                 return error;
     394             : 
     395           0 :         return xfarray_append(rd->dir_entries, &dirent);
     396             : }
     397             : 
     398             : /* Allocate an in-core record to hold entries while we rebuild the dir data. */
     399             : STATIC int
     400        7188 : xrep_dir_salvage_entry(
     401             :         struct xrep_dir         *rd,
     402             :         unsigned char           *name,
     403             :         unsigned int            namelen,
     404             :         xfs_ino_t               ino)
     405             : {
     406        7188 :         struct xfs_name         xname = {
     407             :                 .name           = name,
     408             :         };
     409        7188 :         struct xfs_scrub        *sc = rd->sc;
     410        7188 :         struct xfs_inode        *ip;
     411        7188 :         unsigned int            i = 0;
     412        7188 :         int                     error = 0;
     413             : 
     414        7188 :         if (xchk_should_terminate(sc, &error))
     415           0 :                 return error;
     416             : 
     417             :         /*
     418             :          * Truncate the name to the first character that would trip namecheck.
     419             :          * If we no longer have a name after that, ignore this entry.
     420             :          */
     421       62994 :         while (i < namelen && name[i] != 0 && name[i] != '/')
     422       55806 :                 i++;
     423        7188 :         if (i == 0)
     424             :                 return 0;
     425        7188 :         xname.len = i;
     426             : 
     427             :         /* Ignore '..' entries; we already picked the new parent. */
     428        7188 :         if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
     429           6 :                 trace_xrep_dir_salvaged_parent(sc->ip, ino);
     430           6 :                 return 0;
     431             :         }
     432             : 
     433        7182 :         trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
     434             : 
     435             :         /*
     436             :          * Compute the ftype or dump the entry if we can't.  We don't lock the
     437             :          * inode because inodes can't change type while we have a reference.
     438             :          */
     439        7182 :         error = xchk_iget(sc, ino, &ip);
     440        7183 :         if (error)
     441             :                 return 0;
     442             : 
     443             :         /* Don't mix metadata and regular directory trees. */
     444        7183 :         if (xfs_is_metadir_inode(ip) ^ xfs_is_metadir_inode(rd->sc->ip)) {
     445           0 :                 xchk_irele(sc, ip);
     446           0 :                 return 0;
     447             :         }
     448             : 
     449        7183 :         xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
     450        7183 :         xchk_irele(sc, ip);
     451             : 
     452        7183 :         return xrep_dir_stash_createname(rd, &xname, ino);
     453             : }
     454             : 
     455             : /* Record a shortform directory entry for later reinsertion. */
     456             : STATIC int
     457        5395 : xrep_dir_salvage_sf_entry(
     458             :         struct xrep_dir                 *rd,
     459             :         struct xfs_dir2_sf_hdr          *sfp,
     460             :         struct xfs_dir2_sf_entry        *sfep)
     461             : {
     462        5395 :         xfs_ino_t                       ino;
     463             : 
     464        5395 :         ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
     465        5393 :         if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
     466             :                 return 0;
     467             : 
     468        5393 :         return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
     469             : }
     470             : 
     471             : /* Record a regular directory entry for later reinsertion. */
     472             : STATIC int
     473        1800 : xrep_dir_salvage_data_entry(
     474             :         struct xrep_dir                 *rd,
     475             :         struct xfs_dir2_data_entry      *dep)
     476             : {
     477        1800 :         xfs_ino_t                       ino;
     478             : 
     479        1800 :         ino = be64_to_cpu(dep->inumber);
     480        1800 :         if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
     481             :                 return 0;
     482             : 
     483        1794 :         return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
     484             : }
     485             : 
     486             : /* Try to recover block/data format directory entries. */
     487             : STATIC int
     488          12 : xrep_dir_recover_data(
     489             :         struct xrep_dir         *rd,
     490             :         struct xfs_buf          *bp)
     491             : {
     492          12 :         struct xfs_da_geometry  *geo = rd->sc->mp->m_dir_geo;
     493          12 :         unsigned int            offset;
     494          12 :         unsigned int            end;
     495          12 :         int                     error = 0;
     496             : 
     497             :         /*
     498             :          * Loop over the data portion of the block.
     499             :          * Each object is a real entry (dep) or an unused one (dup).
     500             :          */
     501          12 :         offset = geo->data_entry_offset;
     502          12 :         end = min_t(unsigned int, BBTOB(bp->b_length),
     503             :                         xfs_dir3_data_end_offset(geo, bp->b_addr));
     504             : 
     505        1848 :         while (offset < end) {
     506        1836 :                 struct xfs_dir2_data_unused     *dup = bp->b_addr + offset;
     507        1836 :                 struct xfs_dir2_data_entry      *dep = bp->b_addr + offset;
     508             : 
     509        1836 :                 if (xchk_should_terminate(rd->sc, &error))
     510           0 :                         return error;
     511             : 
     512             :                 /* Skip unused entries. */
     513        1836 :                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
     514          36 :                         offset += be16_to_cpu(dup->length);
     515          36 :                         continue;
     516             :                 }
     517             : 
     518             :                 /* Don't walk off the end of the block. */
     519        1800 :                 offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
     520        1800 :                 if (offset > end)
     521             :                         break;
     522             : 
     523             :                 /* Ok, let's save this entry. */
     524        1800 :                 error = xrep_dir_salvage_data_entry(rd, dep);
     525        1800 :                 if (error)
     526           0 :                         return error;
     527             : 
     528             :         }
     529             : 
     530             :         return 0;
     531             : }
     532             : 
     533             : /* Try to recover shortform directory entries. */
     534             : STATIC int
     535         984 : xrep_dir_recover_sf(
     536             :         struct xrep_dir                 *rd)
     537             : {
     538         984 :         struct xfs_dir2_sf_hdr          *sfp;
     539         984 :         struct xfs_dir2_sf_entry        *sfep;
     540         984 :         struct xfs_dir2_sf_entry        *next;
     541         984 :         struct xfs_ifork                *ifp;
     542         984 :         xfs_ino_t                       ino;
     543         984 :         unsigned char                   *end;
     544         984 :         int                             error = 0;
     545             : 
     546         984 :         ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
     547         984 :         sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
     548         984 :         end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
     549             : 
     550         984 :         ino = xfs_dir2_sf_get_parent_ino(sfp);
     551         984 :         trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
     552             : 
     553         985 :         sfep = xfs_dir2_sf_firstentry(sfp);
     554        6380 :         while ((unsigned char *)sfep < end) {
     555        5393 :                 if (xchk_should_terminate(rd->sc, &error))
     556           0 :                         return error;
     557             : 
     558        5394 :                 next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
     559        5394 :                 if ((unsigned char *)next > end)
     560             :                         break;
     561             : 
     562             :                 /* Ok, let's save this entry. */
     563        5394 :                 error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
     564        5395 :                 if (error)
     565           0 :                         return error;
     566             : 
     567             :                 sfep = next;
     568             :         }
     569             : 
     570             :         return 0;
     571             : }
     572             : 
     573             : /*
     574             :  * Try to figure out the format of this directory from the data fork mappings
     575             :  * and the directory size.  If we can be reasonably sure of format, we can be
     576             :  * more aggressive in salvaging directory entries.  On return, @magic_guess
     577             :  * holds the big-endian XFS_DIR3_BLOCK_MAGIC if this looks like a "block
     578             :  * format" directory, XFS_DIR3_DATA_MAGIC if it looks like a "data format"
     579             :  * directory, or 0 if we can't tell or a mapping lookup returned an error.
     580             :  */
     581             : STATIC void
     582           6 : xrep_dir_guess_format(
     583             :         struct xrep_dir         *rd,
     584             :         __be32                  *magic_guess)
     585             : {
     586           6 :         struct xfs_inode        *dp = rd->sc->ip;
     587           6 :         struct xfs_mount        *mp = rd->sc->mp;
     588           6 :         struct xfs_da_geometry  *geo = mp->m_dir_geo;
     589           6 :         xfs_fileoff_t           last;
     590           6 :         int                     error;
     591             : 
     592           6 :         ASSERT(xfs_has_crc(mp));
     593             : 
     594           6 :         *magic_guess = 0;
     595             : 
     596             :         /*
     597             :          * If there's a single directory block and the directory size is
     598             :          * exactly one block, this has to be a single block format directory.
     599             :          */
     600           6 :         error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
     601           6 :         if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
     602           5 :             dp->i_disk_size == geo->blksize) {
     603           5 :                 *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
     604           6 :                 return;
     605             :         }
     606             : 
     607             :         /*
     608             :          * If the last extent before the leaf offset matches the directory
     609             :          * size and the directory size is larger than 1 block, this is a
     610             :          * data format directory.
     611             :          */
     612           1 :         last = geo->leafblk;
     613           1 :         error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
     614           1 :         if (!error &&
     615           1 :             XFS_FSB_TO_B(mp, last) > geo->blksize &&
     616           1 :             XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
     617           1 :                 *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
     618           1 :                 return;
     619             :         } /* neither test matched: the guess stays 0 */
     620             : }
     621             : 
     622             : /* Recover directory entries from directory block @dabno, forcing @magic_guess if it is nonzero. */
     623             : STATIC int
     624          12 : xrep_dir_recover_dirblock(
     625             :         struct xrep_dir         *rd,
     626             :         __be32                  magic_guess,
     627             :         xfs_dablk_t             dabno)
     628             : {
     629          12 :         struct xfs_dir2_data_hdr *hdr;
     630          12 :         struct xfs_buf          *bp;
     631          12 :         __be32                  oldmagic;
     632          12 :         int                     error;
     633             : 
     634             :         /*
     635             :          * Try to read the buffer.  We invalidate them in the next step so we
     636             :          * don't bother to set a buffer type or ops.
     637             :          */
     638          12 :         error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
     639             :                         XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
     640          12 :         if (error || !bp)
     641             :                 return error; /* read failed, or no buffer came back (error is then 0) */
     642             : 
     643          12 :         hdr = bp->b_addr;
     644          12 :         oldmagic = hdr->magic;
     645             : 
     646          12 :         trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
     647             :                         be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
     648             : 
     649             :         /*
     650             :          * If we're sure of the block's format, temporarily overwrite the magic
     651             :          * in the buffer with our guess and salvage; it is restored at "out".
     652             :          */
     653          12 :         if (magic_guess) {
     654          12 :                 hdr->magic = magic_guess;
     655          12 :                 goto recover;
     656             :         }
     657             : 
     658             :         /*
     659             :          * If we couldn't guess what type of directory this is, then we will
     660             :          * only salvage entries from directory blocks that match the magic
     661             :          * number and pass verifiers; any other block is skipped with error 0.
     662             :          */
     663           0 :         switch (hdr->magic) {
     664           0 :         case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
     665             :         case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
     666           0 :                 if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
     667           0 :                         goto out;
     668           0 :                 if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
     669           0 :                         goto out;
     670             :                 break;
     671           0 :         case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
     672             :         case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
     673           0 :                 if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
     674           0 :                         goto out;
     675           0 :                 if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
     676           0 :                         goto out;
     677             :                 break;
     678           0 :         default:
     679           0 :                 goto out;
     680             :         }
     681             : 
     682          12 : recover:
     683          12 :         error = xrep_dir_recover_data(rd, bp);
     684             : 
     685          12 : out:
     686          12 :         hdr->magic = oldmagic; /* undo the temporary magic override, if any */
     687          12 :         xfs_trans_brelse(rd->sc->tp, bp);
     688          12 :         return error;
     689             : }
     690             : 
     691             : static inline void
     692     2862939 : xrep_dir_init_args(
     693             :         struct xrep_dir         *rd,
     694             :         struct xfs_inode        *dp,
     695             :         const struct xfs_name   *name)
     696             : {
     697     2862939 :         memset(&rd->args, 0, sizeof(struct xfs_da_args)); /* rebuild rd->args from scratch for an operation on @dp */
     698     2862939 :         rd->args.geo = rd->sc->mp->m_dir_geo;
     699     2862939 :         rd->args.whichfork = XFS_DATA_FORK;
     700     2862939 :         rd->args.owner = rd->sc->ip->i_ino; /* owner is always sc->ip, whichever inode @dp is */
     701     2862939 :         rd->args.trans = rd->sc->tp;
     702     2862939 :         rd->args.dp = dp;
     703     2862939 :         if (!name) /* no name given: leave the name, filetype and hash fields zeroed */
     704             :                 return;
     705     2665120 :         rd->args.name = name->name;
     706     2665120 :         rd->args.namelen = name->len;
     707     2665120 :         rd->args.filetype = name->type;
     708     2665120 :         rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
     709             : }
     710             : 
     711             : /* Replay a stashed createname of @name -> @inum into the temporary directory (sc->tempip). */
     712             : STATIC int
     713     2489351 : xrep_dir_replay_createname(
     714             :         struct xrep_dir         *rd,
     715             :         const struct xfs_name   *name,
     716             :         xfs_ino_t               inum,
     717             :         xfs_extlen_t            total)
     718             : {
     719     2489351 :         struct xfs_scrub        *sc = rd->sc;
     720     2489351 :         struct xfs_inode        *dp = rd->sc->tempip;
     721     2489351 :         bool                    is_block, is_leaf;
     722     2489351 :         int                     error;
     723             : 
     724     2489351 :         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
     725             : 
     726     2489351 :         error = xfs_dir_ino_validate(sc->mp, inum);
     727     2489086 :         if (error)
     728             :                 return error;
     729             : 
     730     2489103 :         trace_xrep_dir_replay_createname(dp, name, inum);
     731             : 
     732     2489064 :         xrep_dir_init_args(rd, dp, name);
     733     2489090 :         rd->args.inumber = inum;
     734     2489090 :         rd->args.total = total;
     735     2489090 :         rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
     736             : 
     737     2489090 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) /* pick the addname routine for the tempdir's current format */
     738      908958 :                 return xfs_dir2_sf_addname(&rd->args);
     739             : 
     740     1580132 :         error = xfs_dir2_isblock(&rd->args, &is_block);
     741     1580382 :         if (error)
     742             :                 return error;
     743     1580382 :         if (is_block)
     744      278367 :                 return xfs_dir2_block_addname(&rd->args);
     745             : 
     746     1302015 :         error = xfs_dir2_isleaf(&rd->args, &is_leaf);
     747     1302052 :         if (error)
     748             :                 return error;
     749     1302052 :         if (is_leaf)
     750      488321 :                 return xfs_dir2_leaf_addname(&rd->args);
     751             : 
     752      813731 :         return xfs_dir2_node_addname(&rd->args); /* neither shortform, block nor leaf */
     753             : }
     754             : 
     755             : /* Replay a stashed removename of @name onto the directory currently recorded in rd->args.dp. */
     756             : STATIC int
     757           0 : xrep_dir_replay_removename(
     758             :         struct xrep_dir         *rd,
     759             :         const struct xfs_name   *name,
     760             :         xfs_extlen_t            total)
     761             : {
     762           0 :         struct xfs_inode        *dp = rd->args.dp; /* NOTE(review): left over from an earlier args setup, whereas createname uses rd->sc->tempip -- confirm it is always set */
     763           0 :         bool                    is_block, is_leaf;
     764           0 :         int                     error;
     765             : 
     766           0 :         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
     767             : 
     768           0 :         xrep_dir_init_args(rd, dp, name);
     769           0 :         rd->args.op_flags = 0;
     770           0 :         rd->args.total = total;
     771             : 
     772           0 :         trace_xrep_dir_replay_removename(dp, name, 0);
     773             : 
     774           0 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) /* pick the removename routine for the directory's current format */
     775           0 :                 return xfs_dir2_sf_removename(&rd->args);
     776             : 
     777           0 :         error = xfs_dir2_isblock(&rd->args, &is_block);
     778           0 :         if (error)
     779             :                 return error;
     780           0 :         if (is_block)
     781           0 :                 return xfs_dir2_block_removename(&rd->args);
     782             : 
     783           0 :         error = xfs_dir2_isleaf(&rd->args, &is_leaf);
     784           0 :         if (error)
     785             :                 return error;
     786           0 :         if (is_leaf)
     787           0 :                 return xfs_dir2_leaf_removename(&rd->args);
     788             : 
     789           0 :         return xfs_dir2_node_removename(&rd->args); /* neither shortform, block nor leaf */
     790             : }
     791             : 
     792             : /*
     793             :  * Replay one stashed dirent update (add or remove) against the temporary dir.
     794             :  * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
     795             :  * must not be in transaction context.
     796             :  */
     797             : STATIC int
     798     2489418 : xrep_dir_replay_update(
     799             :         struct xrep_dir                 *rd,
     800             :         const struct xrep_dirent        *dirent)
     801             : {
     802     2489418 :         struct xfs_name                 name = {
     803     2489418 :                 .len                    = dirent->namelen,
     804     2489418 :                 .type                   = dirent->ftype,
     805     2489418 :                 .name                   = rd->pptr.p_name, /* name bytes must already be in this buffer */
     806             :         };
     807     2489418 :         struct xfs_mount                *mp = rd->sc->mp;
     808             : #ifdef DEBUG
     809     2489418 :         xfs_ino_t                       ino;
     810             : #endif
     811     2489418 :         uint                            resblks;
     812     2489418 :         int                             error;
     813             : 
     814     2489418 :         resblks = xfs_link_space_res(mp, dirent->namelen);
     815     2489433 :         error = xchk_trans_alloc(rd->sc, resblks);
     816     2489455 :         if (error)
     817             :                 return error;
     818             : 
     819             :         /* Lock the temporary directory and join it to the transaction */
     820     2489421 :         xrep_tempfile_ilock(rd->sc);
     821     2489331 :         xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);
     822             : 
     823     2489390 :         switch (dirent->action) {
     824     2489390 :         case XREP_DIRENT_ADD:
     825             :                 /*
     826             :                  * Create a replacement dirent in the temporary directory.
     827             :                  * Note that _createname doesn't check for existing entries.
     828             :                  * There shouldn't be any in the temporary dir, but we'll
     829             :                  * verify this in debug mode.
     830             :                  */
     831             : #ifdef DEBUG
     832     2489390 :                 error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
     833     2489357 :                 if (error != -ENOENT) {
     834           0 :                         ASSERT(error != -ENOENT); /* NOTE(review): always true inside this branch; ASSERT(error == -ENOENT) looks intended */
     835           0 :                         goto out_cancel; /* NOTE(review): if the lookup found an entry, error is 0 here and we return 0 */
     836             :                 }
     837             : #endif
     838             : 
     839     2489357 :                 error = xrep_dir_replay_createname(rd, &name, dirent->ino,
     840             :                                 resblks);
     841     2489412 :                 if (error)
     842           0 :                         goto out_cancel;
     843             : 
     844     2489412 :                 if (name.type == XFS_DIR3_FT_DIR)
     845      205942 :                         rd->subdirs++;
     846     2489412 :                 rd->dirents++;
     847     2489412 :                 break;
     848           0 :         case XREP_DIRENT_REMOVE:
     849             :                 /*
     850             :                  * Remove a dirent from the temporary directory.  Note that
     851             :                  * _removename doesn't check the inode target of the existing
     852             :                  * entry.  There should be a perfect match in the temporary
     853             :                  * dir, but we'll verify this in debug mode.
     854             :                  */
     855             : #ifdef DEBUG
     856           0 :                 error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
     857           0 :                 if (error) {
     858           0 :                         ASSERT(error != 0); /* NOTE(review): always true inside this branch; ASSERT(error == 0) looks intended */
     859           0 :                         goto out_cancel;
     860             :                 }
     861           0 :                 if (ino != dirent->ino) {
     862           0 :                         ASSERT(ino == dirent->ino);
     863           0 :                         error = -EIO;
     864           0 :                         goto out_cancel;
     865             :                 }
     866             : #endif
     867             : 
     868           0 :                 error = xrep_dir_replay_removename(rd, &name, resblks);
     869           0 :                 if (error)
     870           0 :                         goto out_cancel;
     871             : 
     872           0 :                 if (name.type == XFS_DIR3_FT_DIR)
     873           0 :                         rd->subdirs--;
     874           0 :                 rd->dirents--;
     875           0 :                 break;
     876           0 :         default:
     877           0 :                 ASSERT(0);
     878           0 :                 error = -EIO;
     879           0 :                 goto out_cancel;
     880             :         }
     881             : 
     882             :         /* Commit and unlock. */
     883     2489412 :         error = xrep_trans_commit(rd->sc);
     884     2489425 :         if (error)
     885             :                 return error; /* NOTE(review): unlike out_cancel, this path skips xrep_tempfile_iunlock -- confirm teardown drops the lock */
     886             : 
     887     2489427 :         xrep_tempfile_iunlock(rd->sc);
     888     2489427 :         return 0;
     889           0 : out_cancel:
     890           0 :         xchk_trans_cancel(rd->sc);
     891           0 :         xrep_tempfile_iunlock(rd->sc);
     892           0 :         return error;
     893             : }
     894             : 
     895             : /*
     896             :  * Flush stashed incore dirent updates that have been recorded by the scanner.
     897             :  * This is done to reduce the memory requirements of the directory rebuild,
     898             :  * since directories can contain up to 32GB of directory data.
     899             :  *
     900             :  * Caller must not hold transactions or ILOCKs.  Caller must hold the tempdir
     901             :  * IOLOCK.  If a load or replay fails we return early without emptying the stash.
     902             :  */
     903             : STATIC int
     904      199919 : xrep_dir_replay_updates(
     905             :         struct xrep_dir         *rd)
     906             : {
     907      199919 :         xfarray_idx_t           array_cur;
     908      199919 :         int                     error;
     909             : 
     910             :         /* Replay every stashed dirent update against the temporary directory. */
     911     2689180 :         foreach_xfarray_idx(rd->dir_entries, array_cur) {
     912     2489275 :                 struct xrep_dirent      dirent;
     913             : 
     914     2489275 :                 error = xfarray_load(rd->dir_entries, array_cur, &dirent);
     915     2489420 :                 if (error)
     916          33 :                         return error;
     917             : 
     918             :                 /* Load this dirent's name from the blob store into rd->pptr.p_name. */
     919     2489420 :                 error = xfblob_load(rd->dir_names, dirent.name_cookie,
     920     2489420 :                                 rd->pptr.p_name, dirent.namelen);
     921     2489409 :                 if (error)
     922           0 :                         return error;
     923     2489409 :                 rd->pptr.p_name[MAXNAMELEN - 1] = 0; /* terminates the last byte of the buffer, not the byte at namelen */
     924             : 
     925     2489409 :                 error = xrep_dir_replay_update(rd, &dirent);
     926     2489294 :                 if (error)
     927          33 :                         return error;
     928             :         }
     929             : 
     930             :         /* Empty out both arrays now that we've added the entries. */
     931      199898 :         xfarray_truncate(rd->dir_entries);
     932      199891 :         xfblob_truncate(rd->dir_names);
     933      199891 :         return 0;
     934             : }
     935             : 
     936             : /*
     937             :  * Periodically flush stashed directory entries to the temporary dir.  This
     938             :  * is done to reduce the memory requirements of the directory rebuild, since
     939             :  * directories can contain up to 32GB of directory data.
     940             :  */
     941             : STATIC int
     942         994 : xrep_dir_flush_stashed(
     943             :         struct xrep_dir         *rd)
     944             : {
     945         994 :         int                     error;
     946             : 
     947             :         /*
     948             :          * Entering this function, the scrub context has a reference to the
     949             :          * inode being repaired, the temporary file, and a scrub transaction
     950             :          * that we use during dirent salvaging to avoid livelocking if there
     951             :          * are cycles in the directory structures.  We hold ILOCK_EXCL on both
     952             :          * the inode being repaired and the temporary file, though they are
     953             :          * not ijoined to the scrub transaction.
     954             :          *
     955             :          * To constrain kernel memory use, we occasionally write salvaged
     956             :          * dirents from the xfarray and xfblob structures into the temporary
     957             :          * directory in preparation for swapping the directory structures at
     958             :          * the end.  Updating the temporary file requires a transaction, so we
     959             :          * commit the scrub transaction and drop the two ILOCKs so that
     960             :          * we can allocate whatever transaction we want.
     961             :          *
     962             :          * We still hold IOLOCK_EXCL on the inode being repaired, which
     963             :          * prevents anyone from accessing the damaged directory data while we
     964             :          * repair it.
     965             :          */
     966         994 :         error = xrep_trans_commit(rd->sc);
     967         993 :         if (error)
     968             :                 return error;
     969         993 :         xchk_iunlock(rd->sc, XFS_ILOCK_EXCL); /* NOTE(review): only this one unlock is visible here, though the comment above speaks of two ILOCKs */
     970             : 
     971             :         /*
     972             :          * Take the IOLOCK of the temporary file while we modify dirents.  This
     973             :          * isn't strictly required because the temporary file is never revealed
     974             :          * to userspace, but we follow the same locking rules.  We still hold
     975             :          * sc->ip's IOLOCK.
     976             :          */
     977         995 :         error = xrep_tempfile_iolock_polled(rd->sc);
     978         993 :         if (error)
     979             :                 return error;
     980             : 
     981             :         /* Write to the tempdir all the updates that we've stashed. */
     982         993 :         error = xrep_dir_replay_updates(rd);
     983         995 :         xrep_tempfile_iounlock(rd->sc); /* dropped whether or not the replay succeeded */
     984         994 :         if (error)
     985             :                 return error;
     986             : 
     987             :         /*
     988             :          * Recreate the salvage transaction and relock the dir we're salvaging.
     989             :          */
     990         994 :         error = xchk_trans_alloc(rd->sc, 0);
     991         994 :         if (error)
     992             :                 return error;
     993         994 :         xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
     994         994 :         return 0;
     995             : }
     996             : 
     997             : /* Return true once the stashed dirents plus their names exceed XREP_DIR_MAX_STASH_BYTES. */
     998             : static inline bool
     999  4447984924 : xrep_dir_want_flush_stashed(
    1000             :         struct xrep_dir         *rd)
    1001             : {
    1002  4447984924 :         unsigned long long      bytes;
    1003             : 
    1004  4447984924 :         bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
    1005  4446419212 :         return bytes > XREP_DIR_MAX_STASH_BYTES;
    1006             : }
    1007             : 
    1008             : /* Extract as many directory entries as we can from the written mappings below geo->leafblk. */
    1009             : STATIC int
    1010           6 : xrep_dir_recover(
    1011             :         struct xrep_dir         *rd)
    1012             : {
    1013           6 :         struct xfs_bmbt_irec    got;
    1014           6 :         struct xfs_scrub        *sc = rd->sc;
    1015           6 :         struct xfs_da_geometry  *geo = sc->mp->m_dir_geo;
    1016           6 :         xfs_fileoff_t           offset;
    1017           6 :         xfs_dablk_t             dabno;
    1018           6 :         __be32                  magic_guess;
    1019           6 :         int                     nmap;
    1020           6 :         int                     error;
    1021             : 
    1022           6 :         xrep_dir_guess_format(rd, &magic_guess);
    1023             : 
    1024             :         /* Iterate each directory data block in the data fork. */
    1025           6 :         for (offset = 0;
    1026          21 :              offset < geo->leafblk;
    1027          15 :              offset = got.br_startoff + got.br_blockcount) {
    1028          15 :                 nmap = 1;
    1029          15 :                 error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
    1030             :                                 &got, &nmap, 0);
    1031          15 :                 if (error)
    1032           0 :                         return error;
    1033          15 :                 if (nmap != 1)
    1034             :                         return -EFSCORRUPTED;
    1035          15 :                 if (!xfs_bmap_is_written_extent(&got))
    1036           6 :                         continue; /* nothing to salvage from a mapping that is not a written extent */
    1037             : 
    1038           9 :                 for (dabno = round_up(got.br_startoff, geo->fsbcount);
    1039          21 :                      dabno < got.br_startoff + got.br_blockcount;
    1040          12 :                      dabno += geo->fsbcount) {
    1041          12 :                         if (xchk_should_terminate(rd->sc, &error))
    1042           0 :                                 return error;
    1043             : 
    1044          12 :                         error = xrep_dir_recover_dirblock(rd,
    1045             :                                         magic_guess, dabno);
    1046          12 :                         if (error)
    1047           0 :                                 return error;
    1048             : 
    1049             :                         /* Flush dirents to constrain memory usage. */
    1050          12 :                         if (xrep_dir_want_flush_stashed(rd)) {
    1051           2 :                                 error = xrep_dir_flush_stashed(rd);
    1052           2 :                                 if (error)
    1053           0 :                                         return error;
    1054             :                         }
    1055             :                 }
    1056             :         }
    1057             : 
    1058             :         return 0;
    1059             : }
    1060             : 
    1061             : /*
    1062             :  * Find all the directory entries for this inode by scraping them out of the
    1063             :  * shortform area or the directory data blocks, then flush them to the temp dir.
    1064             :  */
    1065             : STATIC int
    1066         993 : xrep_dir_find_entries(
    1067             :         struct xrep_dir         *rd)
    1068             : {
    1069         993 :         struct xfs_inode        *dp = rd->sc->ip;
    1070         993 :         int                     error;
    1071             : 
    1072             :         /*
    1073             :          * Salvage directory entries from the old directory, and write them to
    1074             :          * the temporary directory.
    1075             :          */
    1076         993 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
    1077         987 :                 error = xrep_dir_recover_sf(rd);
    1078             :         } else {
    1079           6 :                 error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
    1080           6 :                 if (error)
    1081             :                         return error;
    1082             : 
    1083           6 :                 error = xrep_dir_recover(rd);
    1084             :         }
    1085         993 :         if (error)
    1086             :                 return error;
    1087             : 
    1088         993 :         return xrep_dir_flush_stashed(rd); /* always flush what is left in the stash */
    1089             : }
    1090             : 
    1091             : /* Find this directory's parent, salvage its dirents into the temp dir, then commit and drop the ILOCK. */
    1092             : STATIC int
    1093         992 : xrep_dir_salvage_entries(
    1094             :         struct xrep_dir         *rd)
    1095             : {
    1096         992 :         struct xfs_scrub        *sc = rd->sc;
    1097         992 :         int                     error;
    1098             : 
    1099             :         /*
    1100             :          * Drop the ILOCK on this directory so that we can scan for this
    1101             :          * directory's parent.  Figure out who is going to be the parent of
    1102             :          * this directory, then retake the ILOCK so that we can salvage
    1103             :          * directory entries.
    1104             :          */
    1105         992 :         xchk_iunlock(sc, XFS_ILOCK_EXCL);
    1106         993 :         error = xrep_dir_find_parent(rd);
    1107         993 :         xchk_ilock(sc, XFS_ILOCK_EXCL); /* retaken even if the parent scan failed */
    1108         990 :         if (error)
    1109             :                 return error;
    1110             : 
    1111             :         /*
    1112             :          * Collect directory entries by parsing the raw directory blocks to
    1113             :          * salvage whatever we can.  When we're done, free the staging memory
    1114             :          * before swapping the directories to reduce memory usage.
    1115             :          */
    1116         990 :         error = xrep_dir_find_entries(rd);
    1117         992 :         if (error)
    1118             :                 return error;
    1119             : 
    1120             :         /*
    1121             :          * Commit the repair transaction and drop the ILOCK so that we can
    1122             :          * (later) use the atomic extent swap helper functions to compute the
    1123             :          * correct block reservations and re-lock the inodes.
    1124             :          *
    1125             :          * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
    1126             :          * modifications, but there's nothing to prevent userspace from reading
    1127             :          * the directory until we're ready for the swap operation.  Reads will
    1128             :          * return -EIO without shutting down the fs, so we're ok with that.
    1129             :          *
    1130             :          * The VFS can change dotdot on us, but the findparent scan will keep
    1131             :          * our incore parent inode up to date.  See the note on locking issues
    1132             :          * for more details.
    1133             :          */
    1134         992 :         error = xrep_trans_commit(sc);
    1135         993 :         if (error)
    1136             :                 return error;
    1137             : 
    1138         993 :         xchk_iunlock(sc, XFS_ILOCK_EXCL);
    1139         993 :         return 0;
    1140             : }
    1141             : 
    1142             : 
    1143             : /*
    1144             :  * Examine a parent pointer of a file.  If it leads us back to the directory
    1145             :  * that we're rebuilding, create an incore dirent from the parent pointer and
    1146             :  * stash it.
                     :  *
                     :  * @ip is the file that owns @pptr, and @priv is the struct xrep_dir for this
                     :  * repair.  A parent pointer matches only if both its inode number and its
                     :  * generation equal those of the directory being repaired (sc->ip).
                     :  *
                     :  * Returns 0 if the parent pointer refers to some other directory; otherwise
                     :  * returns whatever xrep_dir_stash_createname returns.  The stash call is made
                     :  * with rd->pscan.lock held.
    1147             :  */
    1148             : STATIC int
    1149  4859319552 : xrep_dir_scan_pptr(
    1150             :         struct xfs_scrub        *sc,
    1151             :         struct xfs_inode        *ip,
    1152             :         const struct xfs_parent_name_irec *pptr,
    1153             :         void                    *priv)
    1154             : {
    1155  4859319552 :         struct xfs_name         xname;
    1156  4859319552 :         struct xrep_dir         *rd = priv;
    1157  4859319552 :         int                     error;
    1158             : 
    1159             :         /* Ignore parent pointers that point back to a different dir. */
    1160  4859319552 :         if (pptr->p_ino != sc->ip->i_ino ||
    1161     2482809 :             pptr->p_gen != VFS_I(sc->ip)->i_generation)
    1162             :                 return 0;
    1163             : 
    1164             :         /*
    1165             :          * Transform this parent pointer into a dirent and queue it for later
    1166             :          * addition to the temporary directory.  The name and its length come
                     :          * from the parent pointer; the file type is derived from the mode of
                     :          * the child file @ip, and the dirent will point at @ip's inode number.
    1167             :          */
    1168     2482809 :         xname.name = pptr->p_name;
    1169     2482809 :         xname.len = pptr->p_namelen;
    1170     2482809 :         xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
    1171             : 
    1172     2482811 :         mutex_lock(&rd->pscan.lock);
    1173     2482817 :         error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
    1174     2482808 :         mutex_unlock(&rd->pscan.lock);
    1175     2482808 :         return error;
    1176             : }
    1177             : 
    1178             : /*
    1179             :  * If this child dirent points to the directory being repaired, remember that
    1180             :  * fact so that we can reset the dotdot entry if necessary.
                     :  *
                     :  * @dp is the directory being walked and @name/@ino describe one entry in it;
                     :  * @priv is the struct xrep_dir for this repair.  @dapos is not used here.
                     :  *
                     :  * Entries that fail any of the checks below are skipped silently.  An entry
                     :  * that passes is reported to the findparent scan with @dp's inode number as
                     :  * the candidate parent.  This function always returns 0.
    1181             :  */
    1182             : STATIC int
    1183  7823360040 : xrep_dir_scan_dirent(
    1184             :         struct xfs_scrub        *sc,
    1185             :         struct xfs_inode        *dp,
    1186             :         xfs_dir2_dataptr_t      dapos,
    1187             :         const struct xfs_name   *name,
    1188             :         xfs_ino_t               ino,
    1189             :         void                    *priv)
    1190             : {
    1191  7823360040 :         struct xrep_dir         *rd = priv;
    1192             : 
    1193             :         /* Dirent doesn't point to this directory. */
    1194  7823360040 :         if (ino != rd->sc->ip->i_ino)
    1195             :                 return 0;
    1196             : 
    1197             :         /* Ignore garbage inum. */
    1198      398288 :         if (!xfs_verify_dir_ino(rd->sc->mp, ino))
    1199             :                 return 0;
    1200             : 
    1201             :         /* No weird looking names. */
    1202      398287 :         if (name->len >= MAXNAMELEN || name->len <= 0)
    1203             :                 return 0;
    1204             : 
    1205             :         /* Don't pick up dot or dotdot entries; we only want child dirents. */
    1206      591607 :         if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
    1207      193321 :             xfs_dir2_samename(name, &xfs_name_dot))
    1208      204967 :                 return 0;
    1209             : 
    1210      193321 :         trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
    1211             :                         dp->i_ino);
    1212             : 
    1213      193321 :         xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
    1214      193321 :         return 0;
    1215             : }
    1216             : 
    1217             : /*
    1218             :  * Decide if we want to look for child dirents or parent pointers in this file.
    1219             :  * Skip the dir being repaired and any files being used to stage repairs.
                     :  *
                     :  * Returns true if @ip is neither the directory under repair (rd->sc->ip) nor
                     :  * a file that xrep_is_tempfile identifies as a repair tempfile.
    1220             :  */
    1221             : static inline bool
    1222  8879649129 : xrep_dir_want_scan(
    1223             :         struct xrep_dir         *rd,
    1224             :         const struct xfs_inode  *ip)
    1225             : {
    1226  8879649129 :         return ip != rd->sc->ip && !xrep_is_tempfile(ip);
    1227             : }
    1228             : 
    1229             : /*
    1230             :  * Take ILOCK on a file that we want to scan.
    1231             :  *
    1232             :  * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
    1233             :  * has an unloaded attr bmbt.  Otherwise, take ILOCK_SHARED.
                     :  *
                     :  * NOTE(review): the exclusive mode is presumably needed because the walk
                     :  * would have to load the extent map into the incore fork -- confirm.
                     :  *
                     :  * Files that xrep_dir_want_scan rejects are still locked, in shared mode.
                     :  * Returns the lock mode that was taken so that the caller can hand the same
                     :  * mode to xfs_iunlock.
    1234             :  */
    1235             : static inline unsigned int
    1236  4444490974 : xrep_dir_scan_ilock(
    1237             :         struct xrep_dir         *rd,
    1238             :         struct xfs_inode        *ip)
    1239             : {
    1240  4444490974 :         uint                    lock_mode = XFS_ILOCK_SHARED;
    1241             : 
    1242             :         /* Need to take the shared ILOCK to advance the iscan cursor. */
    1243  4444490974 :         if (!xrep_dir_want_scan(rd, ip))
    1244      942687 :                 goto lock;
    1245             : 
    1246  5931136630 :         if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
    1247           0 :                 lock_mode = XFS_ILOCK_EXCL;
    1248           0 :                 goto lock;
    1249             :         }
    1250             : 
    1251  8885246801 :         if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
    1252           0 :                 lock_mode = XFS_ILOCK_EXCL;
    1253             : 
    1254  4442601509 : lock:
    1255  4443544196 :         xfs_ilock(ip, lock_mode);
    1256  4444598384 :         return lock_mode;
    1257             : }
    1258             : 
    1259             : /*
    1260             :  * Scan this file for relevant child dirents or parent pointers that point to
    1261             :  * the directory we're rebuilding.
                     :  *
                     :  * The file's ILOCK is taken by xrep_dir_scan_ilock and held for the whole
                     :  * scan.  Parent pointers are walked for every file we want to scan; dirents
                     :  * are walked only if the file is a directory.
                     :  *
                     :  * Whether the file was scanned, skipped, or the scan failed, the inode is
                     :  * marked visited in the iscan before the ILOCK is dropped.  Returns 0, or the
                     :  * first error from the parent pointer walk or the directory walk.
    1262             :  */
    1263             : STATIC int
    1264  4444572786 : xrep_dir_scan_file(
    1265             :         struct xrep_dir         *rd,
    1266             :         struct xfs_inode        *ip)
    1267             : {
    1268  4444572786 :         unsigned int            lock_mode;
    1269  4444572786 :         int                     error = 0;
    1270             : 
    1271  4444572786 :         lock_mode = xrep_dir_scan_ilock(rd, ip);
    1272             : 
    1273  4443436389 :         if (!xrep_dir_want_scan(rd, ip))
    1274      942684 :                 goto scan_done;
    1275             : 
    1276  4441072830 :         error = xchk_pptr_walk(rd->sc, ip, xrep_dir_scan_pptr, &rd->pptr, rd);
    1277  4444863735 :         if (error)
    1278           0 :                 goto scan_done;
    1279             : 
    1280  4444863735 :         if (S_ISDIR(VFS_I(ip)->i_mode)) {
    1281  1487779740 :                 error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
    1282  1488061894 :                 if (error)
    1283           0 :                         goto scan_done;
    1284             :         }
    1285             : 
    1286  4445145889 : scan_done:
    1287  4446088573 :         xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
    1288  4443668643 :         xfs_iunlock(ip, lock_mode);
    1289  4440385228 :         return error;
    1290             : }
    1291             : 
    1292             : /*
    1293             :  * Scan all files in the filesystem for parent pointers that we can turn into
    1294             :  * replacement dirents, and a dirent that we can use to set the dotdot pointer.
                     :  *
                     :  * The scan runs with an empty transaction and without the ILOCK of the
                     :  * directory being repaired.  Each inode returned by the iscan is passed to
                     :  * xrep_dir_scan_file and then released.  When xrep_dir_want_flush_stashed
                     :  * says so, the stashed updates are replayed before the scan continues.
                     :  *
                     :  * Returns 0 on success, with the empty transaction cancelled.  On failure the
                     :  * error is returned without cancelling any transaction here, and -EBUSY is
                     :  * translated to -ECANCELED.
    1295             :  */
    1296             : STATIC int
    1297      196914 : xrep_dir_scan_dirtree(
    1298             :         struct xrep_dir         *rd)
    1299             : {
    1300      196914 :         struct xfs_scrub        *sc = rd->sc;
    1301      196914 :         struct xfs_inode        *ip;
    1302      196914 :         int                     error;
    1303             : 
    1304             :         /* Roots of directory trees are their own parents. */
    1305      196914 :         if (sc->ip == sc->mp->m_rootip || sc->ip == sc->mp->m_metadirip)
    1306        3581 :                 xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
    1307             : 
    1308             :         /*
    1309             :          * Filesystem scans are time consuming.  Drop the directory ILOCK and
    1310             :          * all other resources for the duration of the scan and hope for the
    1311             :          * best.  The live update hooks will keep our scan information up to
    1312             :          * date even though we've dropped the locks.
                     :          *
                     :          * Only the ILOCK bits of sc->ilock_flags are dropped here; a fresh
                     :          * empty transaction is then allocated for the scan.
    1313             :          */
    1314      196914 :         xchk_trans_cancel(sc);
    1315      196909 :         if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
    1316      196904 :                 xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
    1317             :                                                     XFS_ILOCK_EXCL));
    1318      196903 :         error = xchk_trans_alloc_empty(sc);
    1319      196881 :         if (error)
    1320             :                 return error;
    1321             : 
    1322  4445503612 :         while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
    1323  4444791499 :                 bool            flush;
    1324             : 
    1325  4444791499 :                 error = xrep_dir_scan_file(rd, ip);
    1326  4441256577 :                 xchk_irele(sc, ip);
    1327  4446476937 :                 if (error)
    1328             :                         break;
    1329             : 
    1330             :                 /*
                     :                  * Flush stashed dirent updates to constrain memory usage.
                     :                  *
                     :                  * The flush decision is sampled under pscan.lock.  To flush,
                     :                  * cancel the empty transaction, take the tempfile IOLOCK,
                     :                  * replay the updates with pscan.lock held, drop the IOLOCK,
                     :                  * and allocate a new empty transaction to continue the scan.
                     :                  */
    1331  4446476937 :                 mutex_lock(&rd->pscan.lock);
    1332  4449177162 :                 flush = xrep_dir_want_flush_stashed(rd);
    1333  4446147101 :                 mutex_unlock(&rd->pscan.lock);
    1334  4449940308 :                 if (flush) {
    1335        2052 :                         xchk_trans_cancel(sc);
    1336             : 
    1337        2052 :                         error = xrep_tempfile_iolock_polled(sc);
    1338        2052 :                         if (error)
    1339             :                                 break;
    1340             : 
    1341        2052 :                         mutex_lock(&rd->pscan.lock);
    1342        2052 :                         error = xrep_dir_replay_updates(rd);
    1343        2052 :                         mutex_unlock(&rd->pscan.lock);
    1344        2052 :                         xrep_tempfile_iounlock(sc);
    1345        2052 :                         if (error)
    1346             :                                 break;
    1347             : 
    1348        2052 :                         error = xchk_trans_alloc_empty(sc);
    1349        2052 :                         if (error)
    1350             :                                 break;
    1351             :                 }
    1352             : 
    1353  4449940308 :                 if (xchk_should_terminate(sc, &error))
    1354             :                         break;
    1355             :         }
    1356      196914 :         xchk_iscan_iter_finish(&rd->pscan.iscan);
    1357      196913 :         if (error) {
    1358             :                 /*
    1359             :                  * If we couldn't grab an inode that was busy with a state
    1360             :                  * change, change the error code so that we exit to userspace
    1361             :                  * as quickly as possible.
    1362             :                  */
    1363          29 :                 if (error == -EBUSY)
    1364             :                         return -ECANCELED;
    1365          29 :                 return error;
    1366             :         }
    1367             : 
    1368             :         /*
    1369             :          * Cancel the empty transaction so that we can (later) use the atomic
    1370             :          * extent swap helpers to lock files and commit the new directory.
    1371             :          */
    1372      196884 :         xchk_trans_cancel(rd->sc);
    1373      196884 :         return 0;
    1374             : }
    1375             : 
    1376             : /*
    1377             :  * Capture dirent updates being made by other threads which are relevant to the
    1378             :  * directory being repaired.
                     :  *
                     :  * This is a notifier callback.  @nb is the notifier block embedded in struct
                     :  * xrep_dir at pscan.hooks.dirent_hook.nb, and @data is the struct
                     :  * xfs_dir_update_params describing the update.  @action is not examined.
                     :  *
                     :  * Always returns NOTIFY_DONE.  If an update cannot be stashed, the iscan is
                     :  * aborted rather than passing the error back to the thread making the update.
    1379             :  */
    1380             : STATIC int
    1381    27818258 : xrep_dir_live_update(
    1382             :         struct notifier_block           *nb,
    1383             :         unsigned long                   action,
    1384             :         void                            *data)
    1385             : {
    1386    27818258 :         struct xfs_dir_update_params    *p = data;
    1387    27818258 :         struct xrep_dir                 *rd;
    1388    27818258 :         struct xfs_scrub                *sc;
    1389    27818258 :         int                             error = 0;
    1390             : 
    1391    27818258 :         rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb);
    1392    27818258 :         sc = rd->sc;
    1393             : 
    1394             :         /*
    1395             :          * This thread updated a child dirent in the directory that we're
    1396             :          * rebuilding.  Stash the update for replay against the temporary
    1397             :          * directory.
                     :          *
                     :          * A positive p->delta is stashed as a name creation and anything else
                     :          * as a name removal.  The stash is modified under pscan.lock.
    1398             :          */
    1399    27818258 :         if (p->dp->i_ino == sc->ip->i_ino &&
    1400           0 :             xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
    1401           0 :                 mutex_lock(&rd->pscan.lock);
    1402           0 :                 if (p->delta > 0)
    1403           0 :                         error = xrep_dir_stash_createname(rd, p->name,
    1404           0 :                                         p->ip->i_ino);
    1405             :                 else
    1406           0 :                         error = xrep_dir_stash_removename(rd, p->name,
    1407           0 :                                         p->ip->i_ino);
    1408           0 :                 mutex_unlock(&rd->pscan.lock);
    1409           0 :                 if (error)
    1410           0 :                         goto out_abort;
    1411             :         }
    1412             : 
    1413             :         /*
    1414             :          * This thread updated another directory's child dirent that points to
    1415             :          * the directory that we're rebuilding, so remember the new dotdot
    1416             :          * target.
                     :          *
                     :          * A positive p->delta records p->dp as the parent; anything else
                     :          * resets the recorded parent to NULLFSINO.
    1417             :          */
    1418    27818258 :         if (p->ip->i_ino == sc->ip->i_ino &&
    1419           0 :             xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
    1420           0 :                 if (p->delta > 0) {
    1421           0 :                         trace_xrep_dir_stash_createname(sc->tempip,
    1422             :                                         &xfs_name_dotdot,
    1423           0 :                                         p->dp->i_ino);
    1424             : 
    1425           0 :                         xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
    1426             :                 } else {
    1427           0 :                         trace_xrep_dir_stash_removename(sc->tempip,
    1428             :                                         &xfs_name_dotdot,
    1429             :                                         rd->pscan.parent_ino);
    1430             : 
    1431           0 :                         xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
    1432             :                 }
    1433             :         }
    1434             : 
    1435             :         return NOTIFY_DONE;
    1436             : out_abort:
    1437           0 :         xchk_iscan_abort(&rd->pscan.iscan);
    1438           0 :         return NOTIFY_DONE;
    1439             : }
    1440             : 
    1441             : /*
    1442             :  * Free all the directory blocks and reset the data fork.  The caller must
    1443             :  * join the inode to the transaction.  This function returns with the inode
    1444             :  * joined to a clean scrub transaction.
                     :  *
                     :  * The inode operated on is the temporary directory (sc->tempip), not the
                     :  * directory being repaired.  After the data fork is emptied, a new shortform
                     :  * directory is created in it with @parent_ino as its parent.
                     :  *
                     :  * Returns 0, or an error from reaping the old fork, creating the shortform
                     :  * directory, or rolling the transaction.
    1445             :  */
    1446             : STATIC int
    1447      197807 : xrep_dir_reset_fork(
    1448             :         struct xrep_dir         *rd,
    1449             :         xfs_ino_t               parent_ino)
    1450             : {
    1451      197807 :         struct xfs_scrub        *sc = rd->sc;
    1452      197807 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
    1453      197807 :         int                     error;
    1454             : 
    1455             :         /* Unmap all the directory buffers. */
    1456      197807 :         if (xfs_ifork_has_extents(ifp)) {
    1457       10638 :                 error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
    1458       10638 :                 if (error)
    1459             :                         return error;
    1460             :         }
    1461             : 
    1462      197807 :         trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
    1463             : 
    1464             :         /* Reset the data fork to an empty data fork. */
    1465      197788 :         xfs_idestroy_fork(ifp);
    1466      197822 :         ifp->if_bytes = 0;
    1467      197822 :         sc->tempip->i_disk_size = 0;
    1468             : 
    1469             :         /* Reinitialize the short form directory. */
    1470      197822 :         xrep_dir_init_args(rd, sc->tempip, NULL);
    1471      197817 :         error = xfs_dir2_sf_create(&rd->args, parent_ino);
    1472      197841 :         if (error)
    1473             :                 return error;
    1474             : 
    1475      197834 :         return xrep_tempfile_roll_trans(sc);
    1476             : }
    1477             : 
    1478             : /*
    1479             :  * Prepare both inodes' directory forks for extent swapping.  Promote the
    1480             :  * tempfile from short format to leaf format, and if the file being repaired
    1481             :  * has a short format data fork, turn it into an empty extent list.
                     :  *
                     :  * @temp_local and @ip_local say whether the tempfile (sc->tempip) and the
                     :  * file being repaired (sc->ip), respectively, have a local format data fork.
                     :  * The tempfile promotion is performed by xfs_dir2_sf_to_block.
                     :  *
                     :  * Returns 0, or an error from the format conversion or from finishing the
                     :  * deferred work.
    1482             :  */
    1483             : STATIC int
    1484       10638 : xrep_dir_swap_prep(
    1485             :         struct xfs_scrub        *sc,
    1486             :         bool                    temp_local,
    1487             :         bool                    ip_local)
    1488             : {
    1489       10638 :         int                     error;
    1490             : 
    1491             :         /*
    1492             :          * If the tempfile's directory is in shortform format, convert that
    1493             :          * to a single leaf extent so that we can use the atomic extent swap.
                     :          *
                     :          * Note that args.owner is the inode number of the directory being
                     :          * repaired, not the tempfile's.  NOTE(review): presumably so that the
                     :          * new block carries the correct owner after the swap -- confirm.
    1494             :          */
    1495       10638 :         if (temp_local) {
    1496        6924 :                 struct xfs_da_args      args = {
    1497        6924 :                         .dp             = sc->tempip,
    1498        6924 :                         .geo            = sc->mp->m_dir_geo,
    1499             :                         .whichfork      = XFS_DATA_FORK,
    1500        6924 :                         .trans          = sc->tp,
    1501             :                         .total          = 1,
    1502        6924 :                         .owner          = sc->ip->i_ino,
    1503             :                 };
    1504             : 
    1505        6924 :                 error = xfs_dir2_sf_to_block(&args);
    1506        6924 :                 if (error)
    1507           0 :                         return error;
    1508             : 
    1509             :                 /*
    1510             :                  * Roll the deferred log items to get us back to a clean
    1511             :                  * transaction.
    1512             :                  */
    1513        6924 :                 error = xfs_defer_finish(&sc->tp);
    1514        6924 :                 if (error)
    1515             :                         return error;
    1516             :         }
    1517             : 
    1518             :         /*
    1519             :          * If the file being repaired had a shortform data fork, convert that
    1520             :          * to an empty extent list in preparation for the atomic extent swap.
                     :          *
                     :          * The incore fork is destroyed and reset to extents format with zero
                     :          * extents, and the inode core and data fork are logged.
    1521             :          */
    1522       10638 :         if (ip_local) {
    1523           0 :                 struct xfs_ifork        *ifp;
    1524             : 
    1525           0 :                 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
    1526           0 :                 xfs_idestroy_fork(ifp);
    1527           0 :                 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
    1528           0 :                 ifp->if_nextents = 0;
    1529           0 :                 ifp->if_bytes = 0;
    1530           0 :                 ifp->if_u1.if_root = NULL;
    1531           0 :                 ifp->if_height = 0;
    1532             : 
    1533           0 :                 xfs_trans_log_inode(sc->tp, sc->ip,
    1534             :                                 XFS_ILOG_CORE | XFS_ILOG_DDATA);
    1535             :         }
    1536             : 
    1537             :         return 0;
    1538             : }
    1539             : 
    1540             : /*
    1541             :  * Replace the inode number of a directory entry.
                     :  *
                     :  * Change the entry @name in directory @dp so that it points to @inum.  @inum
                     :  * is validated first.  rd->args is reinitialized for @dp and @name, with
                     :  * @inum as the inode number and @total as the block total.
                     :  *
                     :  * The replace helper is chosen by the directory's current format: shortform
                     :  * if the data fork is local, otherwise block, leaf, or node, tested in that
                     :  * order.
                     :  *
                     :  * Returns 0, or an error from inode number validation, from the format
                     :  * checks, or from the replace helper itself.
    1542             :  */
    1543             : static int
    1544      175928 : xrep_dir_replace(
    1545             :         struct xrep_dir         *rd,
    1546             :         struct xfs_inode        *dp,
    1547             :         const struct xfs_name   *name,
    1548             :         xfs_ino_t               inum,
    1549             :         xfs_extlen_t            total)
    1550             : {
    1551      175928 :         struct xfs_scrub        *sc = rd->sc;
    1552      175928 :         bool                    is_block, is_leaf;
    1553      175928 :         int                     error;
    1554             : 
    1555      175928 :         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
    1556             : 
    1557      175928 :         error = xfs_dir_ino_validate(sc->mp, inum);
    1558      175939 :         if (error)
    1559             :                 return error;
    1560             : 
    1561      175939 :         xrep_dir_init_args(rd, dp, name);
    1562      175924 :         rd->args.inumber = inum;
    1563      175924 :         rd->args.total = total;
    1564             : 
    1565      175924 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
    1566      174123 :                 return xfs_dir2_sf_replace(&rd->args);
    1567             : 
    1568        1801 :         error = xfs_dir2_isblock(&rd->args, &is_block);
    1569        1801 :         if (error)
    1570             :                 return error;
    1571        1801 :         if (is_block)
    1572        1542 :                 return xfs_dir2_block_replace(&rd->args);
    1573             : 
    1574         259 :         error = xfs_dir2_isleaf(&rd->args, &is_leaf);
    1575         259 :         if (error)
    1576             :                 return error;
    1577         259 :         if (is_leaf)
    1578         150 :                 return xfs_dir2_leaf_replace(&rd->args);
    1579             : 
    1580         109 :         return xfs_dir2_node_replace(&rd->args);
    1581             : }
    1582             : 
    1583             : /*
    1584             :  * Reset the link count of this directory and adjust the unlinked list pointers
    1585             :  * as needed.
                     :  *
                     :  * The new link count is rd->subdirs + 2.  The exception is a directory that
                     :  * is on the unlinked list and for which no dirents were found: its link count
                     :  * is set to zero and rd->needs_adoption is cleared.
                     :  *
                     :  * An emergency-level message is logged whenever the directory is found on
                     :  * the unlinked list.  NOTE(review): this looks like debugging output --
                     :  * confirm whether it is meant to stay.
                     :  *
                     :  * Returns 0, -EFSCORRUPTED if the perag for the directory cannot be obtained,
                     :  * or an error from xfs_iunlink_remove.
    1586             :  */
    1587             : STATIC int
    1588      197822 : xrep_dir_set_nlink(
    1589             :         struct xrep_dir         *rd)
    1590             : {
    1591      197822 :         struct xfs_scrub        *sc = rd->sc;
    1592      197822 :         struct xfs_inode        *dp = sc->ip;
    1593      197822 :         struct xfs_perag        *pag;
    1594      197822 :         int                     error;
    1595             : 
    1596             :         /*
    1597             :          * The directory is not on the incore unlinked list, which means that
    1598             :          * it needs to be reachable via the directory tree.  Update the nlink
    1599             :          * with our observed link count.  If the directory has no parent, it
    1600             :          * will be moved to the orphanage.
    1601             :          */
    1602      197822 :         if (!xfs_inode_on_unlinked_list(dp)) {
    1603      197822 :                 xrep_set_nlink(sc->ip, rd->subdirs + 2);
    1604      197822 :                 return 0;
    1605             :         }
    1606             : 
    1607           0 :         xfs_emerg(dp->i_mount, "IUNLINK unlinked dir 0x%llx repair, dirents %u subdirs %llu curr_nlink %u orphan? %d", dp->i_ino, rd->dirents, rd->subdirs, VFS_I(dp)->i_nlink, rd->needs_adoption);
    1608             : 
    1609             :         /*
    1610             :          * The directory is on the unlinked list and we did not find any
    1611             :          * dirents.  Set the link count to zero and let the directory
    1612             :          * inactivate when the last reference drops.
    1613             :          */
    1614           0 :         if (rd->dirents == 0) {
    1615           0 :                 rd->needs_adoption = false;
    1616           0 :                 xrep_set_nlink(sc->ip, 0);
    1617           0 :                 return 0;
    1618             :         }
    1619             : 
    1620             :         /*
    1621             :          * The directory is on the unlinked list and we found dirents.  This
    1622             :          * directory needs to be reachable via the directory tree.  Remove the
    1623             :          * dir from the unlinked list and update nlink with the observed link
    1624             :          * count.  If the directory has no parent, it will be moved to the
    1625             :          * orphanage.
    1626             :          */
    1627           0 :         pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
    1628           0 :         if (!pag) {
    1629           0 :                 ASSERT(0);
    1630           0 :                 return -EFSCORRUPTED;
    1631             :         }
    1632             : 
    1633           0 :         error = xfs_iunlink_remove(sc->tp, pag, dp);
    1634           0 :         xfs_perag_put(pag);
    1635           0 :         if (error)
    1636             :                 return error;
    1637             : 
    1638           0 :         xrep_set_nlink(sc->ip, rd->subdirs + 2);
    1639           0 :         return 0;
    1640             : }
    1641             : 
    1642             : /*
    1643             :  * Finish replaying stashed dirent updates, allocate a transaction for swapping
    1644             :  * extents, and take the ILOCKs of both directories before we commit the new
    1645             :  * directory structure.
                     :  *
                     :  * If the filesystem does not have parent pointers, nothing is replayed and
                     :  * only the swap transaction is allocated.  NOTE(review): presumably nothing
                     :  * is stashed in that configuration -- confirm.
                     :  *
                     :  * Returns 0, an error from replaying updates or allocating the transaction,
                     :  * or -EFSCORRUPTED if stashed entries remain after the transaction has been
                     :  * allocated.
    1646             :  */
    1647             : STATIC int
    1648      197874 : xrep_dir_finalize_tempdir(
    1649             :         struct xrep_dir         *rd)
    1650             : {
    1651      197874 :         struct xfs_scrub        *sc = rd->sc;
    1652      197874 :         int                     error;
    1653             : 
    1654      197874 :         if (!xfs_has_parent(sc->mp))
    1655         992 :                 return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
    1656             : 
    1657      196882 :         error = xrep_dir_replay_updates(rd);
    1658      196885 :         if (error)
    1659             :                 return error;
    1660             : 
    1661      196848 :         error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
    1662      196838 :         if (error)
    1663             :                 return error;
    1664             : 
    1665             :         /*
    1666             :          * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
    1667             :          * possible directory updates during the time when we did not hold the
    1668             :          * ILOCK.  There should not be any dirent updates to replay, but check
    1669             :          * anyway.
    1670             :          */
    1671      196836 :         if (xfarray_length(rd->dir_entries) != 0) {
    1672           0 :                 ASSERT(xfarray_length(rd->dir_entries) == 0);
    1673           0 :                 return -EFSCORRUPTED;
    1674             :         }
    1675             : 
    1676             :         return 0;
    1677             : }
    1678             : 
    1679             : /* Swap the temporary directory's data fork with the one being repaired. */
    1680             : STATIC int
    1681      197816 : xrep_dir_swap(
    1682             :         struct xrep_dir         *rd)
    1683             : {
    1684      197816 :         struct xfs_scrub        *sc = rd->sc;
    1685      197816 :         bool                    ip_local, temp_local;
    1686      197816 :         int                     error = 0;
    1687             : 
    1688             :         /*
    1689             :          * If we never found the parent for this directory, temporarily assign
    1690             :          * the root dir as the parent; we'll move this to the orphanage after
    1691             :          * swapping the dir contents.  We hold the ILOCK of the dir being
    1692             :          * repaired, so we're not worried about racy updates of dotdot.
    1693             :          */
    1694      197816 :         ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
    1695      197816 :         if (rd->pscan.parent_ino == NULLFSINO) {
    1696           0 :                 rd->needs_adoption = true;
    1697           0 :                 rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
    1698             :         }
    1699             : 
    1700             :         /*
    1701             :          * Reset the temporary directory's '..' entry to point to the parent
    1702             :          * that we found.  The temporary directory was created with the root
    1703             :          * directory as the parent, so we can skip this if repairing a
    1704             :          * subdirectory of the root.
    1705             :          *
    1706             :          * This replacement could also expand a shortform (sf) tempdir into
    1707             :          * block format.
    1708             :          */
    1709      197816 :         if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
    1710      175923 :                 error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
    1711      175923 :                                 rd->pscan.parent_ino, rd->tx.req.resblks);
    1712      175938 :                 if (error)
    1713             :                         return error;
    1714             :         }
    1715             : 
    1716             :         /*
    1717             :          * Changing the dot and dotdot entries could have changed the shape of
    1718             :          * the directory, so only now do we sample both data fork formats.
    1719             :          */
    1720      197831 :         ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
    1721      197831 :         temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
    1722             : 
    1723             :         /*
    1724             :          * If both files have a local format data fork and the rebuilt
    1725             :          * directory data would fit in the repaired file's data fork, copy
    1726             :          * the contents from the tempfile and update the directory link count.
    1727             :          * We're done now.
    1728             :          */
    1729      197831 :         if (ip_local && temp_local &&
    1730      187193 :             sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
    1731      187190 :                 xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
    1732      187203 :                 return xrep_dir_set_nlink(rd);
    1733             :         }
    1734             : 
    1735             :         /* Clean the transaction before we start working on the extent swap. */
    1736       10641 :         error = xrep_tempfile_roll_trans(rd->sc);
    1737       10638 :         if (error)
    1738             :                 return error;
    1739             : 
    1740             :         /* Otherwise, make sure both data forks are in block-mapping mode. */
    1741       10638 :         error = xrep_dir_swap_prep(sc, temp_local, ip_local);
    1742       10638 :         if (error)
    1743             :                 return error;
    1744             : 
    1745             :         /*
    1746             :          * Set nlink of the directory in the same transaction sequence that
    1747             :          * (atomically) commits the new directory data.
    1748             :          */
    1749       10638 :         error = xrep_dir_set_nlink(rd);
    1750       10638 :         if (error)
    1751             :                 return error;
    1752             : 
    1753       10638 :         return xrep_tempswap_contents(sc, &rd->tx);
    1754             : }
    1755             : 
    1756             : /*
    1757             :  * Swap the new directory contents (which we created in the tempfile) into the
    1758             :  * directory being repaired.
    1759             :  */
    1760             : STATIC int
    1761      197874 : xrep_dir_rebuild_tree(
    1762             :         struct xrep_dir         *rd)
    1763             : {
    1764      197874 :         struct xfs_scrub        *sc = rd->sc;
    1765      197874 :         int                     error;
    1766             : 
    1767      197874 :         trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
    1768             : 
    1769             :         /*
    1770             :          * Take the IOLOCK on the temporary file so that we can run dir
    1771             :          * operations with the same locks held as we would for a normal file.
    1772             :          * We still hold sc->ip's IOLOCK.
    1773             :          */
    1774      197859 :         error = xrep_tempfile_iolock_polled(rd->sc);
    1775      197865 :         if (error)
    1776             :                 return error;
    1777             : 
    1778             :         /*
    1779             :          * Allocate a transaction, lock inodes, and make sure that we've
    1780             :          * replayed all the stashed dirent updates to the tempdir.  After this
    1781             :          * point, we're ready to swapext.
    1782             :          */
    1783      197862 :         error = xrep_dir_finalize_tempdir(rd);
    1784      197865 :         if (error)
    1785             :                 return error;
    1786             : 
    1787      197833 :         if (xchk_iscan_aborted(&rd->pscan.iscan))
    1788             :                 return -ECANCELED;
    1789             : 
    1790             :         /*
    1791             :          * Swap the tempdir's data fork with that of the directory being
    1792             :          * repaired.  This recreates the transaction and re-takes the ILOCK in
    1793             :          * the scrub context.
    1794             :          */
    1795      197812 :         error = xrep_dir_swap(rd);
    1796      197811 :         if (error)
    1797             :                 return error;
    1798             : 
    1799             :         /*
    1800             :          * Release the old directory blocks and reset the data fork of the temp
    1801             :          * directory to an empty shortform directory because inactivation does
    1802             :          * nothing for directories.
    1803             :          */
    1804      197810 :         return xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
    1805             : }
    1806             : 
    1807             : /* Set up staging storage and start the scan used to regenerate dirents. */
    1808             : STATIC int
    1809      197890 : xrep_dir_setup_scan(
    1810             :         struct xrep_dir         *rd)
    1811             : {
    1812      197890 :         struct xfs_scrub        *sc = rd->sc;
    1813      197890 :         char                    *descr;
    1814      197890 :         int                     error;
    1815             : 
    1816             :         /* Set up some staging memory for salvaging dirents. */
    1817      197890 :         descr = xchk_xfile_ino_descr(sc, "directory entries");
    1818      197892 :         error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
    1819             :                         &rd->dir_entries);
    1820      197907 :         kfree(descr);
    1821      197905 :         if (error)
    1822             :                 return error;
    1823             : 
    1824      197905 :         descr = xchk_xfile_ino_descr(sc, "directory entry names");
    1825      197891 :         error = xfblob_create(descr, &rd->dir_names);
    1826      197907 :         kfree(descr);
    1827      197908 :         if (error)
    1828           0 :                 goto out_xfarray;
    1829             : 
    1830      197908 :         if (xfs_has_parent(sc->mp))
    1831      196915 :                 error = __xrep_findparent_scan_start(sc, &rd->pscan,
    1832             :                                 xrep_dir_live_update);
    1833             :         else
    1834         993 :                 error = xrep_findparent_scan_start(sc, &rd->pscan);
    1835      197909 :         if (error)
    1836           0 :                 goto out_xfblob;
    1837             : 
    1838             :         return 0;
    1839             : 
    1840             : out_xfblob:
    1841           0 :         xfblob_destroy(rd->dir_names);
    1842           0 :         rd->dir_names = NULL;
    1843           0 : out_xfarray:
    1844           0 :         xfarray_destroy(rd->dir_entries);
    1845           0 :         rd->dir_entries = NULL;
    1846           0 :         return error;
    1847             : }
    1848             : 
    1849             : /*
    1850             :  * Move the directory being repaired to the orphanage.
    1851             :  *
    1852             :  * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks.  Upon
    1853             :  * successful return, the scrub transaction will have enough extra reservation
    1854             :  * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
    1855             :  * orphanage; and both inodes will be ijoined.
    1856             :  */
    1857             : STATIC int
    1858           0 : xrep_dir_move_to_orphanage(
    1859             :         struct xrep_dir         *rd)
    1860             : {
    1861           0 :         struct xfs_scrub        *sc = rd->sc;
    1862           0 :         xfs_ino_t               orig_parent, new_parent;
    1863           0 :         int                     error;
    1864             : 
    1865             :         /*
    1866           0 :          * We are about to drop the ILOCK on sc->ip to lock the orphanage and
    1867             :          * prepare for the adoption.  Therefore, look up the old dotdot entry
    1868             :          * for sc->ip so that we can compare it after we re-lock sc->ip.
    1869             :          */
    1870             :         error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
    1871             :         if (error)
    1872             :                 return error;
    1873             : 
    1874           0 :         /*
    1875           0 :          * We hold ILOCK_EXCL on both the directory and the tempdir after a
    1876             :          * successful rebuild.  Before we can move the directory to the
    1877             :          * orphanage, we must roll to a clean unjoined transaction.
    1878             :          */
    1879             :         error = xfs_trans_roll(&sc->tp);
    1880             :         if (error)
    1881             :                 return error;
    1882             : 
    1883           0 :         /*
    1884           0 :          * Because the orphanage is just another directory in the filesystem,
    1885             :          * we must take its IOLOCK to coordinate with the VFS.  We cannot take
    1886             :          * an IOLOCK while holding an ILOCK, so we must drop them all.  If the
    1887             :          * nowait attempt below fails, we also drop whatever is still held.
    1888             :          */
    1889             :         xrep_tempfile_iunlock_both(sc);
    1890             : 
    1891             :         error = xrep_adoption_init(sc, &rd->adoption);
    1892             :         if (error)
    1893           0 :                 return error;
    1894             : 
    1895           0 :         if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
    1896           0 :                 xchk_iunlock(sc, sc->ilock_flags);
    1897             :                 error = xrep_orphanage_iolock_two(sc);
    1898             :                 if (error)
    1899           0 :                         goto err_adoption;
    1900           0 :         }
    1901           0 : 
    1902           0 :         /* Prepare for the adoption and lock both down. */
    1903           0 :         error = xrep_adoption_prep(&rd->adoption);
    1904             :         if (error)
    1905             :                 goto err_adoption;
    1906             : 
    1907           0 :         error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name);
    1908           0 :         if (error)
    1909           0 :                 goto err_adoption;
    1910             : 
    1911           0 :         /*
    1912           0 :          * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
    1913           0 :          * entry again.  If the parent changed or the child was unlinked while
    1914             :          * the child directory was unlocked, we don't need to move the child to
    1915             :          * the orphanage after all.
    1916             :          */
    1917             :         error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
    1918             :         if (error)
    1919             :                 goto err_adoption;
    1920             :         if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
    1921           0 :                 error = 0;
    1922           0 :                 goto err_adoption;
    1923           0 :         }
    1924           0 : 
    1925           0 :         /* Attach to the orphanage. */
    1926           0 :         return xrep_adoption_commit(&rd->adoption);
    1927             : err_adoption:
    1928             :         xrep_adoption_cancel(&rd->adoption, error);
    1929             :         return error;
    1930           0 : }
    1931           0 : 
    1932           0 : /*
    1933           0 :  * Repair a directory by regenerating its entries and swapping them in.
    1934             :  *
    1935             :  * XXX: Directory entry buffers can be multiple fsblocks in size.  The buffer
    1936             :  * cache in XFS can't handle aliased multiblock buffers, so this might
    1937             :  * misbehave if the directory blocks are crosslinked with other filesystem
    1938             :  * metadata.
    1939             :  *
    1940             :  * XXX: Is it necessary to check the dcache for this directory to make sure
    1941             :  * that we always recreate every cached entry?
    1942             :  */
    1943             : int
    1944             : xrep_directory(
    1945             :         struct xfs_scrub        *sc)
    1946             : {
    1947             :         struct xrep_dir         *rd = sc->buf;
    1948      241365 :         int                     error;
    1949             : 
    1950             :         /* The rmapbt is required to reap the old data fork. */
    1951      241365 :         if (!xfs_has_rmapbt(sc->mp))
    1952      241365 :                 return -EOPNOTSUPP;
    1953             : 
    1954             :         error = xrep_dir_setup_scan(rd);
    1955      241365 :         if (error)
    1956             :                 return error;
    1957             : 
    1958      197897 :         if (xfs_has_parent(sc->mp))
    1959      197909 :                 error = xrep_dir_scan_dirtree(rd);
    1960             :         else
    1961             :                 error = xrep_dir_salvage_entries(rd);
    1962      197909 :         if (error)
    1963      196916 :                 goto out_teardown;
    1964             : 
    1965         993 :         /* Last chance to abort before we start committing fixes. */
    1966      197903 :         if (xchk_should_terminate(sc, &error))
    1967          30 :                 goto out_teardown;
    1968             : 
    1969             :         error = xrep_dir_rebuild_tree(rd);
    1970      197873 :         if (error)
    1971           0 :                 goto out_teardown;
    1972             : 
    1973      197874 :         if (rd->needs_adoption) {
    1974      197871 :                 if (!xrep_orphanage_can_adopt(rd->sc))
    1975          33 :                         error = -EFSCORRUPTED;
    1976             :                 else
    1977      197838 :                         error = xrep_dir_move_to_orphanage(rd);
    1978           0 :                 if (error)
    1979           0 :                         goto out_teardown;
    1980             :         }
    1981             : 
    1982             : out_teardown:
    1983      197838 :         xrep_dir_teardown(sc);
    1984      197901 :         return error;
    1985      197894 : }

Generated by: LCOV version 1.14