LCOV - code coverage report
Current view: top level - fs/xfs/scrub - dir_repair.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 327 738 44.3 %
Date: 2023-07-31 20:08:07 Functions: 24 42 57.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_bit.h"
      14             : #include "xfs_log_format.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_sb.h"
      17             : #include "xfs_inode.h"
      18             : #include "xfs_icache.h"
      19             : #include "xfs_da_format.h"
      20             : #include "xfs_da_btree.h"
      21             : #include "xfs_dir2.h"
      22             : #include "xfs_dir2_priv.h"
      23             : #include "xfs_bmap.h"
      24             : #include "xfs_quota.h"
      25             : #include "xfs_bmap_btree.h"
      26             : #include "xfs_trans_space.h"
      27             : #include "xfs_bmap_util.h"
      28             : #include "xfs_swapext.h"
      29             : #include "xfs_xchgrange.h"
      30             : #include "xfs_ag.h"
      31             : #include "xfs_parent.h"
      32             : #include "scrub/xfs_scrub.h"
      33             : #include "scrub/scrub.h"
      34             : #include "scrub/common.h"
      35             : #include "scrub/trace.h"
      36             : #include "scrub/repair.h"
      37             : #include "scrub/tempfile.h"
      38             : #include "scrub/tempswap.h"
      39             : #include "scrub/xfile.h"
      40             : #include "scrub/xfarray.h"
      41             : #include "scrub/xfblob.h"
      42             : #include "scrub/iscan.h"
      43             : #include "scrub/readdir.h"
      44             : #include "scrub/reap.h"
      45             : #include "scrub/findparent.h"
      46             : #include "scrub/orphanage.h"
      47             : #include "scrub/listxattr.h"
      48             : 
      49             : /*
      50             :  * Directory Repair
      51             :  * ================
      52             :  *
      53             :  * We repair directories by reading the directory data blocks looking for
      54             :  * directory entries that look salvageable (name passes verifiers, entry points
      55             :  * to a valid allocated inode, etc).  Each entry worth salvaging is stashed in
      56             :  * memory, and the stashed entries are periodically replayed into a temporary
      57             :  * directory to constrain memory use.  Batching the construction of the
      58             :  * temporary directory in this fashion reduces lock cycling of the directory
      59             :  * being repaired and the temporary directory, and will later become important
      60             :  * for parent pointer scanning.
      61             :  *
      62             :  * If parent pointers are enabled on this filesystem, we instead reconstruct
      63             :  * the directory by visiting each parent pointer of each file in the filesystem
      64             :  * and translating the relevant parent pointer records into dirents.  In this
      65             :  * case, it is advantageous to stash all directory entries created from parent
      66             :  * pointers for a single child file before replaying them into the temporary
      67             :  * directory.  To save memory, the live filesystem scan reuses the findparent
      68             :  * fields.  Directory repair chooses either parent pointer scanning or
      69             :  * directory entry salvaging, but not both.
      70             :  *
      71             :  * Directory entries added to the temporary directory do not elevate the link
      72             :  * counts of the inodes found.  When salvaging completes, the remaining stashed
      73             :  * entries are replayed to the temporary directory.  An atomic extent swap is
      74             :  * used to commit the new directory blocks to the directory being repaired.
      75             :  * This will disrupt readdir cursors.
      76             :  *
      77             :  * Legacy Locking Issues
      78             :  * ---------------------
      79             :  *
      80             :  * Prior to Linux 6.5, if /a, /a/b, and /c were all directories, the VFS would
      81             :  * not take i_rwsem on /a/b for a "mv /a/b /c/" operation.  This meant that
      82             :  * only b's ILOCK protected b's dotdot update.  b's IOLOCK was not taken,
      83             :  * unlike every other dotdot update (link, remove, mkdir).  If the repair code
      84             :  * dropped the ILOCK, it was required either to revalidate the dotdot entry
      85             :  * or to use dirent hooks to capture updates from other threads.
      86             :  */
      87             : 
      88             : /* Create a dirent in the tempdir. */
      89             : #define XREP_DIRENT_ADD         (1)
      90             : 
      91             : /* Remove a dirent from the tempdir. */
      92             : #define XREP_DIRENT_REMOVE      (2)
      93             : 
      94             : /* Directory entry to be restored in the new directory. */
      95             : struct xrep_dirent {
      96             :         /* Cookie for retrieval of the dirent name. */
      97             :         xfblob_cookie           name_cookie;
      98             : 
      99             :         /* Target inode number. */
     100             :         xfs_ino_t               ino;
     101             : 
     102             :         /* Length of the dirent name. */
     103             :         uint8_t                 namelen;
     104             : 
     105             :         /* File type of the dirent. */
     106             :         uint8_t                 ftype;
     107             : 
     108             :         /* XREP_DIRENT_{ADD,REMOVE} */
     109             :         uint8_t                 action;
     110             : };
     111             : 
     112             : /*
     113             :  * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
     114             :  * before we write them to the temp dir.
     115             :  */
     116             : #define XREP_DIR_MAX_STASH_BYTES        (PAGE_SIZE * 8)
     117             : 
     118             : struct xrep_dir {
     119             :         struct xfs_scrub        *sc;
     120             : 
     121             :         /* Fixed-size array of xrep_dirent structures. */
     122             :         struct xfarray          *dir_entries;
     123             : 
     124             :         /* Blobs containing directory entry names. */
     125             :         struct xfblob           *dir_names;
     126             : 
     127             :         /* Information for swapping data forks at the end. */
     128             :         struct xrep_tempswap    tx;
     129             : 
     130             :         /* Preallocated args struct for performing dir operations */
     131             :         struct xfs_da_args      args;
     132             : 
     133             :         /*
     134             :          * Information used to scan the filesystem to find the inumber of the
     135             :          * dotdot entry for this directory.  For directory salvaging when
     136             :          * parent pointers are not enabled, we use the findparent_* functions
     137             :          * on this object and access only the parent_ino field directly.
     138             :          *
     139             :          * When parent pointers are enabled, however, the pptr scanner uses the
     140             :          * iscan, hooks, lock, and parent_ino fields of this object directly.
     141             :          * @pscan.lock coordinates access to dir_entries, dir_names,
     142             :          * parent_ino, subdirs, dirents, and args.  This reduces the memory
     143             :          * requirements of this structure.
     144             :          */
     145             :         struct xrep_parent_scan_info pscan;
     146             : 
     147             :         /*
     148             :          * Context information for attaching this directory to the lost+found
     149             :          * if this directory does not have a parent.
     150             :          */
     151             :         struct xrep_adoption    adoption;
     152             : 
     153             :         /* How many subdirectories did we find? */
     154             :         uint64_t                subdirs;
     155             : 
     156             :         /* How many dirents did we find? */
     157             :         unsigned int            dirents;
     158             : 
     159             :         /* Should we move this directory to the orphanage? */
     160             :         bool                    needs_adoption;
     161             : 
     162             :         /*
     163             :          * Scratch buffer for reading parent pointers from child files.  The
     164             :          * p_name field is used to flush stashed dirents into the temporary
     165             :          * directory in between parent pointers.  At the very end of the
     166             :          * repair, it can also be used to compute the lost+found filename
     167             :          * if we need to reparent the directory.
     168             :          */
     169             :         struct xfs_parent_name_irec pptr;
     170             : };
     171             : 
     172             : /* Tear down all the incore stuff we created. */
     173             : static void
     174       12808 : xrep_dir_teardown(
     175             :         struct xfs_scrub        *sc)
     176             : {
     177       12808 :         struct xrep_dir         *rd = sc->buf;
     178             : 
     179       12808 :         xrep_findparent_scan_teardown(&rd->pscan);
     180       12814 :         xfblob_destroy(rd->dir_names);
     181       12809 :         xfarray_destroy(rd->dir_entries);
     182       12813 : }
     183             : 
     184             : /* Set up for a directory repair. */
     185             : int
     186      184730 : xrep_setup_directory(
     187             :         struct xfs_scrub        *sc)
     188             : {
     189      184730 :         struct xrep_dir         *rd;
     190      184730 :         int                     error;
     191             : 
     192      184730 :         xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
     193             : 
     194      184709 :         error = xrep_orphanage_try_create(sc);
     195      184730 :         if (error)
     196             :                 return error;
     197             : 
     198      184730 :         error = xrep_tempfile_create(sc, S_IFDIR);
     199      184687 :         if (error)
     200             :                 return error;
     201             : 
     202      184686 :         rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
     203      184659 :         if (!rd)
     204             :                 return -ENOMEM;
     205      184659 :         rd->sc = sc;
     206      184659 :         sc->buf = rd;
     207             : 
     208      184659 :         return 0;
     209             : }
     210             : 
     211             : /*
     212             :  * If we're the root of a directory tree, we are our own parent.  If we're an
     213             :  * unlinked directory, the parent /won't/ have a link to us.  Set the parent
     214             :  * directory to the root for both cases.  Returns NULLFSINO if we don't know
     215             :  * what to do.
     216             :  */
     217             : static inline xfs_ino_t
     218             : xrep_dir_self_parent(
     219             :         struct xrep_dir         *rd)
     220             : {
     221             :         struct xfs_scrub        *sc = rd->sc;
     222             : 
     223             :         if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
     224             :                 return sc->mp->m_sb.sb_rootino;
     225             : 
     226             :         if (VFS_I(sc->ip)->i_nlink == 0)
     227             :                 return sc->mp->m_sb.sb_rootino;
     228             : 
     229             :         return NULLFSINO;
     230             : }
     231             : 
     232             : /*
     233             :  * Look up the dotdot entry and confirm that it's really the parent.
     234             :  * Returns NULLFSINO if we don't know what to do.
     235             :  */
     236             : static inline xfs_ino_t
     237           0 : xrep_dir_lookup_parent(
     238             :         struct xrep_dir         *rd)
     239             : {
     240           0 :         struct xfs_scrub        *sc = rd->sc;
     241           0 :         xfs_ino_t               ino;
     242           0 :         int                     error;
     243             : 
     244           0 :         error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
     245           0 :         if (error)
     246             :                 return NULLFSINO;
     247           0 :         if (!xfs_verify_dir_ino(sc->mp, ino))
     248             :                 return NULLFSINO;
     249             : 
     250           0 :         error = xrep_findparent_confirm(sc, &ino);
     251           0 :         if (error)
     252             :                 return NULLFSINO;
     253             : 
     254           0 :         return ino;
     255             : }
     256             : 
     257             : /*
     258             :  * Look up '..' in the dentry cache and confirm that it's really the parent.
     259             :  * Returns NULLFSINO if the dcache misses or if the hit is implausible.
     260             :  */
     261             : static inline xfs_ino_t
     262           0 : xrep_dir_dcache_parent(
     263             :         struct xrep_dir         *rd)
     264             : {
     265           0 :         struct xfs_scrub        *sc = rd->sc;
     266           0 :         xfs_ino_t               parent_ino;
     267           0 :         int                     error;
     268             : 
     269           0 :         parent_ino = xrep_findparent_from_dcache(sc);
     270           0 :         if (parent_ino == NULLFSINO)
     271             :                 return parent_ino;
     272             : 
     273           0 :         error = xrep_findparent_confirm(sc, &parent_ino);
     274           0 :         if (error)
     275             :                 return NULLFSINO;
     276             : 
     277           0 :         return parent_ino;
     278             : }
     279             : 
     280             : /* Try to find the parent of the directory being repaired. */
     281             : STATIC int
     282           0 : xrep_dir_find_parent(
     283             :         struct xrep_dir         *rd)
     284             : {
     285           0 :         xfs_ino_t               ino;
     286             : 
     287           0 :         ino = xrep_findparent_self_reference(rd->sc);
     288           0 :         if (ino != NULLFSINO) {
     289           0 :                 xrep_findparent_scan_finish_early(&rd->pscan, ino);
     290           0 :                 return 0;
     291             :         }
     292             : 
     293           0 :         ino = xrep_dir_dcache_parent(rd);
     294           0 :         if (ino != NULLFSINO) {
     295           0 :                 xrep_findparent_scan_finish_early(&rd->pscan, ino);
     296           0 :                 return 0;
     297             :         }
     298             : 
     299           0 :         ino = xrep_dir_lookup_parent(rd);
     300           0 :         if (ino != NULLFSINO) {
     301           0 :                 xrep_findparent_scan_finish_early(&rd->pscan, ino);
     302           0 :                 return 0;
     303             :         }
     304             : 
     305             :         /*
     306             :          * A full filesystem scan is the last resort.  On a busy filesystem,
     307             :          * the scan can fail with -EBUSY if we cannot grab IOLOCKs.  That means
     308             :          * that we don't know who the parent is, so we should return to
     309             :          * userspace.
     310             :          */
     311           0 :         return xrep_findparent_scan(&rd->pscan);
     312             : }
     313             : 
     314             : /*
     315             :  * Decide if we want to salvage this entry.  We don't bother with oversized
     316             :  * names or the dot entry.
     317             :  */
     318             : STATIC int
     319           0 : xrep_dir_want_salvage(
     320             :         struct xrep_dir         *rd,
     321             :         const char              *name,
     322             :         int                     namelen,
     323             :         xfs_ino_t               ino)
     324             : {
     325           0 :         struct xfs_mount        *mp = rd->sc->mp;
     326             : 
     327             :         /* No pointers to ourselves or to garbage. */
     328           0 :         if (ino == rd->sc->ip->i_ino)
     329             :                 return false;
     330           0 :         if (!xfs_verify_dir_ino(mp, ino))
     331             :                 return false;
     332             : 
     333             :         /* No weird looking names or dot entries. */
     334           0 :         if (namelen >= MAXNAMELEN || namelen <= 0)
     335             :                 return false;
     336           0 :         if (namelen == 1 && name[0] == '.')
     337           0 :                 return false;
     338             : 
     339             :         return true;
     340             : }
     341             : 
     342             : /*
     343             :  * Remember that we want to create a dirent in the tempdir.  These stashed
     344             :  * actions will be replayed later.
     345             :  */
     346             : STATIC int
     347      129793 : xrep_dir_stash_createname(
     348             :         struct xrep_dir         *rd,
     349             :         const struct xfs_name   *name,
     350             :         xfs_ino_t               ino)
     351             : {
     352      129793 :         struct xrep_dirent      dirent = {
     353             :                 .action         = XREP_DIRENT_ADD,
     354             :                 .ino            = ino,
     355      129793 :                 .namelen        = name->len,
     356      129793 :                 .ftype          = name->type,
     357             :         };
     358      129793 :         int                     error;
     359             : 
     360      129793 :         trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
     361             : 
     362      129793 :         error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
     363      129793 :                         name->len);
     364      129793 :         if (error)
     365             :                 return error;
     366             : 
     367      129793 :         return xfarray_append(rd->dir_entries, &dirent);
     368             : }
     369             : 
     370             : /*
     371             :  * Remember that we want to remove a dirent from the tempdir.  These stashed
     372             :  * actions will be replayed later.
     373             :  */
     374             : STATIC int
     375           0 : xrep_dir_stash_removename(
     376             :         struct xrep_dir         *rd,
     377             :         const struct xfs_name   *name,
     378             :         xfs_ino_t               ino)
     379             : {
     380           0 :         struct xrep_dirent      dirent = {
     381             :                 .action         = XREP_DIRENT_REMOVE,
     382             :                 .ino            = ino,
     383           0 :                 .namelen        = name->len,
     384           0 :                 .ftype          = name->type,
     385             :         };
     386           0 :         int                     error;
     387             : 
     388           0 :         trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
     389             : 
     390           0 :         error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
     391           0 :                         name->len);
     392           0 :         if (error)
     393             :                 return error;
     394             : 
     395           0 :         return xfarray_append(rd->dir_entries, &dirent);
     396             : }
     397             : 
     398             : /* Allocate an in-core record to hold entries while we rebuild the dir data. */
     399             : STATIC int
     400           0 : xrep_dir_salvage_entry(
     401             :         struct xrep_dir         *rd,
     402             :         unsigned char           *name,
     403             :         unsigned int            namelen,
     404             :         xfs_ino_t               ino)
     405             : {
     406           0 :         struct xfs_name         xname = {
     407             :                 .name           = name,
     408             :         };
     409           0 :         struct xfs_scrub        *sc = rd->sc;
     410           0 :         struct xfs_inode        *ip;
     411           0 :         unsigned int            i = 0;
     412           0 :         int                     error = 0;
     413             : 
     414           0 :         if (xchk_should_terminate(sc, &error))
     415           0 :                 return error;
     416             : 
     417             :         /*
     418             :          * Truncate the name to the first character that would trip namecheck.
     419             :          * If we no longer have a name after that, ignore this entry.
     420             :          */
     421           0 :         while (i < namelen && name[i] != 0 && name[i] != '/')
     422           0 :                 i++;
     423           0 :         if (i == 0)
     424             :                 return 0;
     425           0 :         xname.len = i;
     426             : 
     427             :         /* Ignore '..' entries; we already picked the new parent. */
     428           0 :         if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
     429           0 :                 trace_xrep_dir_salvaged_parent(sc->ip, ino);
     430           0 :                 return 0;
     431             :         }
     432             : 
     433           0 :         trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
     434             : 
     435             :         /*
     436             :          * Compute the ftype or dump the entry if we can't.  We don't lock the
     437             :          * inode because inodes can't change type while we have a reference.
     438             :          */
     439           0 :         error = xchk_iget(sc, ino, &ip);
     440           0 :         if (error)
     441             :                 return 0;
     442             : 
     443           0 :         xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
     444           0 :         xchk_irele(sc, ip);
     445             : 
     446           0 :         return xrep_dir_stash_createname(rd, &xname, ino);
     447             : }
     448             : 
     449             : /* Record a shortform directory entry for later reinsertion. */
     450             : STATIC int
     451           0 : xrep_dir_salvage_sf_entry(
     452             :         struct xrep_dir                 *rd,
     453             :         struct xfs_dir2_sf_hdr          *sfp,
     454             :         struct xfs_dir2_sf_entry        *sfep)
     455             : {
     456           0 :         xfs_ino_t                       ino;
     457             : 
     458           0 :         ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
     459           0 :         if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
     460             :                 return 0;
     461             : 
     462           0 :         return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
     463             : }
     464             : 
     465             : /* Record a regular directory entry for later reinsertion. */
     466             : STATIC int
     467           0 : xrep_dir_salvage_data_entry(
     468             :         struct xrep_dir                 *rd,
     469             :         struct xfs_dir2_data_entry      *dep)
     470             : {
     471           0 :         xfs_ino_t                       ino;
     472             : 
     473           0 :         ino = be64_to_cpu(dep->inumber);
     474           0 :         if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
     475             :                 return 0;
     476             : 
     477           0 :         return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
     478             : }
     479             : 
     480             : /* Try to recover block/data format directory entries. */
     481             : STATIC int
     482           0 : xrep_dir_recover_data(
     483             :         struct xrep_dir         *rd,
     484             :         struct xfs_buf          *bp)
     485             : {
     486           0 :         struct xfs_da_geometry  *geo = rd->sc->mp->m_dir_geo;
     487           0 :         unsigned int            offset;
     488           0 :         unsigned int            end;
     489           0 :         int                     error = 0;
     490             : 
     491             :         /*
     492             :          * Loop over the data portion of the block.
     493             :          * Each object is a real entry (dep) or an unused one (dup).
     494             :          */
     495           0 :         offset = geo->data_entry_offset;
     496           0 :         end = min_t(unsigned int, BBTOB(bp->b_length),
     497             :                         xfs_dir3_data_end_offset(geo, bp->b_addr));
     498             : 
     499           0 :         while (offset < end) {
     500           0 :                 struct xfs_dir2_data_unused     *dup = bp->b_addr + offset;
     501           0 :                 struct xfs_dir2_data_entry      *dep = bp->b_addr + offset;
     502             : 
     503           0 :                 if (xchk_should_terminate(rd->sc, &error))
     504           0 :                         return error;
     505             : 
     506             :                 /* Skip unused entries. */
     507           0 :                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
     508           0 :                         offset += be16_to_cpu(dup->length);
     509           0 :                         continue;
     510             :                 }
     511             : 
     512             :                 /* Don't walk off the end of the block. */
     513           0 :                 offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
     514           0 :                 if (offset > end)
     515             :                         break;
     516             : 
     517             :                 /* Ok, let's save this entry. */
     518           0 :                 error = xrep_dir_salvage_data_entry(rd, dep);
     519           0 :                 if (error)
     520           0 :                         return error;
     521             : 
     522             :         }
     523             : 
     524             :         return 0;
     525             : }
     526             : 
     527             : /* Try to recover shortform directory entries. */
     528             : STATIC int
     529           0 : xrep_dir_recover_sf(
     530             :         struct xrep_dir                 *rd)
     531             : {
     532           0 :         struct xfs_dir2_sf_hdr          *sfp;
     533           0 :         struct xfs_dir2_sf_entry        *sfep;
     534           0 :         struct xfs_dir2_sf_entry        *next;
     535           0 :         struct xfs_ifork                *ifp;
     536           0 :         xfs_ino_t                       ino;
     537           0 :         unsigned char                   *end;
     538           0 :         int                             error = 0;
     539             : 
     540           0 :         ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
     541           0 :         sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
     542           0 :         end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
     543             : 
     544           0 :         ino = xfs_dir2_sf_get_parent_ino(sfp);
     545           0 :         trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
     546             : 
     547           0 :         sfep = xfs_dir2_sf_firstentry(sfp);
     548           0 :         while ((unsigned char *)sfep < end) {
     549           0 :                 if (xchk_should_terminate(rd->sc, &error))
     550           0 :                         return error;
     551             : 
     552           0 :                 next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
     553           0 :                 if ((unsigned char *)next > end)
     554             :                         break;
     555             : 
     556             :                 /* Ok, let's save this entry. */
     557           0 :                 error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
     558           0 :                 if (error)
     559           0 :                         return error;
     560             : 
     561             :                 sfep = next;
     562             :         }
     563             : 
     564             :         return 0;
     565             : }
     566             : 
     567             : /*
     568             :  * Try to figure out the format of this directory from the data fork mappings
     569             :  * and the directory size.  If we can be reasonably sure of format, we can be
     570             :  * more aggressive in salvaging directory entries.  On return, @magic_guess
     571             :  * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format"
     572             :  * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory,
     573             :  * and 0 if we can't tell.
                     :  *
                     :  * The guess is stored as a big-endian value (cpu_to_be32).  Errors returned
                     :  * by the two xfs_bmap_* lookups are not reported to the caller; a failed
                     :  * lookup only means that the corresponding guess is not made.
     574             :  */
     575             : STATIC void
     576           0 : xrep_dir_guess_format(
     577             :         struct xrep_dir         *rd,
     578             :         __be32                  *magic_guess)
     579             : {
     580           0 :         struct xfs_inode        *dp = rd->sc->ip;
     581           0 :         struct xfs_mount        *mp = rd->sc->mp;
     582           0 :         struct xfs_da_geometry  *geo = mp->m_dir_geo;
     583           0 :         xfs_fileoff_t           last;
     584           0 :         int                     error;
     585             : 
     586           0 :         ASSERT(xfs_has_crc(mp)); /* only DIR3 magic numbers are guessed below */
     587             : 
     588           0 :         *magic_guess = 0; /* default: format unknown */
     589             : 
     590             :         /*
     591             :          * If there's a single directory block and the directory size is
     592             :          * exactly one block, this has to be a single block format directory.
     593             :          */
     594           0 :         error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
     595           0 :         if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
     596           0 :             dp->i_disk_size == geo->blksize) {
     597           0 :                 *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
     598           0 :                 return;
     599             :         }
     600             : 
     601             :         /*
     602             :          * If the last extent before the leaf offset matches the directory
     603             :          * size and the directory size is larger than 1 block, this is a
     604             :          * data format directory.
                     :          *
                     :          * @last is seeded with the leaf block offset and is updated in place
                     :          * by xfs_bmap_last_before().
     605             :          */
     606           0 :         last = geo->leafblk;
     607           0 :         error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
     608           0 :         if (!error &&
     609           0 :             XFS_FSB_TO_B(mp, last) > geo->blksize &&
     610           0 :             XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
     611           0 :                 *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
     612           0 :                 return;
     613             :         }
     614             : }
     615             : 
     616             : /*
                     :  * Recover directory entries from a specific directory block.
                     :  *
                     :  * @magic_guess is the big-endian magic number that the caller believes the
                     :  * data blocks of this directory should carry, or 0 if the format is not
                     :  * known.  When it is nonzero, the header magic in the buffer is temporarily
                     :  * overwritten with the guess before salvaging; otherwise only blocks whose
                     :  * existing magic is recognized and which pass the buffer verifier and the
                     :  * header check are salvaged.  The original magic is always put back before
                     :  * the buffer is released.
                     :  *
                     :  * Returns the error from reading the buffer or from xrep_dir_recover_data().
                     :  * Returns 0 without salvaging anything if no buffer comes back from the read
                     :  * (XFS_DABUF_MAP_HOLE_OK is passed) or if the block is skipped.
                     :  */
     617             : STATIC int
     618           0 : xrep_dir_recover_dirblock(
     619             :         struct xrep_dir         *rd,
     620             :         __be32                  magic_guess,
     621             :         xfs_dablk_t             dabno)
     622             : {
     623           0 :         struct xfs_dir2_data_hdr *hdr;
     624           0 :         struct xfs_buf          *bp;
     625           0 :         __be32                  oldmagic;
     626           0 :         int                     error;
     627             : 
     628             :         /*
     629             :          * Try to read buffer.  We invalidate them in the next step so we don't
     630             :          * bother to set a buffer type or ops.
     631             :          */
     632           0 :         error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
     633             :                         XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
     634           0 :         if (error || !bp)
     635             :                 return error;
     636             : 
     637           0 :         hdr = bp->b_addr;
     638           0 :         oldmagic = hdr->magic; /* restored at the out label */
     639             : 
     640           0 :         trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
     641           0 :                         be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
     642             : 
     643             :         /*
     644             :          * If we're sure of the block's format, proceed with the salvage
     645             :          * operation using the specified magic number.
     646             :          */
     647           0 :         if (magic_guess) {
     648           0 :                 hdr->magic = magic_guess;
     649           0 :                 goto recover;
     650             :         }
     651             : 
     652             :         /*
     653             :          * If we couldn't guess what type of directory this is, then we will
     654             :          * only salvage entries from directory blocks that match the magic
     655             :          * number and pass verifiers.
                     :          *
                     :          * error is still 0 here, so every "goto out" in this switch makes the
                     :          * function return 0 (block skipped).
     656             :          */
     657           0 :         switch (hdr->magic) {
     658           0 :         case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
     659             :         case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
     660           0 :                 if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
     661           0 :                         goto out;
     662           0 :                 if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
     663           0 :                         goto out;
     664             :                 break;
     665           0 :         case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
     666             :         case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
     667           0 :                 if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
     668           0 :                         goto out;
     669           0 :                 if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
     670           0 :                         goto out;
     671             :                 break;
     672           0 :         default:
     673           0 :                 goto out;
     674             :         }
     675             : 
     676           0 : recover:
     677           0 :         error = xrep_dir_recover_data(rd, bp);
     678             : 
     679           0 : out:
     680           0 :         hdr->magic = oldmagic;
     681           0 :         xfs_trans_brelse(rd->sc->tp, bp);
     682           0 :         return error;
     683             : }
     684             : /*
                     :  * Reset rd->args for a directory operation on @dp.
                     :  *
                     :  * The whole xfs_da_args structure is zeroed first, so any field that is not
                     :  * set here (the replay helpers set inumber, total and op_flags) must be
                     :  * filled in by the caller afterwards.  args.owner is always the inode number
                     :  * of rd->sc->ip, even when @dp is a different inode.  If @name is NULL, the
                     :  * name, namelen, filetype and hashval fields are left zeroed.
                     :  */
     685             : static inline void
     686      153591 : xrep_dir_init_args(
     687             :         struct xrep_dir         *rd,
     688             :         struct xfs_inode        *dp,
     689             :         const struct xfs_name   *name)
     690             : {
     691      153591 :         memset(&rd->args, 0, sizeof(struct xfs_da_args));
     692      153591 :         rd->args.geo = rd->sc->mp->m_dir_geo;
     693      153591 :         rd->args.whichfork = XFS_DATA_FORK;
     694      153591 :         rd->args.owner = rd->sc->ip->i_ino; /* sc->ip, not @dp */
     695      153591 :         rd->args.trans = rd->sc->tp;
     696      153591 :         rd->args.dp = dp;
     697      153591 :         if (!name)
     698             :                 return;
     699      140781 :         rd->args.name = name->name;
     700      140781 :         rd->args.namelen = name->len;
     701      140781 :         rd->args.filetype = name->type;
     702      140781 :         rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
     703             : }
     704             : 
     705             : /*
                     :  * Replay a stashed createname into the temporary directory.
                     :  *
                     :  * Adds an entry @name pointing at inode @inum to rd->sc->tempip, using
                     :  * @total as the block total for the da args.  @inum is validated with
                     :  * xfs_dir_ino_validate() first.  The add is dispatched to the shortform,
                     :  * block, leaf or node implementation according to the current format of
                     :  * the temporary directory.  Returns 0 or a negative errno.
                     :  */
     706             : STATIC int
     707      129783 : xrep_dir_replay_createname(
     708             :         struct xrep_dir         *rd,
     709             :         const struct xfs_name   *name,
     710             :         xfs_ino_t               inum,
     711             :         xfs_extlen_t            total)
     712             : {
     713      129783 :         struct xfs_scrub        *sc = rd->sc;
     714      129783 :         struct xfs_inode        *dp = rd->sc->tempip;
     715      129783 :         bool                    is_block, is_leaf;
     716      129783 :         int                     error;
     717             : 
     718      129783 :         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
     719             : 
     720      129783 :         error = xfs_dir_ino_validate(sc->mp, inum);
     721      129783 :         if (error)
     722             :                 return error;
     723             : 
     724      129783 :         trace_xrep_dir_replay_createname(dp, name, inum);
     725             : 
     726      129783 :         xrep_dir_init_args(rd, dp, name); /* zeroes rd->args, so set the rest after */
     727      129783 :         rd->args.inumber = inum;
     728      129783 :         rd->args.total = total;
     729      129783 :         rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
     730             : 
     731      129783 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
     732       72023 :                 return xfs_dir2_sf_addname(&rd->args);
     733             : 
     734       57760 :         error = xfs_dir2_isblock(&rd->args, &is_block);
     735       57760 :         if (error)
     736             :                 return error;
     737       57760 :         if (is_block)
     738        6054 :                 return xfs_dir2_block_addname(&rd->args);
     739             : 
     740       51706 :         error = xfs_dir2_isleaf(&rd->args, &is_leaf);
     741       51706 :         if (error)
     742             :                 return error;
     743       51706 :         if (is_leaf)
     744        9603 :                 return xfs_dir2_leaf_addname(&rd->args);
     745             : 
     746       42103 :         return xfs_dir2_node_addname(&rd->args); /* neither block nor leaf */
     747             : }
     748             : 
     749             : /*
                     :  * Replay a stashed removename onto the temporary directory.
                     :  *
                     :  * Removes the entry @name, using @total as the block total for the da args.
                     :  * The removal is dispatched to the shortform, block, leaf or node
                     :  * implementation according to the current format of the directory.
                     :  * Returns 0 or a negative errno.
                     :  *
                     :  * NOTE(review): the directory operated on is read from rd->args.dp, i.e.
                     :  * whatever an earlier xrep_dir_init_args() call left there, whereas
                     :  * xrep_dir_replay_createname() uses rd->sc->tempip explicitly.  Confirm that
                     :  * rd->args.dp is always the temporary directory when this is called.
                     :  */
     750             : STATIC int
     751           0 : xrep_dir_replay_removename(
     752             :         struct xrep_dir         *rd,
     753             :         const struct xfs_name   *name,
     754             :         xfs_extlen_t            total)
     755             : {
     756           0 :         struct xfs_inode        *dp = rd->args.dp;
     757           0 :         bool                    is_block, is_leaf;
     758           0 :         int                     error;
     759             : 
     760           0 :         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
     761             : 
     762           0 :         xrep_dir_init_args(rd, dp, name);
     763           0 :         rd->args.op_flags = 0;
     764           0 :         rd->args.total = total;
     765             : 
     766           0 :         trace_xrep_dir_replay_removename(dp, name, 0); /* inode number is passed as 0 */
     767             : 
     768           0 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
     769           0 :                 return xfs_dir2_sf_removename(&rd->args);
     770             : 
     771           0 :         error = xfs_dir2_isblock(&rd->args, &is_block);
     772           0 :         if (error)
     773             :                 return error;
     774           0 :         if (is_block)
     775           0 :                 return xfs_dir2_block_removename(&rd->args);
     776             : 
     777           0 :         error = xfs_dir2_isleaf(&rd->args, &is_leaf);
     778           0 :         if (error)
     779             :                 return error;
     780           0 :         if (is_leaf)
     781           0 :                 return xfs_dir2_leaf_removename(&rd->args);
     782             : 
     783           0 :         return xfs_dir2_node_removename(&rd->args); /* neither block nor leaf */
     784             : }
     785             : 
     786             : /*
     787             :  * Add this stashed incore directory entry to the temporary directory.
     788             :  * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
     789             :  * must not be in transaction context.
                     :  *
                     :  * Depending on dirent->action the entry is either added to or removed from
                     :  * the temporary directory, and rd->dirents / rd->subdirs are adjusted to
                     :  * match.  The name bytes are not taken from @dirent; they are read from
                     :  * rd->pptr.p_name, which the caller must have filled in beforehand.
                     :  *
                     :  * A transaction is allocated here and committed on success or cancelled on
                     :  * failure.  Returns 0 or a negative errno.
                     :  *
                     :  * NOTE(review): if xrep_trans_commit() fails, we return without calling
                     :  * xrep_tempfile_iunlock().  Presumably the scrub teardown path releases the
                     :  * tempfile ILOCK; confirm.
     790             :  */
     791             : STATIC int
     792      129783 : xrep_dir_replay_update(
     793             :         struct xrep_dir                 *rd,
     794             :         const struct xrep_dirent        *dirent)
     795             : {
     796      129783 :         struct xfs_name                 name = {
     797      129783 :                 .len                    = dirent->namelen,
     798      129783 :                 .type                   = dirent->ftype,
     799      129783 :                 .name                   = rd->pptr.p_name,
     800             :         };
     801      129783 :         struct xfs_mount                *mp = rd->sc->mp;
     802             : #ifdef DEBUG
     803      129783 :         xfs_ino_t                       ino;
     804             : #endif
     805      129783 :         uint                            resblks;
     806      129783 :         int                             error;
     807             : 
     808      129783 :         resblks = xfs_link_space_res(mp, dirent->namelen); /* also used for removals */
     809      129783 :         error = xchk_trans_alloc(rd->sc, resblks);
     810      129783 :         if (error)
     811             :                 return error;
     812             : 
     813             :         /* Lock the temporary directory and join it to the transaction */
     814      129783 :         xrep_tempfile_ilock(rd->sc);
     815      129783 :         xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);
     816             : 
     817      129783 :         switch (dirent->action) {
     818      129783 :         case XREP_DIRENT_ADD:
     819             :                 /*
     820             :                  * Create a replacement dirent in the temporary directory.
     821             :                  * Note that _createname doesn't check for existing entries.
     822             :                  * There shouldn't be any in the temporary dir, but we'll
     823             :                  * verify this in debug mode.
                     :                  *
                     :                  * NOTE(review): if the debug lookup returns 0 (the name
                     :                  * already exists), we jump to out_cancel with error == 0
                     :                  * and report success without adding anything.  The ASSERT
                     :                  * in that branch repeats the branch condition and so can
                     :                  * never fire.  Confirm this is intended.
     824             :                  */
     825             : #ifdef DEBUG
     826      129783 :                 error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
     827      129783 :                 if (error != -ENOENT) {
     828           0 :                         ASSERT(error != -ENOENT);
     829           0 :                         goto out_cancel;
     830             :                 }
     831             : #endif
     832             : 
     833      129783 :                 error = xrep_dir_replay_createname(rd, &name, dirent->ino,
     834             :                                 resblks);
     835      129783 :                 if (error)
     836           0 :                         goto out_cancel;
     837             : 
     838      129783 :                 if (name.type == XFS_DIR3_FT_DIR)
     839       15970 :                         rd->subdirs++;
     840      129783 :                 rd->dirents++;
     841      129783 :                 break;
     842           0 :         case XREP_DIRENT_REMOVE:
     843             :                 /*
     844             :                  * Remove a dirent from the temporary directory.  Note that
     845             :                  * _removename doesn't check the inode target of the existing
     846             :                  * entry.  There should be a perfect match in the temporary
     847             :                  * dir, but we'll verify this in debug mode.
                     :                  *
                     :                  * NOTE(review): ASSERT(error != 0) below repeats the branch
                     :                  * condition, so it can never fire.
     848             :                  */
     849             : #ifdef DEBUG
     850           0 :                 error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
     851           0 :                 if (error) {
     852           0 :                         ASSERT(error != 0);
     853           0 :                         goto out_cancel;
     854             :                 }
     855           0 :                 if (ino != dirent->ino) {
     856           0 :                         ASSERT(ino == dirent->ino);
     857           0 :                         error = -EIO;
     858           0 :                         goto out_cancel;
     859             :                 }
     860             : #endif
     861             : 
     862           0 :                 error = xrep_dir_replay_removename(rd, &name, resblks);
     863           0 :                 if (error)
     864           0 :                         goto out_cancel;
     865             : 
     866           0 :                 if (name.type == XFS_DIR3_FT_DIR)
     867           0 :                         rd->subdirs--;
     868           0 :                 rd->dirents--;
     869           0 :                 break;
     870           0 :         default:
     871           0 :                 ASSERT(0); /* unknown action */
     872           0 :                 error = -EIO;
     873           0 :                 goto out_cancel;
     874             :         }
     875             : 
     876             :         /* Commit and unlock. */
     877      129783 :         error = xrep_trans_commit(rd->sc);
     878      129783 :         if (error)
     879             :                 return error;
     880             : 
     881      129783 :         xrep_tempfile_iunlock(rd->sc);
     882      129783 :         return 0;
     883           0 : out_cancel:
     884           0 :         xchk_trans_cancel(rd->sc);
     885           0 :         xrep_tempfile_iunlock(rd->sc);
     886           0 :         return error;
     887             : }
     888             : 
     889             : /*
     890             :  * Flush stashed incore dirent updates that have been recorded by the scanner.
     891             :  * This is done to reduce the memory requirements of the directory rebuild,
     892             :  * since directories can contain up to 32GB of directory data.
     893             :  *
     894             :  * Caller must not hold transactions or ILOCKs.  Caller must hold the tempdir
     895             :  * IOLOCK.
                     :  *
                     :  * Each stashed record is loaded from rd->dir_entries, its name is loaded
                     :  * from rd->dir_names into rd->pptr.p_name, and the record is replayed with
                     :  * xrep_dir_replay_update().  Both stores are truncated only after every
                     :  * record has been replayed; on error the function returns immediately and
                     :  * leaves them untouched.  Returns 0 or a negative errno.
     896             :  */
     897             : STATIC int
     898       12813 : xrep_dir_replay_updates(
     899             :         struct xrep_dir         *rd)
     900             : {
     901       12813 :         xfarray_idx_t           array_cur;
     902       12813 :         int                     error;
     903             : 
     904             :         /* Add all the salvaged dirents to the temporary directory. */
     905      142596 :         foreach_xfarray_idx(rd->dir_entries, array_cur) {
     906      129783 :                 struct xrep_dirent      dirent;
     907             : 
     908      129783 :                 error = xfarray_load(rd->dir_entries, array_cur, &dirent);
     909      129783 :                 if (error)
     910           0 :                         return error;
     911             : 
     912             :                 /* The dirent name is stored in the in-core buffer. */
     913      129783 :                 error = xfblob_load(rd->dir_names, dirent.name_cookie,
     914      129783 :                                 rd->pptr.p_name, dirent.namelen);
     915      129783 :                 if (error)
     916           0 :                         return error;
                     :                 /*
                     :                  * The terminator goes in the last byte of the buffer, not at
                     :                  * dirent.namelen; the name length is carried separately in
                     :                  * the dirent.  Assumes p_name holds MAXNAMELEN bytes -- TODO
                     :                  * confirm against the struct definition.
     917      129783 :                  */ rd->pptr.p_name[MAXNAMELEN - 1] = 0;
     918             : 
     919      129783 :                 error = xrep_dir_replay_update(rd, &dirent);
     920      129783 :                 if (error)
     921           0 :                         return error;
     922             :         }
     923             : 
     924             :         /* Empty out both arrays now that we've added the entries. */
     925       12813 :         xfarray_truncate(rd->dir_entries);
     926       12813 :         xfblob_truncate(rd->dir_names);
     927       12813 :         return 0;
     928             : }
     929             : 
     930             : /*
     931             :  * Periodically flush stashed directory entries to the temporary dir.  This
     932             :  * is done to reduce the memory requirements of the directory rebuild, since
     933             :  * directories can contain up to 32GB of directory data.
                     :  *
                     :  * On success a new scrub transaction has been allocated and ILOCK_EXCL has
                     :  * been retaken through xchk_ilock().  If taking the tempfile IOLOCK or
                     :  * replaying the updates fails, we return with the ILOCK already dropped and
                     :  * the old transaction already committed; if allocating the new transaction
                     :  * fails, we return with the ILOCK dropped.  Returns 0 or a negative errno.
     934             :  */
     935             : STATIC int
     936           0 : xrep_dir_flush_stashed(
     937             :         struct xrep_dir         *rd)
     938             : {
     939           0 :         int                     error;
     940             : 
     941             :         /*
     942             :          * Entering this function, the scrub context has a reference to the
     943             :          * inode being repaired, the temporary file, and a scrub transaction
     944             :          * that we use during dirent salvaging to avoid livelocking if there
     945             :          * are cycles in the directory structures.  We hold ILOCK_EXCL on both
     946             :          * the inode being repaired and the temporary file, though they are
     947             :          * not ijoined to the scrub transaction.
     948             :          *
     949             :          * To constrain kernel memory use, we occasionally write salvaged
     950             :          * dirents from the xfarray and xfblob structures into the temporary
     951             :          * directory in preparation for swapping the directory structures at
     952             :          * the end.  Updating the temporary file requires a transaction, so we
     953             :          * commit the scrub transaction and drop the two ILOCKs so that
     954             :          * we can allocate whatever transaction we want.
     955             :          *
     956             :          * We still hold IOLOCK_EXCL on the inode being repaired, which
     957             :          * prevents anyone from accessing the damaged directory data while we
     958             :          * repair it.
     959             :          */
     960           0 :         error = xrep_trans_commit(rd->sc);
     961           0 :         if (error)
     962             :                 return error;
     963           0 :         xchk_iunlock(rd->sc, XFS_ILOCK_EXCL);
     964             : 
     965             :         /*
     966             :          * Take the IOLOCK of the temporary file while we modify dirents.  This
     967             :          * isn't strictly required because the temporary file is never revealed
     968             :          * to userspace, but we follow the same locking rules.  We still hold
     969             :          * sc->ip's IOLOCK.
     970             :          */
     971           0 :         error = xrep_tempfile_iolock_polled(rd->sc);
     972           0 :         if (error)
     973             :                 return error;
     974             : 
     975             :         /* Write to the tempdir all the updates that we've stashed. */
     976           0 :         error = xrep_dir_replay_updates(rd);
     977           0 :         xrep_tempfile_iounlock(rd->sc); /* dropped whether or not the replay worked */
     978           0 :         if (error)
     979             :                 return error;
     980             : 
     981             :         /*
     982             :          * Recreate the salvage transaction and relock the dir we're salvaging.
     983             :          */
     984           0 :         error = xchk_trans_alloc(rd->sc, 0);
     985           0 :         if (error)
     986             :                 return error;
     987           0 :         xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
     988           0 :         return 0;
     989             : }
     990             : 
     991             : /*
                     :  * Decide if we've stashed too much dirent data in memory.
                     :  *
                     :  * Returns true when the combined size reported by xfarray_bytes() for
                     :  * rd->dir_entries and xfblob_bytes() for rd->dir_names is strictly greater
                     :  * than XREP_DIR_MAX_STASH_BYTES.
                     :  */
     992             : static inline bool
     993   358803836 : xrep_dir_want_flush_stashed(
     994             :         struct xrep_dir         *rd)
     995             : {
     996   358803836 :         unsigned long long      bytes;
     997             : 
     998   358803836 :         bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
     999   358800704 :         return bytes > XREP_DIR_MAX_STASH_BYTES;
    1000             : }
    1001             : 
    1002             : /*
                     :  * Extract as many directory entries as we can.
                     :  *
                     :  * Walks the data fork mappings of sc->ip from file offset 0 up to (but not
                     :  * including) the directory leaf offset.  Mappings that are not written
                     :  * extents are skipped.  Within each written extent, every directory block
                     :  * (stepping by geo->fsbcount filesystem blocks) is handed to
                     :  * xrep_dir_recover_dirblock(), and the stash is flushed to the temporary
                     :  * directory whenever it grows too large.
                     :  *
                     :  * Returns 0, a negative errno from a helper, or -EFSCORRUPTED if
                     :  * xfs_bmapi_read() does not return exactly one mapping.
                     :  */
    1003             : STATIC int
    1004           0 : xrep_dir_recover(
    1005             :         struct xrep_dir         *rd)
    1006             : {
    1007           0 :         struct xfs_bmbt_irec    got;
    1008           0 :         struct xfs_scrub        *sc = rd->sc;
    1009           0 :         struct xfs_da_geometry  *geo = sc->mp->m_dir_geo;
    1010           0 :         xfs_fileoff_t           offset;
    1011           0 :         xfs_dablk_t             dabno;
    1012           0 :         __be32                  magic_guess;
    1013           0 :         int                     nmap;
    1014           0 :         int                     error;
    1015             : 
    1016           0 :         xrep_dir_guess_format(rd, &magic_guess);
    1017             : 
    1018             :         /* Iterate each directory data block in the data fork. */
    1019           0 :         for (offset = 0;
    1020           0 :              offset < geo->leafblk;
    1021           0 :              offset = got.br_startoff + got.br_blockcount) {
    1022           0 :                 nmap = 1; /* ask for a single mapping per call */
    1023           0 :                 error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
    1024             :                                 &got, &nmap, 0);
    1025           0 :                 if (error)
    1026           0 :                         return error;
    1027           0 :                 if (nmap != 1)
    1028             :                         return -EFSCORRUPTED;
    1029           0 :                 if (!xfs_bmap_is_written_extent(&got))
    1030           0 :                         continue;
    1031             : 
                     :                 /*
                     :                  * Start at the first directory-block-aligned offset at or
                     :                  * after the beginning of this extent.
    1032           0 :                  */ for (dabno = round_up(got.br_startoff, geo->fsbcount);
    1033           0 :                      dabno < got.br_startoff + got.br_blockcount;
    1034           0 :                      dabno += geo->fsbcount) {
    1035           0 :                         if (xchk_should_terminate(rd->sc, &error))
    1036           0 :                                 return error;
    1037             : 
    1038           0 :                         error = xrep_dir_recover_dirblock(rd,
    1039             :                                         magic_guess, dabno);
    1040           0 :                         if (error)
    1041           0 :                                 return error;
    1042             : 
    1043             :                         /* Flush dirents to constrain memory usage. */
    1044           0 :                         if (xrep_dir_want_flush_stashed(rd)) {
    1045           0 :                                 error = xrep_dir_flush_stashed(rd);
    1046           0 :                                 if (error)
    1047           0 :                                         return error;
    1048             :                         }
    1049             :                 }
    1050             :         }
    1051             : 
    1052             :         return 0;
    1053             : }
    1054             : 
    1055             : /*
    1056             :  * Find all the directory entries for this inode by scraping them out of the
    1057             :  * directory leaf blocks by hand, and flushing them into the temp dir.
                     :  *
                     :  * A local-format directory is salvaged with xrep_dir_recover_sf(); any
                     :  * other format has its extent list read in and is then walked with
                     :  * xrep_dir_recover().  If salvaging succeeds, whatever is still stashed is
                     :  * flushed with xrep_dir_flush_stashed().  Returns 0 or a negative errno.
    1058             :  */
    1059             : STATIC int
    1060           0 : xrep_dir_find_entries(
    1061             :         struct xrep_dir         *rd)
    1062             : {
    1063           0 :         struct xfs_inode        *dp = rd->sc->ip;
    1064           0 :         int                     error;
    1065             : 
    1066             :         /*
    1067             :          * Salvage directory entries from the old directory, and write them to
    1068             :          * the temporary directory.
    1069             :          */
    1070           0 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
    1071           0 :                 error = xrep_dir_recover_sf(rd);
    1072             :         } else {
    1073           0 :                 error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
    1074           0 :                 if (error)
    1075             :                         return error;
    1076             : 
    1077           0 :                 error = xrep_dir_recover(rd);
    1078             :         }
    1079           0 :         if (error)
    1080             :                 return error;
    1081             : 
    1082           0 :         return xrep_dir_flush_stashed(rd); /* final flush of anything still stashed */
    1083             : }
    1084             : 
    1085             : /*
                     :  * Find the parent of the directory being repaired, then salvage dirents
                     :  * from the directory's own blocks into the temporary directory.
                     :  *
                     :  * ILOCK_EXCL is dropped around the parent search and retaken afterwards.
                     :  * On success the repair transaction has been committed and ILOCK_EXCL has
                     :  * been dropped.  Returns 0 or a negative errno; on error the function
                     :  * returns as soon as the failing step reports it.
                     :  */
    1086             : STATIC int
    1087           0 : xrep_dir_salvage_entries(
    1088             :         struct xrep_dir         *rd)
    1089             : {
    1090           0 :         struct xfs_scrub        *sc = rd->sc;
    1091           0 :         int                     error;
    1092             : 
    1093             :         /*
    1094             :          * Drop the ILOCK on this directory so that we can scan for this
    1095             :          * directory's parent.  Figure out who is going to be the parent of
    1096             :          * this directory, then retake the ILOCK so that we can salvage
    1097             :          * directory entries.
    1098             :          */
    1099           0 :         xchk_iunlock(sc, XFS_ILOCK_EXCL);
    1100           0 :         error = xrep_dir_find_parent(rd);
    1101           0 :         xchk_ilock(sc, XFS_ILOCK_EXCL); /* retaken even if the parent search failed */
    1102           0 :         if (error)
    1103             :                 return error;
    1104             : 
    1105             :         /*
    1106             :          * Collect directory entries by parsing raw leaf blocks to salvage
    1107             :          * whatever we can.  When we're done, free the staging memory before
    1108             :          * swapping the directories to reduce memory usage.
    1109             :          */
    1110           0 :         error = xrep_dir_find_entries(rd);
    1111           0 :         if (error)
    1112             :                 return error;
    1113             : 
    1114             :         /*
    1115             :          * Commit the repair transaction and drop the ILOCK so that we can
    1116             :          * (later) use the atomic extent swap helper functions to compute the
    1117             :          * correct block reservations and re-lock the inodes.
    1118             :          *
    1119             :          * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
    1120             :          * modifications, but there's nothing to prevent userspace from reading
    1121             :          * the directory until we're ready for the swap operation.  Reads will
    1122             :          * return -EIO without shutting down the fs, so we're ok with that.
    1123             :          *
    1124             :          * The VFS can change dotdot on us, but the findparent scan will keep
    1125             :          * our incore parent inode up to date.  See the note on locking issues
    1126             :          * for more details.
    1127             :          */
    1128           0 :         error = xrep_trans_commit(sc);
    1129           0 :         if (error)
    1130             :                 return error;
    1131             : 
    1132           0 :         xchk_iunlock(sc, XFS_ILOCK_EXCL);
    1133           0 :         return 0;
    1134             : }
    1135             : 
    1136             : 
    1137             : /*
    1138             :  * Examine a parent pointer of a file.  If it leads us back to the directory
    1139             :  * that we're rebuilding, create an incore dirent from the parent pointer and
    1140             :  * stash it.
    1141             :  */
    1142             : STATIC int
    1143   457794284 : xrep_dir_scan_pptr(
    1144             :         struct xfs_scrub        *sc,
    1145             :         struct xfs_inode        *ip,
    1146             :         const struct xfs_parent_name_irec *pptr,
    1147             :         void                    *priv)
    1148             : {
    1149   457794284 :         struct xfs_name         xname;
    1150   457794284 :         struct xrep_dir         *rd = priv;
    1151   457794284 :         int                     error;
    1152             : 
    1153             :         /* Ignore parent pointers that point back to a different dir. */
    1154   457794284 :         if (pptr->p_ino != sc->ip->i_ino ||
    1155      129793 :             pptr->p_gen != VFS_I(sc->ip)->i_generation)
    1156             :                 return 0;
    1157             : 
    1158             :         /*
    1159             :          * Transform this parent pointer into a dirent and queue it for later
    1160             :          * addition to the temporary directory.
    1161             :          */
    1162      129793 :         xname.name = pptr->p_name;
    1163      129793 :         xname.len = pptr->p_namelen;
    1164      129793 :         xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
    1165             : 
    1166      129793 :         mutex_lock(&rd->pscan.lock);
    1167      129793 :         error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
    1168      129793 :         mutex_unlock(&rd->pscan.lock);
    1169      129793 :         return error;
    1170             : }
    1171             : 
    1172             : /*
    1173             :  * If this child dirent points to the directory being repaired, remember that
    1174             :  * fact so that we can reset the dotdot entry if necessary.
    1175             :  */
    1176             : STATIC int
    1177   752730643 : xrep_dir_scan_dirent(
    1178             :         struct xfs_scrub        *sc,
    1179             :         struct xfs_inode        *dp,
    1180             :         xfs_dir2_dataptr_t      dapos,
    1181             :         const struct xfs_name   *name,
    1182             :         xfs_ino_t               ino,
    1183             :         void                    *priv)
    1184             : {
    1185   752730643 :         struct xrep_dir         *rd = priv;
    1186             : 
    1187             :         /* Dirent doesn't point to this directory. */
    1188   752730643 :         if (ino != rd->sc->ip->i_ino)
    1189             :                 return 0;
    1190             : 
    1191             :         /* Ignore garbage inum. */
    1192       27940 :         if (!xfs_verify_dir_ino(rd->sc->mp, ino))
    1193             :                 return 0;
    1194             : 
    1195             :         /* No weird looking names. */
    1196       27940 :         if (name->len >= MAXNAMELEN || name->len <= 0)
    1197             :                 return 0;
    1198             : 
    1199             :         /* Don't pick up dot or dotdot entries; we only want child dirents. */
    1200       39908 :         if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
    1201       11968 :             xfs_dir2_samename(name, &xfs_name_dot))
    1202       15974 :                 return 0;
    1203             : 
    1204       11968 :         trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
    1205             :                         dp->i_ino);
    1206             : 
    1207       11968 :         xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
    1208       11968 :         return 0;
    1209             : }
    1210             : 
    1211             : /*
    1212             :  * Decide if we want to look for child dirents or parent pointers in this file.
    1213             :  * Skip the dir being repaired and any files being used to stage repairs.
    1214             :  */
    1215             : static inline bool
    1216   717006269 : xrep_dir_want_scan(
    1217             :         struct xrep_dir         *rd,
    1218             :         const struct xfs_inode  *ip)
    1219             : {
    1220   717006269 :         return ip != rd->sc->ip && !xrep_is_tempfile(ip);
    1221             : }
    1222             : 
    1223             : /*
    1224             :  * Take ILOCK on a file that we want to scan.
    1225             :  *
    1226             :  * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
    1227             :  * has an unloaded attr bmbt.  Otherwise, take ILOCK_SHARED.
    1228             :  */
    1229             : static inline unsigned int
    1230   358706674 : xrep_dir_scan_ilock(
    1231             :         struct xrep_dir         *rd,
    1232             :         struct xfs_inode        *ip)
    1233             : {
    1234   358706674 :         uint                    lock_mode = XFS_ILOCK_SHARED;
    1235             : 
    1236             :         /* Need to take the shared ILOCK to advance the iscan cursor. */
    1237   358706674 :         if (!xrep_dir_want_scan(rd, ip))
    1238       37169 :                 goto lock;
    1239             : 
    1240   506230353 :         if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
    1241           0 :                 lock_mode = XFS_ILOCK_EXCL;
    1242           0 :                 goto lock;
    1243             :         }
    1244             : 
    1245   717152010 :         if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
    1246           0 :                 lock_mode = XFS_ILOCK_EXCL;
    1247             : 
    1248   358689301 : lock:
    1249   358726470 :         xfs_ilock(ip, lock_mode);
    1250   358737219 :         return lock_mode;
    1251             : }
    1252             : 
    1253             : /*
    1254             :  * Scan this file for relevant child dirents or parent pointers that point to
    1255             :  * the directory we're rebuilding.
    1256             :  */
    1257             : STATIC int
    1258   358682838 : xrep_dir_scan_file(
    1259             :         struct xrep_dir         *rd,
    1260             :         struct xfs_inode        *ip)
    1261             : {
    1262   358682838 :         unsigned int            lock_mode;
    1263   358682838 :         int                     error = 0;
    1264             : 
    1265   358682838 :         lock_mode = xrep_dir_scan_ilock(rd, ip);
    1266             : 
    1267   358762560 :         if (!xrep_dir_want_scan(rd, ip))
    1268       37169 :                 goto scan_done;
    1269             : 
    1270   358727452 :         error = xchk_pptr_walk(rd->sc, ip, xrep_dir_scan_pptr, &rd->pptr, rd);
    1271   358717100 :         if (error)
    1272           0 :                 goto scan_done;
    1273             : 
    1274   358717100 :         if (S_ISDIR(VFS_I(ip)->i_mode)) {
    1275   147815451 :                 error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
    1276   147793097 :                 if (error)
    1277           0 :                         goto scan_done;
    1278             :         }
    1279             : 
    1280   358694746 : scan_done:
    1281   358731915 :         xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
    1282   358544478 :         xfs_iunlock(ip, lock_mode);
    1283   358730809 :         return error;
    1284             : }
    1285             : 
    1286             : /*
    1287             :  * Scan all files in the filesystem for parent pointers that we can turn into
    1288             :  * replacement dirents, and a dirent that we can use to set the dotdot pointer.
    1289             :  */
    1290             : STATIC int
    1291       12819 : xrep_dir_scan_dirtree(
    1292             :         struct xrep_dir         *rd)
    1293             : {
    1294       12819 :         struct xfs_scrub        *sc = rd->sc;
    1295       12819 :         struct xfs_inode        *ip;
    1296       12819 :         int                     error;
    1297             : 
    1298             :         /* Roots of directory trees are their own parents. */
    1299       12819 :         if (sc->ip == sc->mp->m_rootip)
    1300         848 :                 xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
    1301             : 
    1302             :         /*
    1303             :          * Filesystem scans are time consuming.  Drop the directory ILOCK and
    1304             :          * all other resources for the duration of the scan and hope for the
    1305             :          * best.  The live update hooks will keep our scan information up to
    1306             :          * date even though we've dropped the locks.
    1307             :          */
    1308       12819 :         xchk_trans_cancel(sc);
    1309       12819 :         if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
    1310       12819 :                 xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
    1311             :                                                     XFS_ILOCK_EXCL));
    1312       12819 :         error = xchk_trans_alloc_empty(sc);
    1313       12819 :         if (error)
    1314             :                 return error;
    1315             : 
    1316   358783901 :         while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
    1317   358674936 :                 bool            flush;
    1318             : 
    1319   358674936 :                 error = xrep_dir_scan_file(rd, ip);
    1320   358835322 :                 xchk_irele(sc, ip);
    1321   358779165 :                 if (error)
    1322             :                         break;
    1323             : 
    1324             :                 /* Flush stashed dirent updates to constrain memory usage. */
    1325   358779165 :                 mutex_lock(&rd->pscan.lock);
    1326   358801831 :                 flush = xrep_dir_want_flush_stashed(rd);
    1327   358813362 :                 mutex_unlock(&rd->pscan.lock);
    1328   358784526 :                 if (flush) {
    1329           0 :                         xchk_trans_cancel(sc);
    1330             : 
    1331           0 :                         error = xrep_tempfile_iolock_polled(sc);
    1332           0 :                         if (error)
    1333             :                                 break;
    1334             : 
    1335           0 :                         mutex_lock(&rd->pscan.lock);
    1336           0 :                         error = xrep_dir_replay_updates(rd);
    1337           0 :                         mutex_unlock(&rd->pscan.lock);
    1338           0 :                         xrep_tempfile_iounlock(sc);
    1339           0 :                         if (error)
    1340             :                                 break;
    1341             : 
    1342           0 :                         error = xchk_trans_alloc_empty(sc);
    1343           0 :                         if (error)
    1344             :                                 break;
    1345             :                 }
    1346             : 
    1347   358784526 :                 if (xchk_should_terminate(sc, &error))
    1348             :                         break;
    1349             :         }
    1350       12819 :         xchk_iscan_iter_finish(&rd->pscan.iscan);
    1351       12819 :         if (error) {
    1352             :                 /*
    1353             :                  * If we couldn't grab an inode that was busy with a state
    1354             :                  * change, change the error code so that we exit to userspace
    1355             :                  * as quickly as possible.
    1356             :                  */
    1357           5 :                 if (error == -EBUSY)
    1358             :                         return -ECANCELED;
    1359           5 :                 return error;
    1360             :         }
    1361             : 
    1362             :         /*
    1363             :          * Cancel the empty transaction so that we can (later) use the atomic
    1364             :          * extent swap helpers to lock files and commit the new directory.
    1365             :          */
    1366       12814 :         xchk_trans_cancel(rd->sc);
    1367       12814 :         return 0;
    1368             : }
    1369             : 
    1370             : /*
    1371             :  * Capture dirent updates being made by other threads which are relevant to the
    1372             :  * directory being repaired.
    1373             :  */
    1374             : STATIC int
    1375     8011781 : xrep_dir_live_update(
    1376             :         struct notifier_block           *nb,
    1377             :         unsigned long                   action,
    1378             :         void                            *data)
    1379             : {
    1380     8011781 :         struct xfs_dir_update_params    *p = data;
    1381     8011781 :         struct xrep_dir                 *rd;
    1382     8011781 :         struct xfs_scrub                *sc;
    1383     8011781 :         int                             error = 0;
    1384             : 
    1385     8011781 :         rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb);
    1386     8011781 :         sc = rd->sc;
    1387             : 
    1388             :         /*
    1389             :          * This thread updated a child dirent in the directory that we're
    1390             :          * rebuilding.  Stash the update for replay against the temporary
    1391             :          * directory.
    1392             :          */
    1393     8011781 :         if (p->dp->i_ino == sc->ip->i_ino &&
    1394           0 :             xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
    1395           0 :                 mutex_lock(&rd->pscan.lock);
    1396           0 :                 if (p->delta > 0)
    1397           0 :                         error = xrep_dir_stash_createname(rd, p->name,
    1398           0 :                                         p->ip->i_ino);
    1399             :                 else
    1400           0 :                         error = xrep_dir_stash_removename(rd, p->name,
    1401           0 :                                         p->ip->i_ino);
    1402           0 :                 mutex_unlock(&rd->pscan.lock);
    1403           0 :                 if (error)
    1404           0 :                         goto out_abort;
    1405             :         }
    1406             : 
    1407             :         /*
    1408             :          * This thread updated another directory's child dirent that points to
    1409             :          * the directory that we're rebuilding, so remember the new dotdot
    1410             :          * target.
    1411             :          */
    1412     8011781 :         if (p->ip->i_ino == sc->ip->i_ino &&
    1413           0 :             xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
    1414           0 :                 if (p->delta > 0) {
    1415           0 :                         trace_xrep_dir_stash_createname(sc->tempip,
    1416             :                                         &xfs_name_dotdot,
    1417           0 :                                         p->dp->i_ino);
    1418             : 
    1419           0 :                         xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
    1420             :                 } else {
    1421           0 :                         trace_xrep_dir_stash_removename(sc->tempip,
    1422             :                                         &xfs_name_dotdot,
    1423             :                                         rd->pscan.parent_ino);
    1424             : 
    1425           0 :                         xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
    1426             :                 }
    1427             :         }
    1428             : 
    1429             :         return NOTIFY_DONE;
    1430             : out_abort:
    1431           0 :         xchk_iscan_abort(&rd->pscan.iscan);
    1432           0 :         return NOTIFY_DONE;
    1433             : }
    1434             : 
    1435             : /*
    1436             :  * Free all the directory blocks and reset the data fork.  The caller must
    1437             :  * join the inode to the transaction.  This function returns with the inode
    1438             :  * joined to a clean scrub transaction.
    1439             :  */
    1440             : STATIC int
    1441       12812 : xrep_dir_reset_fork(
    1442             :         struct xrep_dir         *rd,
    1443             :         xfs_ino_t               parent_ino)
    1444             : {
    1445       12812 :         struct xfs_scrub        *sc = rd->sc;
    1446       12812 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
    1447       12812 :         int                     error;
    1448             : 
    1449             :         /* Unmap all the directory buffers. */
    1450       12812 :         if (xfs_ifork_has_extents(ifp)) {
    1451         883 :                 error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
    1452         883 :                 if (error)
    1453             :                         return error;
    1454             :         }
    1455             : 
    1456       12812 :         trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
    1457             : 
    1458             :         /* Reset the data fork to an empty data fork. */
    1459       12809 :         xfs_idestroy_fork(ifp);
    1460       12810 :         ifp->if_bytes = 0;
    1461       12810 :         sc->tempip->i_disk_size = 0;
    1462             : 
    1463             :         /* Reinitialize the short form directory. */
    1464       12810 :         xrep_dir_init_args(rd, sc->tempip, NULL);
    1465       12809 :         error = xfs_dir2_sf_create(&rd->args, parent_ino);
    1466       12813 :         if (error)
    1467             :                 return error;
    1468             : 
    1469       12813 :         return xrep_tempfile_roll_trans(sc);
    1470             : }
    1471             : 
    1472             : /*
    1473             :  * Prepare both inodes' directory forks for extent swapping.  Promote the
    1474             :  * tempfile from short format to leaf format, and if the file being repaired
    1475             :  * has a short format data fork, turn it into an empty extent list.
    1476             :  */
    1477             : STATIC int
    1478         883 : xrep_dir_swap_prep(
    1479             :         struct xfs_scrub        *sc,
    1480             :         bool                    temp_local,
    1481             :         bool                    ip_local)
    1482             : {
    1483         883 :         int                     error;
    1484             : 
    1485             :         /*
    1486             :          * If the tempfile's directory is in shortform format, convert that
    1487             :          * to a single leaf extent so that we can use the atomic extent swap.
    1488             :          */
    1489         883 :         if (temp_local) {
    1490         820 :                 struct xfs_da_args      args = {
    1491         820 :                         .dp             = sc->tempip,
    1492         820 :                         .geo            = sc->mp->m_dir_geo,
    1493             :                         .whichfork      = XFS_DATA_FORK,
    1494         820 :                         .trans          = sc->tp,
    1495             :                         .total          = 1,
    1496         820 :                         .owner          = sc->ip->i_ino,
    1497             :                 };
    1498             : 
    1499         820 :                 error = xfs_dir2_sf_to_block(&args);
    1500         820 :                 if (error)
    1501           0 :                         return error;
    1502             : 
    1503             :                 /*
    1504             :                  * Roll the deferred log items to get us back to a clean
    1505             :                  * transaction.
    1506             :                  */
    1507         820 :                 error = xfs_defer_finish(&sc->tp);
    1508         820 :                 if (error)
    1509             :                         return error;
    1510             :         }
    1511             : 
    1512             :         /*
    1513             :          * If the file being repaired had a shortform data fork, convert that
    1514             :          * to an empty extent list in preparation for the atomic extent swap.
    1515             :          */
    1516         883 :         if (ip_local) {
    1517           0 :                 struct xfs_ifork        *ifp;
    1518             : 
    1519           0 :                 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
    1520           0 :                 xfs_idestroy_fork(ifp);
    1521           0 :                 ifp->if_format = XFS_DINODE_FMT_EXTENTS;
    1522           0 :                 ifp->if_nextents = 0;
    1523           0 :                 ifp->if_bytes = 0;
    1524           0 :                 ifp->if_u1.if_root = NULL;
    1525           0 :                 ifp->if_height = 0;
    1526             : 
    1527           0 :                 xfs_trans_log_inode(sc->tp, sc->ip,
    1528             :                                 XFS_ILOG_CORE | XFS_ILOG_DDATA);
    1529             :         }
    1530             : 
    1531             :         return 0;
    1532             : }
    1533             : 
    1534             : /*
    1535             :  * Replace the inode number of a directory entry.
    1536             :  */
    1537             : static int
    1538       10998 : xrep_dir_replace(
    1539             :         struct xrep_dir         *rd,
    1540             :         struct xfs_inode        *dp,
    1541             :         const struct xfs_name   *name,
    1542             :         xfs_ino_t               inum,
    1543             :         xfs_extlen_t            total)
    1544             : {
    1545       10998 :         struct xfs_scrub        *sc = rd->sc;
    1546       10998 :         bool                    is_block, is_leaf;
    1547       10998 :         int                     error;
    1548             : 
    1549       10998 :         ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
    1550             : 
    1551       10998 :         error = xfs_dir_ino_validate(sc->mp, inum);
    1552       10998 :         if (error)
    1553             :                 return error;
    1554             : 
    1555       10998 :         xrep_dir_init_args(rd, dp, name);
    1556       10998 :         rd->args.inumber = inum;
    1557       10998 :         rd->args.total = total;
    1558             : 
    1559       10998 :         if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
    1560       10992 :                 return xfs_dir2_sf_replace(&rd->args);
    1561             : 
    1562           6 :         error = xfs_dir2_isblock(&rd->args, &is_block);
    1563           6 :         if (error)
    1564             :                 return error;
    1565           6 :         if (is_block)
    1566           6 :                 return xfs_dir2_block_replace(&rd->args);
    1567             : 
    1568           0 :         error = xfs_dir2_isleaf(&rd->args, &is_leaf);
    1569           0 :         if (error)
    1570             :                 return error;
    1571           0 :         if (is_leaf)
    1572           0 :                 return xfs_dir2_leaf_replace(&rd->args);
    1573             : 
    1574           0 :         return xfs_dir2_node_replace(&rd->args);
    1575             : }
    1576             : 
    1577             : /*
    1578             :  * Reset the link count of this directory and adjust the unlinked list pointers
    1579             :  * as needed.
    1580             :  */
    1581             : STATIC int
    1582       12813 : xrep_dir_set_nlink(
    1583             :         struct xrep_dir         *rd)
    1584             : {
    1585       12813 :         struct xfs_scrub        *sc = rd->sc;
    1586       12813 :         struct xfs_inode        *dp = sc->ip;
    1587       12813 :         struct xfs_perag        *pag;
    1588       12813 :         int                     error;
    1589             : 
    1590             :         /*
    1591             :          * The directory is not on the incore unlinked list, which means that
    1592             :          * it needs to be reachable via the directory tree.  Update the nlink
    1593             :          * with our observed link count.  If the directory has no parent, it
    1594             :          * will be moved to the orphanage.
    1595             :          */
    1596       12813 :         if (!xfs_inode_on_unlinked_list(dp)) {
    1597       12813 :                 xrep_set_nlink(sc->ip, rd->subdirs + 2);
    1598       12813 :                 return 0;
    1599             :         }
    1600             : 
    1601           0 :         xfs_emerg(dp->i_mount, "IUNLINK unlinked dir 0x%llx repair, dirents %u subdirs %llu curr_nlink %u orphan? %d", dp->i_ino, rd->dirents, rd->subdirs, VFS_I(dp)->i_nlink, rd->needs_adoption);
    1602             : 
    1603             :         /*
    1604             :          * The directory is on the unlinked list and we did not find any
    1605             :          * dirents.  Set the link count to zero and let the directory
    1606             :          * inactivate when the last reference drops.
    1607             :          */
    1608           0 :         if (rd->dirents == 0) {
    1609           0 :                 rd->needs_adoption = false;
    1610           0 :                 xrep_set_nlink(sc->ip, 0);
    1611           0 :                 return 0;
    1612             :         }
    1613             : 
    1614             :         /*
    1615             :          * The directory is on the unlinked list and we found dirents.  This
    1616             :          * directory needs to be reachable via the directory tree.  Remove the
    1617             :          * dir from the unlinked list and update nlink with the observed link
    1618             :          * count.  If the directory has no parent, it will be moved to the
    1619             :          * orphanage.
    1620             :          */
    1621           0 :         pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
    1622           0 :         if (!pag) {
    1623           0 :                 ASSERT(0);
    1624           0 :                 return -EFSCORRUPTED;
    1625             :         }
    1626             : 
    1627           0 :         error = xfs_iunlink_remove(sc->tp, pag, dp);
    1628           0 :         xfs_perag_put(pag);
    1629           0 :         if (error)
    1630             :                 return error;
    1631             : 
    1632           0 :         xrep_set_nlink(sc->ip, rd->subdirs + 2);
    1633           0 :         return 0;
    1634             : }
    1635             : 
    1636             : /*
    1637             :  * Finish replaying stashed dirent updates, allocate a transaction for swapping
    1638             :  * extents, and take the ILOCKs of both directories before we commit the new
    1639             :  * directory structure.
    1640             :  */
    1641             : STATIC int
    1642       12810 : xrep_dir_finalize_tempdir(
    1643             :         struct xrep_dir         *rd)
    1644             : {
    1645       12810 :         struct xfs_scrub        *sc = rd->sc;
    1646       12810 :         int                     error;
    1647             : 
    1648       12810 :         if (!xfs_has_parent(sc->mp))
    1649           0 :                 return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
    1650             : 
    1651       12810 :         error = xrep_dir_replay_updates(rd);
    1652       12814 :         if (error)
    1653             :                 return error;
    1654             : 
    1655       12814 :         error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
    1656       12814 :         if (error)
    1657             :                 return error;
    1658             : 
    1659             :         /*
    1660             :          * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
    1661             :          * possible directory updates during the time when we did not hold the
    1662             :          * ILOCK.  There should not be any dirent updates to replay, but check
    1663             :          * anyway.
    1664             :          */
    1665       12814 :         if (xfarray_length(rd->dir_entries) != 0) {
    1666           0 :                 ASSERT(xfarray_length(rd->dir_entries) == 0);
    1667           0 :                 return -EFSCORRUPTED;
    1668             :         }
    1669             : 
    1670             :         return 0;
    1671             : }
    1672             : 
    1673             : /* Swap the temporary directory's data fork with the one being repaired. */
    1674             : STATIC int
    1675       12812 : xrep_dir_swap(
    1676             :         struct xrep_dir         *rd)
    1677             : {
    1678       12812 :         struct xfs_scrub        *sc = rd->sc;
    1679       12812 :         bool                    ip_local, temp_local;
    1680       12812 :         int                     error = 0;
    1681             : 
    1682             :         /*
    1683             :          * If we never found the parent for this directory, temporarily assign
    1684             :          * the root dir as the parent; we'll move this to the orphanage after
    1685             :          * swapping the dir contents.  We hold the ILOCK of the dir being
    1686             :          * repaired, so we're not worried about racy updates of dotdot.
    1687             :          */
    1688       12812 :         ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
    1689       12812 :         if (rd->pscan.parent_ino == NULLFSINO) {
    1690           0 :                 rd->needs_adoption = true;
    1691           0 :                 rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
    1692             :         }
    1693             : 
    1694             :         /*
    1695             :          * Reset the temporary directory's '..' entry to point to the parent
    1696             :          * that we found.  The temporary directory was created with the root
    1697             :          * directory as the parent, so we can skip this if repairing a
    1698             :          * subdirectory of the root.
    1699             :          *
    1700             :          * It's also possible that this replacement could also expand a sf
    1701             :          * tempdir into block format.
    1702             :          */
    1703       12812 :         if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
    1704       10998 :                 error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
    1705       10998 :                                 rd->pscan.parent_ino, rd->tx.req.resblks);
    1706       10998 :                 if (error)
    1707             :                         return error;
    1708             :         }
    1709             : 
    1710             :         /*
    1711             :          * Changing the dot and dotdot entries could have changed the shape of
    1712             :          * the directory, so we recompute these.
    1713             :          */
    1714       12812 :         ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
    1715       12812 :         temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
    1716             : 
    1717             :         /*
    1718             :          * If the both files have a local format data fork and the rebuilt
    1719             :          * directory data would fit in the repaired file's data fork, copy
    1720             :          * the contents from the tempfile and update the directory link count.
    1721             :          * We're done now.
    1722             :          */
    1723       12812 :         if (ip_local && temp_local &&
    1724       11929 :             sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
    1725       11926 :                 xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
    1726       11930 :                 return xrep_dir_set_nlink(rd);
    1727             :         }
    1728             : 
    1729             :         /* Clean the transaction before we start working on the extent swap. */
    1730         886 :         error = xrep_tempfile_roll_trans(rd->sc);
    1731         883 :         if (error)
    1732             :                 return error;
    1733             : 
    1734             :         /* Otherwise, make sure both data forks are in block-mapping mode. */
    1735         883 :         error = xrep_dir_swap_prep(sc, temp_local, ip_local);
    1736         883 :         if (error)
    1737             :                 return error;
    1738             : 
    1739             :         /*
    1740             :          * Set nlink of the directory in the same transaction sequence that
    1741             :          * (atomically) commits the new directory data.
    1742             :          */
    1743         883 :         error = xrep_dir_set_nlink(rd);
    1744         883 :         if (error)
    1745             :                 return error;
    1746             : 
    1747         883 :         return xrep_tempswap_contents(sc, &rd->tx);
    1748             : }
    1749             : 
    1750             : /*
    1751             :  * Swap the new directory contents (which we created in the tempfile) into the
    1752             :  * directory being repaired.
    1753             :  */
    1754             : STATIC int
    1755       12814 : xrep_dir_rebuild_tree(
    1756             :         struct xrep_dir         *rd)
    1757             : {
    1758       12814 :         struct xfs_scrub        *sc = rd->sc;
    1759       12814 :         int                     error;
    1760             : 
    1761       12814 :         trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
    1762             : 
    1763             :         /*
    1764             :          * Take the IOLOCK on the temporary file so that we can run dir
    1765             :          * operations with the same locks held as we would for a normal file.
    1766             :          * We still hold sc->ip's IOLOCK.
    1767             :          */
    1768       12814 :         error = xrep_tempfile_iolock_polled(rd->sc);
    1769       12811 :         if (error)
    1770             :                 return error;
    1771             : 
    1772             :         /*
    1773             :          * Allocate transaction, lock inodes, and make sure that we've replayed
    1774             :          * all the stashed dirent updates to the tempdir.  After this point,
    1775             :          * we're ready to swapext.
    1776             :          */
    1777       12811 :         error = xrep_dir_finalize_tempdir(rd);
    1778       12813 :         if (error)
    1779             :                 return error;
    1780             : 
    1781       12813 :         if (xchk_iscan_aborted(&rd->pscan.iscan))
    1782             :                 return -ECANCELED;
    1783             : 
    1784             :         /*
    1785             :          * Swap the tempdir's data fork with the file being repaired.  This
    1786             :          * recreates the transaction and re-takes the ILOCK in the scrub
    1787             :          * context.
    1788             :          */
    1789       12813 :         error = xrep_dir_swap(rd);
    1790       12811 :         if (error)
    1791             :                 return error;
    1792             : 
    1793             :         /*
    1794             :          * Release the old directory blocks and reset the data fork of the temp
    1795             :          * directory to an empty shortform directory because inactivation does
    1796             :          * nothing for directories.
    1797             :          */
    1798       12811 :         return xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
    1799             : }
    1800             : 
    1801             : /* Set up the filesystem scan so we can regenerate directory entries. */
    1802             : STATIC int
    1803       12819 : xrep_dir_setup_scan(
    1804             :         struct xrep_dir         *rd)
    1805             : {
    1806       12819 :         struct xfs_scrub        *sc = rd->sc;
    1807       12819 :         char                    *descr;
    1808       12819 :         int                     error;
    1809             : 
    1810             :         /* Set up some staging memory for salvaging dirents. */
    1811       12819 :         descr = xchk_xfile_ino_descr(sc, "directory entries");
    1812       12819 :         error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
    1813             :                         &rd->dir_entries);
    1814       12819 :         kfree(descr);
    1815       12819 :         if (error)
    1816             :                 return error;
    1817             : 
    1818       12819 :         descr = xchk_xfile_ino_descr(sc, "directory entry names");
    1819       12819 :         error = xfblob_create(descr, &rd->dir_names);
    1820       12819 :         kfree(descr);
    1821       12819 :         if (error)
    1822           0 :                 goto out_xfarray;
    1823             : 
    1824       12819 :         if (xfs_has_parent(sc->mp))
    1825       12819 :                 error = __xrep_findparent_scan_start(sc, &rd->pscan,
    1826             :                                 xrep_dir_live_update);
    1827             :         else
    1828           0 :                 error = xrep_findparent_scan_start(sc, &rd->pscan);
    1829       12819 :         if (error)
    1830           0 :                 goto out_xfblob;
    1831             : 
    1832             :         return 0;
    1833             : 
    1834             : out_xfblob:
    1835           0 :         xfblob_destroy(rd->dir_names);
    1836           0 :         rd->dir_names = NULL;
    1837           0 : out_xfarray:
    1838           0 :         xfarray_destroy(rd->dir_entries);
    1839           0 :         rd->dir_entries = NULL;
    1840           0 :         return error;
    1841             : }
    1842             : 
    1843             : /*
    1844             :  * Move the current file to the orphanage.
    1845             :  *
    1846             :  * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks.  Upon
    1847             :  * successful return, the scrub transaction will have enough extra reservation
    1848             :  * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
    1849             :  * orphanage; and both inodes will be ijoined.
    1850             :  */
    1851             : STATIC int
    1852           0 : xrep_dir_move_to_orphanage(
    1853             :         struct xrep_dir         *rd)
    1854             : {
    1855           0 :         struct xfs_scrub        *sc = rd->sc;
    1856           0 :         xfs_ino_t               orig_parent, new_parent;
    1857           0 :         int                     error;
    1858             : 
    1859             :         /* No orphanage?  We can't fix this. */
    1860           0 :         if (!sc->orphanage)
    1861             :                 return -EFSCORRUPTED;
    1862             : 
    1863             :         /*
    1864             :          * We are about to drop the ILOCK on sc->ip to lock the orphanage and
    1865             :          * prepare for the adoption.  Therefore, look up the old dotdot entry
    1866             :          * for sc->ip so that we can compare it after we re-lock sc->ip.
    1867             :          */
    1868           0 :         error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
    1869           0 :         if (error)
    1870             :                 return error;
    1871             : 
    1872             :         /*
    1873             :          * We hold ILOCK_EXCL on both the directory and the tempdir after a
    1874             :          * successful rebuild.  Before we can move the directory to the
    1875             :          * orphanage, we must roll to a clean unjoined transaction.
    1876             :          */
    1877           0 :         error = xfs_trans_roll(&sc->tp);
    1878           0 :         if (error)
    1879             :                 return error;
    1880             : 
    1881             :         /*
    1882             :          * Because the orphanage is just another directory in the filesystem,
    1883             :          * we must take its IOLOCK to coordinate with the VFS.  We cannot take
    1884             :          * an IOLOCK while holding an ILOCK, so we must drop them all.  We may
    1885             :          * have to drop the IOLOCK as well.
    1886             :          */
    1887           0 :         xrep_tempfile_iunlock_both(sc);
    1888             : 
    1889           0 :         error = xrep_adoption_init(sc, &rd->adoption);
    1890           0 :         if (error)
    1891             :                 return error;
    1892             : 
    1893           0 :         if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
    1894           0 :                 xchk_iunlock(sc, sc->ilock_flags);
    1895           0 :                 error = xrep_orphanage_iolock_two(sc);
    1896           0 :                 if (error)
    1897           0 :                         goto err_adoption;
    1898             :         }
    1899             : 
    1900             :         /* Prepare for the adoption and lock both down. */
    1901           0 :         error = xrep_adoption_prep(&rd->adoption);
    1902           0 :         if (error)
    1903           0 :                 goto err_adoption;
    1904             : 
    1905           0 :         error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name);
    1906           0 :         if (error)
    1907           0 :                 goto err_adoption;
    1908             : 
    1909             :         /*
    1910             :          * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
    1911             :          * entry again.  If the parent changed or the child was unlinked while
    1912             :          * the child directory was unlocked, we don't need to move the child to
    1913             :          * the orphanage after all.
    1914             :          */
    1915           0 :         error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
    1916           0 :         if (error)
    1917           0 :                 goto err_adoption;
    1918           0 :         if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
    1919           0 :                 error = 0;
    1920           0 :                 goto err_adoption;
    1921             :         }
    1922             : 
    1923             :         /* Attach to the orphanage. */
    1924           0 :         return xrep_adoption_commit(&rd->adoption);
    1925           0 : err_adoption:
    1926           0 :         xrep_adoption_cancel(&rd->adoption, error);
    1927           0 :         return error;
    1928             : }
    1929             : 
    1930             : /*
    1931             :  * Repair the directory metadata.
    1932             :  *
    1933             :  * XXX: Directory entry buffers can be multiple fsblocks in size.  The buffer
    1934             :  * cache in XFS can't handle aliased multiblock buffers, so this might
    1935             :  * misbehave if the directory blocks are crosslinked with other filesystem
    1936             :  * metadata.
    1937             :  *
    1938             :  * XXX: Is it necessary to check the dcache for this directory to make sure
    1939             :  * that we always recreate every cached entry?
    1940             :  */
    1941             : int
    1942      183695 : xrep_directory(
    1943             :         struct xfs_scrub        *sc)
    1944             : {
    1945      183695 :         struct xrep_dir         *rd = sc->buf;
    1946      183695 :         int                     error;
    1947             : 
    1948             :         /* The rmapbt is required to reap the old data fork. */
    1949      183695 :         if (!xfs_has_rmapbt(sc->mp))
    1950             :                 return -EOPNOTSUPP;
    1951             : 
    1952       12819 :         error = xrep_dir_setup_scan(rd);
    1953       12819 :         if (error)
    1954             :                 return error;
    1955             : 
    1956       12819 :         if (xfs_has_parent(sc->mp))
    1957       12819 :                 error = xrep_dir_scan_dirtree(rd);
    1958             :         else
    1959           0 :                 error = xrep_dir_salvage_entries(rd);
    1960       12819 :         if (error)
    1961           5 :                 goto out_teardown;
    1962             : 
    1963             :         /* Last chance to abort before we start committing fixes. */
    1964       12814 :         if (xchk_should_terminate(sc, &error))
    1965           0 :                 goto out_teardown;
    1966             : 
    1967       12810 :         error = xrep_dir_rebuild_tree(rd);
    1968       12807 :         if (error)
    1969           0 :                 goto out_teardown;
    1970             : 
    1971       12807 :         if (rd->needs_adoption) {
    1972           0 :                 error = xrep_dir_move_to_orphanage(rd);
    1973           0 :                 if (error)
    1974             :                         goto out_teardown;
    1975             :         }
    1976             : 
    1977       12807 : out_teardown:
    1978       12812 :         xrep_dir_teardown(sc);
    1979       12812 :         return error;
    1980             : }

Generated by: LCOV version 1.14