LCOV - code coverage report
Current view: top level - fs/xfs - xfs_inode.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 1050 1224 85.8 %
Date: 2023-07-31 20:08:34 Functions: 53 56 94.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include <linux/iversion.h>
       7             : 
       8             : #include "xfs.h"
       9             : #include "xfs_fs.h"
      10             : #include "xfs_shared.h"
      11             : #include "xfs_format.h"
      12             : #include "xfs_log_format.h"
      13             : #include "xfs_trans_resv.h"
      14             : #include "xfs_mount.h"
      15             : #include "xfs_defer.h"
      16             : #include "xfs_inode.h"
      17             : #include "xfs_dir2.h"
      18             : #include "xfs_attr.h"
      19             : #include "xfs_bit.h"
      20             : #include "xfs_trans_space.h"
      21             : #include "xfs_trans.h"
      22             : #include "xfs_buf_item.h"
      23             : #include "xfs_inode_item.h"
      24             : #include "xfs_iunlink_item.h"
      25             : #include "xfs_ialloc.h"
      26             : #include "xfs_bmap.h"
      27             : #include "xfs_bmap_util.h"
      28             : #include "xfs_errortag.h"
      29             : #include "xfs_error.h"
      30             : #include "xfs_quota.h"
      31             : #include "xfs_filestream.h"
      32             : #include "xfs_trace.h"
      33             : #include "xfs_icache.h"
      34             : #include "xfs_symlink.h"
      35             : #include "xfs_trans_priv.h"
      36             : #include "xfs_log.h"
      37             : #include "xfs_bmap_btree.h"
      38             : #include "xfs_reflink.h"
      39             : #include "xfs_ag.h"
      40             : #include "xfs_log_priv.h"
      41             : #include "xfs_health.h"
      42             : #include "xfs_pnfs.h"
      43             : #include "xfs_parent.h"
      44             : #include "xfs_xattr.h"
      45             : #include "xfs_inode_util.h"
      46             : #include "xfs_imeta.h"
      47             : 
      48             : struct kmem_cache *xfs_inode_cache;
      49             : 
      50             : /*
      51             :  * These two are wrapper routines around the xfs_ilock() routine used to
      52             :  * centralize some grungy code.  They are used in places that wish to lock the
      53             :  * inode solely for reading the extents.  The reason these places can't just
      54             :  * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to
      55             :  * bringing in of the extents from disk for a file in b-tree format.  If the
      56             :  * inode is in b-tree format, then we need to lock the inode exclusively until
      57             :  * the extents are read in.  Locking it exclusively all the time would limit
      58             :  * our parallelism unnecessarily, though.  What we do instead is check to see
      59             :  * if the extents have been read in yet, and only lock the inode exclusively
      60             :  * if they have not.
      61             :  *
      62             :  * The functions return a value which should be given to the corresponding
      63             :  * xfs_iunlock() call.
      64             :  */
      65             : uint
      66  1209835115 : xfs_ilock_data_map_shared(
      67             :         struct xfs_inode        *ip)
      68             : {
      69  1209835115 :         uint                    lock_mode = XFS_ILOCK_SHARED;
      70             : 
      71  1209835115 :         if (xfs_need_iread_extents(&ip->i_df))
      72       63516 :                 lock_mode = XFS_ILOCK_EXCL;
      73  1209696431 :         xfs_ilock(ip, lock_mode);
      74  1209696886 :         return lock_mode;
      75             : }
      76             : 
      77             : uint
      78  2261941791 : xfs_ilock_attr_map_shared(
      79             :         struct xfs_inode        *ip)
      80             : {
      81  2261941791 :         uint                    lock_mode = XFS_ILOCK_SHARED;
      82             : 
      83  4467255929 :         if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
      84           1 :                 lock_mode = XFS_ILOCK_EXCL;
      85  2252849620 :         xfs_ilock(ip, lock_mode);
      86  2246762709 :         return lock_mode;
      87             : }
      88             : 
      89             : /*
      90             :  * You can't set both SHARED and EXCL for the same lock,
      91             :  * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
      92             :  * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
      93             :  * to set in lock_flags.
      94             :  */
      95             : static inline void
      96 >30661*10^7 : xfs_lock_flags_assert(
      97             :         uint            lock_flags)
      98             : {
      99 >30661*10^7 :         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
     100             :                 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
     101 >30661*10^7 :         ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
     102             :                 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
     103 >30661*10^7 :         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
     104             :                 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
     105 >30661*10^7 :         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
     106 >30661*10^7 :         ASSERT(lock_flags != 0);
     107 >30661*10^7 : }
     108             : 
     109             : /*
     110             :  * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
     111             :  * multi-reader locks: invalidate_lock and the i_lock.  This routine allows
     112             :  * various combinations of the locks to be obtained.
     113             :  *
     114             :  * The 3 locks should always be ordered so that the IO lock is obtained first,
     115             :  * the mmap lock second and the ilock last in order to prevent deadlock.
     116             :  *
     117             :  * Basic locking order:
     118             :  *
     119             :  * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
     120             :  *
     121             :  * mmap_lock locking order:
     122             :  *
     123             :  * i_rwsem -> page lock -> mmap_lock
     124             :  * mmap_lock -> invalidate_lock -> page_lock
     125             :  *
     126             :  * The difference in mmap_lock locking order mean that we cannot hold the
     127             :  * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
     128             :  * can fault in pages during copy in/out (for buffered IO) or require the
     129             :  * mmap_lock in get_user_pages() to map the user pages into the kernel address
     130             :  * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
     131             :  * fault because page faults already hold the mmap_lock.
     132             :  *
     133             :  * Hence to serialise fully against both syscall and mmap based IO, we need to
     134             :  * take both the i_rwsem and the invalidate_lock. These locks should *only* be
     135             :  * both taken in places where we need to invalidate the page cache in a race
     136             :  * free manner (e.g. truncate, hole punch and other extent manipulation
     137             :  * functions).
     138             :  */
     139             : void
     140 >15201*10^7 : xfs_ilock(
     141             :         xfs_inode_t             *ip,
     142             :         uint                    lock_flags)
     143             : {
     144 >15201*10^7 :         trace_xfs_ilock(ip, lock_flags, _RET_IP_);
     145             : 
     146 >15189*10^7 :         xfs_lock_flags_assert(lock_flags);
     147             : 
     148 >15206*10^7 :         if (lock_flags & XFS_IOLOCK_EXCL) {
     149  1198112711 :                 down_write_nested(&VFS_I(ip)->i_rwsem,
     150             :                                   XFS_IOLOCK_DEP(lock_flags));
     151 >15086*10^7 :         } else if (lock_flags & XFS_IOLOCK_SHARED) {
     152  1714469836 :                 down_read_nested(&VFS_I(ip)->i_rwsem,
     153             :                                  XFS_IOLOCK_DEP(lock_flags));
     154             :         }
     155             : 
     156 >15206*10^7 :         if (lock_flags & XFS_MMAPLOCK_EXCL) {
     157   190439929 :                 down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
     158             :                                   XFS_MMAPLOCK_DEP(lock_flags));
     159 >15187*10^7 :         } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
     160   261429943 :                 down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
     161             :                                  XFS_MMAPLOCK_DEP(lock_flags));
     162             :         }
     163             : 
     164 >15206*10^7 :         if (lock_flags & XFS_ILOCK_EXCL)
     165  5189797448 :                 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
     166 >14687*10^7 :         else if (lock_flags & XFS_ILOCK_SHARED)
     167 >14360*10^7 :                 mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
     168 >15322*10^7 : }
     169             : 
     170             : /*
     171             :  * This is just like xfs_ilock(), except that the caller
     172             :  * is guaranteed not to sleep.  It returns 1 if it gets
     173             :  * the requested locks and 0 otherwise.  If the IO lock is
     174             :  * obtained but the inode lock cannot be, then the IO lock
     175             :  * is dropped before returning.
     176             :  *
     177             :  * ip -- the inode being locked
     178             :  * lock_flags -- this parameter indicates the inode's locks to be
     179             :  *       to be locked.  See the comment for xfs_ilock() for a list
     180             :  *       of valid values.
     181             :  */
     182             : int
     183  3238819471 : xfs_ilock_nowait(
     184             :         xfs_inode_t             *ip,
     185             :         uint                    lock_flags)
     186             : {
     187  3238819471 :         trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
     188             : 
     189  3237572656 :         xfs_lock_flags_assert(lock_flags);
     190             : 
     191  3238450025 :         if (lock_flags & XFS_IOLOCK_EXCL) {
     192   794389434 :                 if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
     193     2592329 :                         goto out;
     194  2444060591 :         } else if (lock_flags & XFS_IOLOCK_SHARED) {
     195   280203339 :                 if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
     196    42328712 :                         goto out;
     197             :         }
     198             : 
     199  3194849215 :         if (lock_flags & XFS_MMAPLOCK_EXCL) {
     200      926493 :                 if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
     201          39 :                         goto out_undo_iolock;
     202  3193922722 :         } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
     203           0 :                 if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
     204           0 :                         goto out_undo_iolock;
     205             :         }
     206             : 
     207  3194849422 :         if (lock_flags & XFS_ILOCK_EXCL) {
     208  1205886246 :                 if (!mrtryupdate(&ip->i_lock))
     209       42558 :                         goto out_undo_mmaplock;
     210  1988963176 :         } else if (lock_flags & XFS_ILOCK_SHARED) {
     211   957833464 :                 if (!mrtryaccess(&ip->i_lock))
     212     3470343 :                         goto out_undo_mmaplock;
     213             :         }
     214             :         return 1;
     215             : 
     216     3512901 : out_undo_mmaplock:
     217     3512901 :         if (lock_flags & XFS_MMAPLOCK_EXCL)
     218           0 :                 up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
     219     3512901 :         else if (lock_flags & XFS_MMAPLOCK_SHARED)
     220           0 :                 up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
     221     3512901 : out_undo_iolock:
     222     3512940 :         if (lock_flags & XFS_IOLOCK_EXCL)
     223           0 :                 up_write(&VFS_I(ip)->i_rwsem);
     224     3512940 :         else if (lock_flags & XFS_IOLOCK_SHARED)
     225           0 :                 up_read(&VFS_I(ip)->i_rwsem);
     226     3512940 : out:
     227             :         return 0;
     228             : }
     229             : 
     230             : /*
     231             :  * xfs_iunlock() is used to drop the inode locks acquired with
     232             :  * xfs_ilock() and xfs_ilock_nowait().  The caller must pass
     233             :  * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
     234             :  * that we know which locks to drop.
     235             :  *
     236             :  * ip -- the inode being unlocked
     237             :  * lock_flags -- this parameter indicates the inode's locks to be
     238             :  *       to be unlocked.  See the comment for xfs_ilock() for a list
     239             :  *       of valid values for this parameter.
     240             :  *
     241             :  */
     242             : void
     243 >15478*10^7 : xfs_iunlock(
     244             :         xfs_inode_t             *ip,
     245             :         uint                    lock_flags)
     246             : {
     247 >15478*10^7 :         xfs_lock_flags_assert(lock_flags);
     248             : 
     249 >15406*10^7 :         if (lock_flags & XFS_IOLOCK_EXCL)
     250  1987373862 :                 up_write(&VFS_I(ip)->i_rwsem);
     251 >15207*10^7 :         else if (lock_flags & XFS_IOLOCK_SHARED)
     252  1954269543 :                 up_read(&VFS_I(ip)->i_rwsem);
     253             : 
     254 >15406*10^7 :         if (lock_flags & XFS_MMAPLOCK_EXCL)
     255   191695833 :                 up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
     256 >15387*10^7 :         else if (lock_flags & XFS_MMAPLOCK_SHARED)
     257   261482142 :                 up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
     258             : 
     259 >15406*10^7 :         if (lock_flags & XFS_ILOCK_EXCL)
     260  6399515454 :                 mrunlock_excl(&ip->i_lock);
     261 >14766*10^7 :         else if (lock_flags & XFS_ILOCK_SHARED)
     262 >14468*10^7 :                 mrunlock_shared(&ip->i_lock);
     263             : 
     264 >15465*10^7 :         trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
     265 >15290*10^7 : }
     266             : 
     267             : /*
     268             :  * give up write locks.  the i/o lock cannot be held nested
     269             :  * if it is being demoted.
     270             :  */
     271             : void
     272     2289878 : xfs_ilock_demote(
     273             :         xfs_inode_t             *ip,
     274             :         uint                    lock_flags)
     275             : {
     276     2289878 :         ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
     277     2289878 :         ASSERT((lock_flags &
     278             :                 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
     279             : 
     280     2289878 :         if (lock_flags & XFS_ILOCK_EXCL)
     281           0 :                 mrdemote(&ip->i_lock);
     282     2289878 :         if (lock_flags & XFS_MMAPLOCK_EXCL)
     283           0 :                 downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
     284     2289878 :         if (lock_flags & XFS_IOLOCK_EXCL)
     285     2289813 :                 downgrade_write(&VFS_I(ip)->i_rwsem);
     286             : 
     287     2289971 :         trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
     288     2289876 : }
     289             : 
     290             : #if defined(DEBUG) || defined(XFS_WARN)
     291             : static inline bool
     292             : __xfs_rwsem_islocked(
     293             :         struct rw_semaphore     *rwsem,
     294             :         bool                    shared)
     295             : {
     296  1726163655 :         if (!debug_locks)
     297           0 :                 return rwsem_is_locked(rwsem);
     298             : 
     299             :         if (!shared)
     300             :                 return lockdep_is_held_type(rwsem, 0);
     301             : 
     302             :         /*
     303             :          * We are checking that the lock is held at least in shared
     304             :          * mode but don't care that it might be held exclusively
     305             :          * (i.e. shared | excl). Hence we check if the lock is held
     306             :          * in any mode rather than an explicit shared mode.
     307             :          */
     308             :         return lockdep_is_held_type(rwsem, -1);
     309             : }
     310             : 
     311             : bool
     312 52228042940 : xfs_isilocked(
     313             :         struct xfs_inode        *ip,
     314             :         uint                    lock_flags)
     315             : {
     316 52228042940 :         if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
     317 51023214387 :                 if (!(lock_flags & XFS_ILOCK_SHARED))
     318 21764679481 :                         return !!ip->i_lock.mr_writer;
     319 29258534906 :                 return rwsem_is_locked(&ip->i_lock.mr_lock);
     320             :         }
     321             : 
     322  1204828553 :         if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
     323    22609150 :                 return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
     324             :                                 (lock_flags & XFS_MMAPLOCK_SHARED));
     325             :         }
     326             : 
     327  1182219403 :         if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
     328  1203295360 :                 return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
     329             :                                 (lock_flags & XFS_IOLOCK_SHARED));
     330             :         }
     331             : 
     332           0 :         ASSERT(0);
     333           0 :         return false;
     334             : }
     335             : #endif
     336             : 
     337             : /*
     338             :  * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
     339             :  * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
     340             :  * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build
     341             :  * errors and warnings.
     342             :  */
     343             : #if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
     344             : static bool
     345             : xfs_lockdep_subclass_ok(
     346             :         int subclass)
     347             : {
     348             :         return subclass < MAX_LOCKDEP_SUBCLASSES;
     349             : }
     350             : #else
     351             : #define xfs_lockdep_subclass_ok(subclass)       (true)
     352             : #endif
     353             : 
     354             : /*
     355             :  * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
     356             :  * value. This can be called for any type of inode lock combination, including
     357             :  * parent locking. Care must be taken to ensure we don't overrun the subclass
     358             :  * storage fields in the class mask we build.
     359             :  */
     360             : static inline uint
     361   555559890 : xfs_lock_inumorder(
     362             :         uint    lock_mode,
     363             :         uint    subclass)
     364             : {
     365   555559890 :         uint    class = 0;
     366             : 
     367   555559890 :         ASSERT(!(lock_mode & XFS_ILOCK_PARENT));
     368   555559890 :         ASSERT(xfs_lockdep_subclass_ok(subclass));
     369             : 
     370   555559890 :         if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
     371           0 :                 ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
     372           0 :                 class += subclass << XFS_IOLOCK_SHIFT;
     373             :         }
     374             : 
     375   555559890 :         if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
     376           0 :                 ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
     377           0 :                 class += subclass << XFS_MMAPLOCK_SHIFT;
     378             :         }
     379             : 
     380   555559890 :         if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
     381   555585324 :                 ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
     382   555585324 :                 class += subclass << XFS_ILOCK_SHIFT;
     383             :         }
     384             : 
     385   555559890 :         return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
     386             : }
     387             : 
     388             : /*
     389             :  * The following routine will lock n inodes in exclusive mode.  We assume the
     390             :  * caller calls us with the inodes in i_ino order.
     391             :  *
     392             :  * We need to detect deadlock where an inode that we lock is in the AIL and we
     393             :  * start waiting for another inode that is locked by a thread in a long running
     394             :  * transaction (such as truncate). This can result in deadlock since the long
     395             :  * running trans might need to wait for the inode we just locked in order to
     396             :  * push the tail and free space in the log.
     397             :  *
     398             :  * xfs_lock_inodes() can only be used to lock one type of lock at a time -
     399             :  * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
     400             :  * lock more than one at a time, lockdep will report false positives saying we
     401             :  * have violated locking orders.
     402             :  */
     403             : void
     404    50653154 : xfs_lock_inodes(
     405             :         struct xfs_inode        **ips,
     406             :         int                     inodes,
     407             :         uint                    lock_mode)
     408             : {
     409    50653154 :         int                     attempts = 0;
     410    50653154 :         uint                    i;
     411    50653154 :         int                     j;
     412    50653154 :         bool                    try_lock;
     413    50653154 :         struct xfs_log_item     *lp;
     414             : 
     415             :         /*
     416             :          * Currently supports between 2 and 5 inodes with exclusive locking.  We
     417             :          * support an arbitrary depth of locking here, but absolute limits on
     418             :          * inodes depend on the type of locking and the limits placed by
     419             :          * lockdep annotations in xfs_lock_inumorder.  These are all checked by
     420             :          * the asserts.
     421             :          */
     422    50653154 :         ASSERT(ips && inodes >= 2 && inodes <= 5);
     423    50653154 :         ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
     424             :                             XFS_ILOCK_EXCL));
     425    50653154 :         ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
     426             :                               XFS_ILOCK_SHARED)));
     427    50653154 :         ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
     428             :                 inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
     429    50653154 :         ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
     430             :                 inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
     431             : 
     432    50653154 :         if (lock_mode & XFS_IOLOCK_EXCL) {
     433           0 :                 ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
     434    50653154 :         } else if (lock_mode & XFS_MMAPLOCK_EXCL)
     435           0 :                 ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
     436             : 
     437    50653154 : again:
     438    50673510 :         try_lock = false;
     439    50673510 :         i = 0;
     440   220959355 :         for (; i < inodes; i++) {
     441   170305147 :                 ASSERT(ips[i]);
     442             : 
     443   170305147 :                 if (i && (ips[i] == ips[i - 1]))        /* Already locked */
     444     3499977 :                         continue;
     445             : 
     446             :                 /*
     447             :                  * If try_lock is not set yet, make sure all locked inodes are
     448             :                  * not in the AIL.  If any are, set try_lock to be used later.
     449             :                  */
     450   166805170 :                 if (!try_lock) {
     451   240015621 :                         for (j = (i - 1); j >= 0 && !try_lock; j--) {
     452   110421372 :                                 lp = &ips[j]->i_itemp->ili_item;
     453   220224755 :                                 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
     454    32331298 :                                         try_lock = true;
     455             :                         }
     456             :                 }
     457             : 
     458             :                 /*
     459             :                  * If any of the previous locks we have locked is in the AIL,
     460             :                  * we must TRY to get the second and subsequent locks. If
     461             :                  * we can't get any, we must release all we have
     462             :                  * and try again.
     463             :                  */
     464   166805170 :                 if (!try_lock) {
     465    97263389 :                         xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
     466    97263804 :                         continue;
     467             :                 }
     468             : 
     469             :                 /* try_lock means we have an inode locked that is in the AIL. */
     470    69541781 :                 ASSERT(i != 0);
     471    69541781 :                 if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
     472    69522064 :                         continue;
     473             : 
     474             :                 /*
     475             :                  * Unlock all previous guys and try again.  xfs_iunlock will try
     476             :                  * to push the tail if the inode is in the AIL.
     477             :                  */
     478       20356 :                 attempts++;
     479       59175 :                 for (j = i - 1; j >= 0; j--) {
     480             :                         /*
     481             :                          * Check to see if we've already unlocked this one.  Not
     482             :                          * the first one going back, and the inode ptr is the
     483             :                          * same.
     484             :                          */
     485       38819 :                         if (j != (i - 1) && ips[j] == ips[j + 1])
     486        7635 :                                 continue;
     487             : 
     488       31184 :                         xfs_iunlock(ips[j], lock_mode);
     489             :                 }
     490             : 
     491       20356 :                 if ((attempts % 5) == 0) {
     492        3890 :                         delay(1); /* Don't just spin the CPU */
     493             :                 }
     494       20356 :                 goto again;
     495             :         }
     496    50654208 : }
     497             : 
     498             : /*
     499             :  * xfs_lock_two_inodes() can only be used to lock ilock. The iolock and
     500             :  * mmaplock must be double-locked separately since we use i_rwsem and
     501             :  * invalidate_lock for that. We now support taking one lock EXCL and the
     502             :  * other SHARED.
     503             :  */
     504             : void
     505   194417393 : xfs_lock_two_inodes(
     506             :         struct xfs_inode        *ip0,
     507             :         uint                    ip0_mode,
     508             :         struct xfs_inode        *ip1,
     509             :         uint                    ip1_mode)
     510             : {
     511   194417393 :         int                     attempts = 0;
     512   194417393 :         struct xfs_log_item     *lp;
     513             : 
     514   194417393 :         ASSERT(hweight32(ip0_mode) == 1);
     515   194417393 :         ASSERT(hweight32(ip1_mode) == 1);
     516   194417393 :         ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
     517   194417393 :         ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
     518   194417393 :         ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
     519   194417393 :         ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
     520   194417393 :         ASSERT(ip0->i_ino != ip1->i_ino);
     521             : 
     522   194417393 :         if (ip0->i_ino > ip1->i_ino) {
     523    23399876 :                 swap(ip0, ip1);
     524    23399876 :                 swap(ip0_mode, ip1_mode);
     525             :         }
     526             : 
     527   194417393 :  again:
     528   194432066 :         xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
     529             : 
     530             :         /*
     531             :          * If the first lock we have locked is in the AIL, we must TRY to get
     532             :          * the second lock. If we can't get it, we must release the first one
     533             :          * and try again.
     534             :          */
     535   194447643 :         lp = &ip0->i_itemp->ili_item;
     536   194447643 :         if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
     537   140804274 :                 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
     538       14673 :                         xfs_iunlock(ip0, ip0_mode);
     539       14673 :                         if ((++attempts % 5) == 0)
     540        2814 :                                 delay(1); /* Don't just spin the CPU */
     541       14673 :                         goto again;
     542             :                 }
     543             :         } else {
     544    53643369 :                 xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
     545             :         }
     546   194451886 : }
     547             : 
     548             : /*
     549             :  * Lookups up an inode from "name". If ci_name is not NULL, then a CI match
     550             :  * is allowed, otherwise it has to be an exact match. If a CI match is found,
     551             :  * ci_name->name will point to a the actual name (caller must free) or
     552             :  * will be set to NULL if an exact match is found.
     553             :  */
     554             : int
     555   217727661 : xfs_lookup(
     556             :         struct xfs_inode        *dp,
     557             :         const struct xfs_name   *name,
     558             :         struct xfs_inode        **ipp,
     559             :         struct xfs_name         *ci_name)
     560             : {
     561   217727661 :         xfs_ino_t               inum;
     562   217727661 :         int                     error;
     563             : 
     564   217727661 :         trace_xfs_lookup(dp, name);
     565             : 
     566   435140954 :         if (xfs_is_shutdown(dp->i_mount))
     567             :                 return -EIO;
     568             : 
     569   217501013 :         error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
     570   217935814 :         if (error)
     571   161097998 :                 goto out_unlock;
     572             : 
     573    56837816 :         error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
     574    56845512 :         if (error)
     575        4177 :                 goto out_free_name;
     576             : 
     577             :         /*
     578             :          * Make sure that a corrupt directory cannot accidentally link to a
     579             :          * metadata file.
     580             :          */
     581    56841335 :         if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) {
     582           0 :                 xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR);
     583           0 :                 error = -EFSCORRUPTED;
     584           0 :                 goto out_irele;
     585             :         }
     586             : 
     587             :         return 0;
     588             : 
     589             : out_irele:
     590           0 :         xfs_irele(*ipp);
     591        4177 : out_free_name:
     592        4177 :         if (ci_name)
     593           0 :                 kmem_free(ci_name->name);
     594        4177 : out_unlock:
     595   161102175 :         *ipp = NULL;
     596   161102175 :         return error;
     597             : }
     598             : 
     599             : /*
     600             :  * Initialise a newly allocated inode and return the in-core inode to the
     601             :  * caller locked exclusively.
     602             :  *
     603             :  * Caller is responsible for unlocking the inode manually upon return
     604             :  */
     605             : int
     606   123443153 : xfs_icreate(
     607             :         struct xfs_trans        *tp,
     608             :         xfs_ino_t               ino,
     609             :         const struct xfs_icreate_args *args,
     610             :         struct xfs_inode        **ipp)
     611             : {
     612   123443153 :         struct xfs_mount        *mp = tp->t_mountp;
     613   123443153 :         struct xfs_inode        *ip = NULL;
     614   123443153 :         int                     error;
     615             : 
     616             :         /*
     617             :          * Get the in-core inode with the lock held exclusively to prevent
     618             :          * others from looking at until we're done.
     619             :          */
     620   123443153 :         error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
     621   123877494 :         if (error)
     622             :                 return error;
     623             : 
     624   123877483 :         ASSERT(ip != NULL);
     625   123877483 :         xfs_trans_ijoin(tp, ip, 0);
     626   123589418 :         xfs_inode_init(tp, args, ip);
     627             : 
     628             :         /* now that we have an i_mode we can setup the inode structure */
     629   124061491 :         xfs_setup_inode(ip);
     630             : 
     631   123613261 :         *ipp = ip;
     632   123613261 :         return 0;
     633             : }
     634             : 
     635             : /* Set up inode attributes for newly created children of a directory. */
     636             : void
     637   116803360 : xfs_icreate_args_inherit(
     638             :         struct xfs_icreate_args *args,
     639             :         struct xfs_inode        *dp,
     640             :         struct mnt_idmap        *idmap,
     641             :         umode_t                 mode,
     642             :         bool                    init_xattrs)
     643             : {
     644   116803360 :         args->idmap = idmap;
     645   116803360 :         args->pip = dp;
     646   116803360 :         args->uid = mapped_fsuid(idmap, &init_user_ns);
     647   116100889 :         args->gid = mapped_fsgid(idmap, &init_user_ns);
     648   116147116 :         args->prid = xfs_get_initial_prid(dp);
     649   116440711 :         args->mode = mode;
     650             : 
     651             :         /* Don't clobber the caller's flags */
     652   116440711 :         if (init_xattrs)
     653    34998908 :                 args->flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
     654   116440711 : }
     655             : 
     656             : /* Set up inode attributes for newly created internal files. */
     657             : void
     658    14736492 : xfs_icreate_args_rootfile(
     659             :         struct xfs_icreate_args *args,
     660             :         struct xfs_mount        *mp,
     661             :         umode_t                 mode,
     662             :         bool                    init_xattrs)
     663             : {
     664    14736492 :         args->idmap = &nop_mnt_idmap;
     665    14736492 :         args->uid = GLOBAL_ROOT_UID;
     666    14736492 :         args->gid = GLOBAL_ROOT_GID;
     667    14736492 :         args->prid = 0;
     668    14736492 :         args->mode = mode;
     669    14736492 :         args->flags = XFS_ICREATE_ARGS_FORCE_UID |
     670             :                       XFS_ICREATE_ARGS_FORCE_GID |
     671             :                       XFS_ICREATE_ARGS_FORCE_MODE;
     672    14736492 :         if (init_xattrs)
     673       82472 :                 args->flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
     674    14736492 : }
     675             : 
     676             : int
     677   130819922 : xfs_icreate_dqalloc(
     678             :         const struct xfs_icreate_args   *args,
     679             :         struct xfs_dquot                **udqpp,
     680             :         struct xfs_dquot                **gdqpp,
     681             :         struct xfs_dquot                **pdqpp)
     682             : {
     683   130819922 :         unsigned int                    flags = XFS_QMOPT_QUOTALL;
     684             : 
     685   130819922 :         *udqpp = *gdqpp = *pdqpp = NULL;
     686             : 
     687   130819922 :         if (!(args->flags & XFS_ICREATE_ARGS_FORCE_GID))
     688   116206551 :                 flags |= XFS_QMOPT_INHERIT;
     689             : 
     690   130819922 :         return xfs_qm_vop_dqalloc(args->pip, args->uid, args->gid, args->prid,
     691             :                         flags, udqpp, gdqpp, pdqpp);
     692             : }
     693             : 
     694             : int
     695    73398889 : xfs_create(
     696             :         struct xfs_inode        *dp,
     697             :         struct xfs_name         *name,
     698             :         const struct xfs_icreate_args *args,
     699             :         struct xfs_inode        **ipp)
     700             : {
     701    73398889 :         struct xfs_dir_update   du = {
     702             :                 .dp             = dp,
     703             :                 .name           = name,
     704             :         };
     705    73398889 :         struct xfs_mount        *mp = dp->i_mount;
     706    73398889 :         struct xfs_trans        *tp = NULL;
     707    73398889 :         struct xfs_dquot        *udqp;
     708    73398889 :         struct xfs_dquot        *gdqp;
     709    73398889 :         struct xfs_dquot        *pdqp;
     710    73398889 :         struct xfs_trans_res    *tres;
     711    73398889 :         xfs_ino_t               ino;
     712    73398889 :         bool                    unlock_dp_on_error = false;
     713    73398889 :         bool                    is_dir = S_ISDIR(args->mode);
     714    73398889 :         uint                    resblks;
     715    73398889 :         int                     error;
     716             : 
     717    73398889 :         ASSERT(args->pip == dp);
     718    73398889 :         trace_xfs_create(dp, name);
     719             : 
     720   146248486 :         if (xfs_is_shutdown(mp))
     721             :                 return -EIO;
     722             : 
     723             :         /*
     724             :          * Make sure that we have allocated dquot(s) on disk.
     725             :          */
     726    73124225 :         error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
     727    73726382 :         if (error)
     728             :                 return error;
     729             : 
     730    73725612 :         if (is_dir) {
     731    13177005 :                 resblks = xfs_mkdir_space_res(mp, name->len);
     732    13173768 :                 tres = &M_RES(mp)->tr_mkdir;
     733             :         } else {
     734    60548607 :                 resblks = xfs_create_space_res(mp, name->len);
     735    60177065 :                 tres = &M_RES(mp)->tr_create;
     736             :         }
     737             : 
     738    73350833 :         error = xfs_parent_start(mp, &du.parent);
     739    73493298 :         if (error)
     740         173 :                 goto out_release_dquots;
     741             : 
     742             :         /*
     743             :          * Initially assume that the file does not exist and
     744             :          * reserve the resources for that case.  If that is not
     745             :          * the case we'll drop the one we have and get a more
     746             :          * appropriate transaction later.
     747             :          */
     748    73493125 :         error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
     749             :                         &tp);
     750    73826087 :         if (error == -ENOSPC) {
     751             :                 /* flush outstanding delalloc blocks and retry */
     752      684825 :                 xfs_flush_inodes(mp);
     753      684633 :                 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
     754             :                                 resblks, &tp);
     755             :         }
     756    73825874 :         if (error)
     757      640266 :                 goto out_parent;
     758             : 
     759    73185608 :         xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
     760    73177245 :         unlock_dp_on_error = true;
     761             : 
     762             :         /*
     763             :          * A newly created regular or special file just has one directory
     764             :          * entry pointing to them, but a directory also the "." entry
     765             :          * pointing to itself.
     766             :          */
     767    73177245 :         error = xfs_dialloc(&tp, dp, args->mode, &ino);
     768    72994779 :         if (!error)
     769    72907520 :                 error = xfs_icreate(tp, ino, args, &du.ip);
     770    72981592 :         if (error)
     771      158950 :                 goto out_trans_cancel;
     772             : 
     773             :         /*
     774             :          * Now we join the directory inode to the transaction.  We do not do it
     775             :          * earlier because xfs_dialloc might commit the previous transaction
     776             :          * (and release all the locks).  An error from here on will result in
     777             :          * the transaction cancel unlocking dp so don't do it explicitly in the
     778             :          * error path.
     779             :          */
     780    72822642 :         xfs_trans_ijoin(tp, dp, 0);
     781             : 
     782    72994183 :         error = xfs_dir_create_child(tp, resblks, &du);
     783    72840410 :         if (error)
     784         227 :                 goto out_trans_cancel;
     785             : 
     786             :         /*
     787             :          * If this is a synchronous mount, make sure that the
     788             :          * create transaction goes to disk before returning to
     789             :          * the user.
     790             :          */
     791    72840183 :         if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
     792        1293 :                 xfs_trans_set_sync(tp);
     793             : 
     794             :         /*
     795             :          * Attach the dquot(s) to the inodes and modify them incore.
     796             :          * These ids of the inode couldn't have changed since the new
     797             :          * inode has been locked ever since it was created.
     798             :          */
     799    72840183 :         xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
     800             : 
     801    72853927 :         error = xfs_trans_commit(tp);
     802    73062730 :         if (error)
     803         265 :                 goto out_release_inode;
     804             : 
     805    73062465 :         xfs_qm_dqrele(udqp);
     806    73050898 :         xfs_qm_dqrele(gdqp);
     807    73052231 :         xfs_qm_dqrele(pdqp);
     808             : 
     809    73054796 :         *ipp = du.ip;
     810    73054796 :         xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
     811    73009649 :         xfs_iunlock(dp, XFS_ILOCK_EXCL);
     812    73027288 :         xfs_parent_finish(mp, du.parent);
     813             :         return 0;
     814             : 
     815      159177 :  out_trans_cancel:
     816      159177 :         xfs_trans_cancel(tp);
     817      155395 :  out_release_inode:
     818             :         /*
     819             :          * Wait until after the current transaction is aborted to finish the
     820             :          * setup of the inode and release the inode.  This prevents recursive
     821             :          * transactions and deadlocks from xfs_inactive.
     822             :          */
     823      155395 :         if (du.ip) {
     824         492 :                 xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
     825         492 :                 xfs_finish_inode_setup(du.ip);
     826         492 :                 xfs_irele(du.ip);
     827             :         }
     828      154903 :  out_parent:
     829      795661 :         xfs_parent_finish(mp, du.parent);
     830      797471 :  out_release_dquots:
     831      797471 :         xfs_qm_dqrele(udqp);
     832      800353 :         xfs_qm_dqrele(gdqp);
     833      799972 :         xfs_qm_dqrele(pdqp);
     834             : 
     835      799752 :         if (unlock_dp_on_error)
     836      158968 :                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
     837             :         return error;
     838             : }
     839             : 
     840             : int
     841    11530901 : xfs_create_tmpfile(
     842             :         struct xfs_inode        *dp,
     843             :         const struct xfs_icreate_args *args,
     844             :         struct xfs_inode        **ipp)
     845             : {
     846    11530901 :         struct xfs_mount        *mp = dp->i_mount;
     847    11530901 :         struct xfs_inode        *ip = NULL;
     848    11530901 :         struct xfs_trans        *tp = NULL;
     849    11530901 :         struct xfs_dquot        *udqp;
     850    11530901 :         struct xfs_dquot        *gdqp;
     851    11530901 :         struct xfs_dquot        *pdqp;
     852    11530901 :         struct xfs_trans_res    *tres;
     853    11530901 :         xfs_ino_t               ino;
     854    11530901 :         uint                    resblks;
     855    11530901 :         int                     error;
     856             : 
     857    11530901 :         ASSERT(args->nlink == 0);
     858    11530901 :         ASSERT(args->pip == dp);
     859             : 
     860    23061802 :         if (xfs_is_shutdown(mp))
     861             :                 return -EIO;
     862             : 
     863             :         /*
     864             :          * Make sure that we have allocated dquot(s) on disk.
     865             :          */
     866    11530901 :         error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
     867    11921626 :         if (error)
     868             :                 return error;
     869             : 
     870    11921626 :         resblks = XFS_IALLOC_SPACE_RES(mp);
     871    11921626 :         tres = &M_RES(mp)->tr_create_tmpfile;
     872             : 
     873    11921626 :         error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
     874             :                         &tp);
     875    12042380 :         if (error)
     876       81536 :                 goto out_release_dquots;
     877             : 
     878    11960844 :         error = xfs_dialloc(&tp, dp, args->mode, &ino);
     879    11673213 :         if (!error)
     880    11680959 :                 error = xfs_icreate(tp, ino, args, &ip);
     881    11659571 :         if (error)
     882          41 :                 goto out_trans_cancel;
     883             : 
     884    11659530 :         if (xfs_has_wsync(mp))
     885           0 :                 xfs_trans_set_sync(tp);
     886             : 
     887             :         /*
     888             :          * Attach the dquot(s) to the inodes and modify them incore.
     889             :          * These ids of the inode couldn't have changed since the new
     890             :          * inode has been locked ever since it was created.
     891             :          */
     892    11659530 :         xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
     893             : 
     894    11784228 :         error = xfs_iunlink(tp, ip);
     895    11721711 :         if (error)
     896           1 :                 goto out_trans_cancel;
     897             : 
     898    11721710 :         error = xfs_trans_commit(tp);
     899    12020373 :         if (error)
     900           2 :                 goto out_release_inode;
     901             : 
     902    12020371 :         xfs_qm_dqrele(udqp);
     903    12001164 :         xfs_qm_dqrele(gdqp);
     904    11994699 :         xfs_qm_dqrele(pdqp);
     905             : 
     906    12004332 :         *ipp = ip;
     907    12004332 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
     908    12004332 :         return 0;
     909             : 
     910          42 :  out_trans_cancel:
     911          42 :         xfs_trans_cancel(tp);
     912          44 :  out_release_inode:
     913             :         /*
     914             :          * Wait until after the current transaction is aborted to finish the
     915             :          * setup of the inode and release the inode.  This prevents recursive
     916             :          * transactions and deadlocks from xfs_inactive.
     917             :          */
     918          44 :         if (ip) {
     919           3 :                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
     920           3 :                 xfs_finish_inode_setup(ip);
     921           3 :                 xfs_irele(ip);
     922             :         }
     923          41 :  out_release_dquots:
     924       81580 :         xfs_qm_dqrele(udqp);
     925       81581 :         xfs_qm_dqrele(gdqp);
     926       81581 :         xfs_qm_dqrele(pdqp);
     927             : 
     928       81581 :         return error;
     929             : }
     930             : 
     931             : int
     932    13901403 : xfs_link(
     933             :         struct xfs_inode        *tdp,
     934             :         struct xfs_inode        *sip,
     935             :         struct xfs_name         *target_name)
     936             : {
     937    13901403 :         struct xfs_dir_update   du = {
     938             :                 .dp             = tdp,
     939             :                 .name           = target_name,
     940             :                 .ip             = sip,
     941             :         };
     942    13901403 :         struct xfs_mount        *mp = tdp->i_mount;
     943    13901403 :         struct xfs_trans        *tp;
     944    13901403 :         int                     error, nospace_error = 0;
     945    13901403 :         int                     resblks;
     946             : 
     947    13901403 :         trace_xfs_link(tdp, target_name);
     948             : 
     949    13900359 :         ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
     950             : 
     951    27800718 :         if (xfs_is_shutdown(mp))
     952             :                 return -EIO;
     953             : 
     954    13900359 :         error = xfs_qm_dqattach(sip);
     955    13901469 :         if (error)
     956           0 :                 goto std_return;
     957             : 
     958    13901469 :         error = xfs_qm_dqattach(tdp);
     959    13901639 :         if (error)
     960           6 :                 goto std_return;
     961             : 
     962    13901633 :         error = xfs_parent_start(mp, &du.parent);
     963    13901614 :         if (error)
     964           0 :                 goto std_return;
     965             : 
     966    13901614 :         resblks = xfs_link_space_res(mp, target_name->len);
     967    13900545 :         error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks,
     968             :                         &tp, &nospace_error);
     969    13902409 :         if (error)
     970           0 :                 goto out_parent;
     971             : 
     972             :         /*
     973             :          * We don't allow reservationless or quotaless hardlinking when parent
     974             :          * pointers are enabled because we can't back out if the xattrs must
     975             :          * grow.
     976             :          */
     977    13902409 :         if (du.parent && nospace_error) {
     978       73386 :                 error = nospace_error;
     979       73386 :                 goto error_return;
     980             :         }
     981             : 
     982             :         /*
     983             :          * If we are using project inheritance, we only allow hard link
     984             :          * creation in our tree when the project IDs are the same; else
     985             :          * the tree quota mechanism could be circumvented.
     986             :          */
     987    13829023 :         if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
     988             :                      tdp->i_projid != sip->i_projid)) {
     989           0 :                 error = -EXDEV;
     990           0 :                 goto error_return;
     991             :         }
     992             : 
     993    13829023 :         error = xfs_dir_add_child(tp, resblks, &du);
     994    13828136 :         if (error)
     995          15 :                 goto error_return;
     996             : 
     997             :         /*
     998             :          * If this is a synchronous mount, make sure that the
     999             :          * link transaction goes to disk before returning to
    1000             :          * the user.
    1001             :          */
    1002    13828121 :         if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
    1003           0 :                 xfs_trans_set_sync(tp);
    1004             : 
    1005    13828121 :         error = xfs_trans_commit(tp);
    1006    13829783 :         xfs_iunlock(tdp, XFS_ILOCK_EXCL);
    1007    13829735 :         xfs_iunlock(sip, XFS_ILOCK_EXCL);
    1008    13829775 :         xfs_parent_finish(mp, du.parent);
    1009             :         return error;
    1010             : 
    1011       73401 :  error_return:
    1012       73401 :         xfs_trans_cancel(tp);
    1013       73403 :         xfs_iunlock(tdp, XFS_ILOCK_EXCL);
    1014       73402 :         xfs_iunlock(sip, XFS_ILOCK_EXCL);
    1015       73401 :  out_parent:
    1016       73401 :         xfs_parent_finish(mp, du.parent);
    1017       73409 :  std_return:
    1018       73409 :         if (error == -ENOSPC && nospace_error)
    1019       71547 :                 error = nospace_error;
    1020             :         return error;
    1021             : }
    1022             : 
    1023             : /* Clear the reflink flag and the cowblocks tag if possible. */
    1024             : static void
    1025    33505519 : xfs_itruncate_clear_reflink_flags(
    1026             :         struct xfs_inode        *ip)
    1027             : {
    1028    33505519 :         struct xfs_ifork        *dfork;
    1029    33505519 :         struct xfs_ifork        *cfork;
    1030             : 
    1031    33505519 :         if (!xfs_is_reflink_inode(ip))
    1032             :                 return;
    1033    10528700 :         dfork = xfs_ifork_ptr(ip, XFS_DATA_FORK);
    1034    10528700 :         cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
    1035    10528700 :         if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
    1036     1327133 :                 ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
    1037    10528700 :         if (cfork->if_bytes == 0)
    1038     6742704 :                 xfs_inode_clear_cowblocks_tag(ip);
    1039             : }
    1040             : 
    1041             : /*
    1042             :  * Free up the underlying blocks past new_size.  The new size must be smaller
    1043             :  * than the current size.  This routine can be used both for the attribute and
    1044             :  * data fork, and does not modify the inode size, which is left to the caller.
    1045             :  *
    1046             :  * The transaction passed to this routine must have made a permanent log
    1047             :  * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
    1048             :  * given transaction and start new ones, so make sure everything involved in
    1049             :  * the transaction is tidy before calling here.  Some transaction will be
    1050             :  * returned to the caller to be committed.  The incoming transaction must
    1051             :  * already include the inode, and both inode locks must be held exclusively.
    1052             :  * The inode must also be "held" within the transaction.  On return the inode
    1053             :  * will be "held" within the returned transaction.  This routine does NOT
    1054             :  * require any disk space to be reserved for it within the transaction.
    1055             :  *
    1056             :  * If we get an error, we must return with the inode locked and linked into the
    1057             :  * current transaction. This keeps things simple for the higher level code,
    1058             :  * because it always knows that the inode is locked and held in the transaction
    1059             :  * that returns to it whether errors occur or not.  We don't mark the inode
    1060             :  * dirty on error so that transactions can be easily aborted if possible.
    1061             :  */
    1062             : int
    1063    35454181 : xfs_itruncate_extents_flags(
    1064             :         struct xfs_trans        **tpp,
    1065             :         struct xfs_inode        *ip,
    1066             :         int                     whichfork,
    1067             :         xfs_fsize_t             new_size,
    1068             :         int                     flags)
    1069             : {
    1070    35454181 :         struct xfs_mount        *mp = ip->i_mount;
    1071    35454181 :         struct xfs_trans        *tp = *tpp;
    1072    35454181 :         xfs_fileoff_t           first_unmap_block;
    1073    35454181 :         int                     error = 0;
    1074             : 
    1075    35454181 :         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
    1076    56530138 :         ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
    1077             :                xfs_isilocked(ip, XFS_IOLOCK_EXCL));
    1078    70908362 :         ASSERT(new_size <= XFS_ISIZE(ip));
    1079    35454181 :         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    1080    35454181 :         ASSERT(ip->i_itemp != NULL);
    1081    35454181 :         ASSERT(ip->i_itemp->ili_lock_flags == 0);
    1082    35454181 :         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
    1083             : 
    1084    35454181 :         trace_xfs_itruncate_extents_start(ip, new_size);
    1085             : 
    1086    35342404 :         flags |= xfs_bmapi_aflag(whichfork);
    1087             : 
    1088             :         /*
    1089             :          * Since it is possible for space to become allocated beyond
    1090             :          * the end of the file (in a crash where the space is allocated
    1091             :          * but the inode size is not yet updated), simply remove any
    1092             :          * blocks which show up between the new EOF and the maximum
    1093             :          * possible file size.
    1094             :          *
    1095             :          * We have to free all the blocks to the bmbt maximum offset, even if
    1096             :          * the page cache can't scale that far.
    1097             :          */
    1098    35342404 :         first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
    1099    35342404 :         if (!xfs_verify_fileoff(mp, first_unmap_block)) {
    1100           0 :                 WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
    1101             :                 return 0;
    1102             :         }
    1103             : 
    1104    35341668 :         error = xfs_bunmapi_range(&tp, ip, flags, first_unmap_block,
    1105             :                         XFS_MAX_FILEOFF);
    1106    35472204 :         if (error)
    1107        1937 :                 goto out;
    1108             : 
    1109    35470267 :         if (whichfork == XFS_DATA_FORK) {
    1110             :                 /* Remove all pending CoW reservations. */
    1111    33567376 :                 error = xfs_reflink_cancel_cow_blocks(ip, &tp,
    1112             :                                 first_unmap_block, XFS_MAX_FILEOFF, true);
    1113    33519602 :                 if (error)
    1114           0 :                         goto out;
    1115             : 
    1116    33519602 :                 xfs_itruncate_clear_reflink_flags(ip);
    1117             :         }
    1118             : 
    1119             :         /*
    1120             :          * Always re-log the inode so that our permanent transaction can keep
    1121             :          * on rolling it forward in the log.
    1122             :          */
    1123    35392119 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
    1124             : 
    1125    35585345 :         trace_xfs_itruncate_extents_end(ip, new_size);
    1126             : 
    1127    35572204 : out:
    1128    35572204 :         *tpp = tp;
    1129    35572204 :         return error;
    1130             : }
    1131             : 
    1132             : int
    1133   710248224 : xfs_release(
    1134             :         struct xfs_inode        *ip,
    1135             :         bool                    want_free_eofblocks)
    1136             : {
    1137   710248224 :         struct xfs_mount        *mp = ip->i_mount;
    1138   710248224 :         int                     error = 0;
    1139             : 
    1140   710248224 :         if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
    1141             :                 return 0;
    1142             : 
    1143             :         /* If this is a read-only mount, don't do this (would generate I/O) */
    1144  1420651964 :         if (xfs_is_readonly(mp))
    1145             :                 return 0;
    1146             : 
    1147  1419667744 :         if (!xfs_is_shutdown(mp)) {
    1148   709003259 :                 int truncated;
    1149             : 
    1150             :                 /*
    1151             :                  * If we previously truncated this file and removed old data
    1152             :                  * in the process, we want to initiate "early" writeout on
    1153             :                  * the last close.  This is an attempt to combat the notorious
    1154             :                  * NULL files problem which is particularly noticeable from a
    1155             :                  * truncate down, buffered (re-)write (delalloc), followed by
    1156             :                  * a crash.  What we are effectively doing here is
    1157             :                  * significantly reducing the time window where we'd otherwise
    1158             :                  * be exposed to that problem.
    1159             :                  */
    1160   709003259 :                 truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
    1161   709696796 :                 if (truncated) {
    1162     2779200 :                         xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
    1163     2779193 :                         if (ip->i_delayed_blks > 0) {
    1164      309105 :                                 error = filemap_flush(VFS_I(ip)->i_mapping);
    1165      309112 :                                 if (error)
    1166             :                                         return error;
    1167             :                         }
    1168             :                 }
    1169             :         }
    1170             : 
    1171   710527369 :         if (VFS_I(ip)->i_nlink == 0)
    1172             :                 return 0;
    1173             : 
    1174             :         /*
    1175             :          * If we can't get the iolock just skip truncating the blocks past EOF
    1176             :          * because we could deadlock with the mmap_lock otherwise. We'll get
    1177             :          * another chance to drop them once the last reference to the inode is
    1178             :          * dropped, so we'll never leak blocks permanently.
    1179             :          */
    1180   702243978 :         if (!want_free_eofblocks || !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
    1181   118993752 :                 return 0;
    1182             : 
    1183   582919845 :         if (xfs_can_free_eofblocks(ip, false)) {
    1184             :                 /*
    1185             :                  * Check if the inode is being opened, written and closed
    1186             :                  * frequently and we have delayed allocation blocks outstanding
    1187             :                  * (e.g. streaming writes from the NFS server), truncating the
    1188             :                  * blocks past EOF will cause fragmentation to occur.
    1189             :                  *
    1190             :                  * In this case don't do the truncation, but we have to be
    1191             :                  * careful how we detect this case. Blocks beyond EOF show up as
    1192             :                  * i_delayed_blks even when the inode is clean, so we need to
    1193             :                  * truncate them away first before checking for a dirty release.
    1194             :                  * Hence on the first dirty close we will still remove the
    1195             :                  * speculative allocation, but after that we will leave it in
    1196             :                  * place.
    1197             :                  */
    1198   109041580 :                 if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
    1199    43599178 :                         goto out_unlock;
    1200             : 
    1201    10888382 :                 error = xfs_free_eofblocks(ip);
    1202    11054608 :                 if (error)
    1203           4 :                         goto out_unlock;
    1204             : 
    1205    11054604 :                 xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
    1206             :         }
    1207             : 
    1208   528455237 : out_unlock:
    1209   583149568 :         xfs_iunlock(ip, XFS_IOLOCK_EXCL);
    1210   583149568 :         return error;
    1211             : }
    1212             : 
    1213             : /*
    1214             :  * Mark all the buffers attached to this directory stale.  In theory we should
    1215             :  * never be freeing a directory with any blocks at all, but this covers the
    1216             :  * case where we've recovered a directory swap with a "temporary" directory
    1217             :  * created by online repair and now need to dump it.
    1218             :  */
    1219             : STATIC void
    1220           0 : xfs_inactive_dir(
    1221             :         struct xfs_inode        *dp)
    1222             : {
    1223           0 :         struct xfs_iext_cursor  icur;
    1224           0 :         struct xfs_bmbt_irec    got;
    1225           0 :         struct xfs_mount        *mp = dp->i_mount;
    1226           0 :         struct xfs_da_geometry  *geo = mp->m_dir_geo;
    1227           0 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
    1228           0 :         xfs_fileoff_t           off;
    1229             : 
    1230             :         /*
    1231             :          * Invalidate each directory block.  All directory blocks are of
    1232             :          * fsbcount length and alignment, so we only need to walk those same
    1233             :          * offsets.  We hold the only reference to this inode, so we must wait
    1234             :          * for the buffer locks.
    1235             :          */
    1236           0 :         for_each_xfs_iext(ifp, &icur, &got) {
    1237           0 :                 for (off = round_up(got.br_startoff, geo->fsbcount);
    1238           0 :                      off < got.br_startoff + got.br_blockcount;
    1239           0 :                      off += geo->fsbcount) {
    1240           0 :                         struct xfs_buf  *bp = NULL;
    1241           0 :                         xfs_fsblock_t   fsbno;
    1242           0 :                         int             error;
    1243             : 
    1244           0 :                         fsbno = (off - got.br_startoff) + got.br_startblock;
    1245           0 :                         error = xfs_buf_incore(mp->m_ddev_targp,
    1246           0 :                                         XFS_FSB_TO_DADDR(mp, fsbno),
    1247           0 :                                         XFS_FSB_TO_BB(mp, geo->fsbcount),
    1248             :                                         XBF_LIVESCAN, &bp);
    1249           0 :                         if (error)
    1250           0 :                                 continue;
    1251             : 
    1252           0 :                         xfs_buf_stale(bp);
    1253           0 :                         xfs_buf_relse(bp);
    1254             :                 }
    1255             :         }
    1256           0 : }
    1257             : 
    1258             : /*
    1259             :  * xfs_inactive_truncate
    1260             :  *
    1261             :  * Called to perform a truncate when an inode becomes unlinked.
    1262             :  */
    1263             : STATIC int
    1264    12333734 : xfs_inactive_truncate(
    1265             :         struct xfs_inode *ip)
    1266             : {
    1267    12333734 :         struct xfs_mount        *mp = ip->i_mount;
    1268    12333734 :         struct xfs_trans        *tp;
    1269    12333734 :         int                     error;
    1270             : 
    1271    12333734 :         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
    1272    12333917 :         if (error) {
    1273        1962 :                 ASSERT(xfs_is_shutdown(mp));
    1274         981 :                 return error;
    1275             :         }
    1276    12332936 :         xfs_ilock(ip, XFS_ILOCK_EXCL);
    1277    12341415 :         xfs_trans_ijoin(tp, ip, 0);
    1278             : 
    1279             :         /*
    1280             :          * Log the inode size first to prevent stale data exposure in the event
    1281             :          * of a system crash before the truncate completes. See the related
    1282             :          * comment in xfs_vn_setattr_size() for details.
    1283             :          */
    1284    12344472 :         ip->i_disk_size = 0;
    1285    12344472 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
    1286             : 
    1287    12353752 :         error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
    1288    12360286 :         if (error)
    1289        1254 :                 goto error_trans_cancel;
    1290             : 
    1291    12359032 :         ASSERT(ip->i_df.if_nextents == 0);
    1292             : 
    1293    12359032 :         error = xfs_trans_commit(tp);
    1294    12358500 :         if (error)
    1295           0 :                 goto error_unlock;
    1296             : 
    1297    12358500 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1298    12358500 :         return 0;
    1299             : 
    1300             : error_trans_cancel:
    1301        1254 :         xfs_trans_cancel(tp);
    1302        1254 : error_unlock:
    1303        1254 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1304        1254 :         return error;
    1305             : }
    1306             : 
    1307             : /*
    1308             :  * xfs_inactive_ifree()
    1309             :  *
    1310             :  * Perform the inode free when an inode is unlinked.
    1311             :  */
    1312             : STATIC int
    1313    75354990 : xfs_inactive_ifree(
    1314             :         struct xfs_inode *ip)
    1315             : {
    1316    75354990 :         struct xfs_mount        *mp = ip->i_mount;
    1317    75354990 :         struct xfs_trans        *tp;
    1318    75354990 :         int                     error;
    1319             : 
    1320             :         /*
    1321             :          * We try to use a per-AG reservation for any block needed by the finobt
    1322             :          * tree, but as the finobt feature predates the per-AG reservation
    1323             :          * support a degraded file system might not have enough space for the
    1324             :          * reservation at mount time.  In that case try to dip into the reserved
    1325             :          * pool and pray.
    1326             :          *
    1327             :          * Send a warning if the reservation does happen to fail, as the inode
    1328             :          * now remains allocated and sits on the unlinked list until the fs is
    1329             :          * repaired.
    1330             :          */
    1331    75354990 :         if (unlikely(mp->m_finobt_nores)) {
    1332           0 :                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
    1333             :                                 XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
    1334             :                                 &tp);
    1335             :         } else {
    1336    75354990 :                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
    1337             :         }
    1338    75444296 :         if (error) {
    1339           0 :                 if (error == -ENOSPC) {
    1340           0 :                         xfs_warn_ratelimited(mp,
    1341             :                         "Failed to remove inode(s) from unlinked list. "
    1342             :                         "Please free space, unmount and run xfs_repair.");
    1343             :                 } else {
    1344           0 :                         ASSERT(xfs_is_shutdown(mp));
    1345             :                 }
    1346           0 :                 return error;
    1347             :         }
    1348             : 
    1349             :         /*
    1350             :          * We do not hold the inode locked across the entire rolling transaction
    1351             :          * here. We only need to hold it for the first transaction that
    1352             :          * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
    1353             :          * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
    1354             :          * here breaks the relationship between cluster buffer invalidation and
    1355             :          * stale inode invalidation on cluster buffer item journal commit
    1356             :          * completion, and can result in leaving dirty stale inodes hanging
    1357             :          * around in memory.
    1358             :          *
    1359             :          * We have no need for serialising this inode operation against other
    1360             :          * operations - we freed the inode and hence reallocation is required
    1361             :          * and that will serialise on reallocating the space the deferops need
    1362             :          * to free. Hence we can unlock the inode on the first commit of
    1363             :          * the transaction rather than roll it right through the deferops. This
    1364             :          * avoids relogging the XFS_ISTALE inode.
    1365             :          *
    1366             :          * We check that xfs_ifree() hasn't grown an internal transaction roll
    1367             :          * by asserting that the inode is still locked when it returns.
    1368             :          */
    1369    75444296 :         xfs_ilock(ip, XFS_ILOCK_EXCL);
    1370    75392617 :         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
    1371             : 
    1372    75378189 :         error = xfs_ifree(tp, ip);
    1373    75535344 :         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
    1374    75535344 :         if (error) {
    1375             :                 /*
    1376             :                  * If we fail to free the inode, shut down.  The cancel
    1377             :                  * might do that, we need to make sure.  Otherwise the
    1378             :                  * inode might be lost for a long time or forever.
    1379             :                  */
    1380         242 :                 if (!xfs_is_shutdown(mp)) {
    1381           7 :                         xfs_notice(mp, "%s: xfs_ifree returned error %d",
    1382             :                                 __func__, error);
    1383           7 :                         xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
    1384             :                 }
    1385         121 :                 xfs_trans_cancel(tp);
    1386         121 :                 return error;
    1387             :         }
    1388             : 
    1389             :         /*
    1390             :          * Credit the quota account(s). The inode is gone.
    1391             :          */
    1392    75535223 :         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
    1393             : 
    1394    75431703 :         return xfs_trans_commit(tp);
    1395             : }
    1396             : 
    1397             : /*
    1398             :  * Returns true if we need to update the on-disk metadata before we can free
    1399             :  * the memory used by this inode.  Updates include freeing post-eof
    1400             :  * preallocations; freeing COW staging extents; and marking the inode free in
    1401             :  * the inobt if it is on the unlinked list.
    1402             :  */
    1403             : bool
    1404   879523327 : xfs_inode_needs_inactive(
    1405             :         struct xfs_inode        *ip)
    1406             : {
    1407   879523327 :         struct xfs_mount        *mp = ip->i_mount;
    1408   879523327 :         struct xfs_ifork        *cow_ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
    1409             : 
    1410             :         /*
    1411             :          * If the inode is already free, then there can be nothing
    1412             :          * to clean up here.
    1413             :          */
    1414   879523327 :         if (VFS_I(ip)->i_mode == 0)
    1415             :                 return false;
    1416             : 
    1417             :         /* If this is a read-only mount, don't do this (would generate I/O) */
    1418  1759046654 :         if (xfs_is_readonly(mp))
    1419             :                 return false;
    1420             : 
    1421             :         /* If the log isn't running, push inodes straight to reclaim. */
    1422  1757178478 :         if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp))
    1423             :                 return false;
    1424             : 
    1425             :         /* Metadata inodes require explicit resource cleanup. */
    1426   554973955 :         if (xfs_is_metadata_inode(ip))
    1427             :                 return false;
    1428             : 
    1429             :         /* Want to clean out the cow blocks if there are any. */
    1430   553823759 :         if (cow_ifp && cow_ifp->if_bytes > 0)
    1431             :                 return true;
    1432             : 
    1433             :         /* Unlinked files must be freed. */
    1434   552658976 :         if (VFS_I(ip)->i_nlink == 0)
    1435             :                 return true;
    1436             : 
    1437             :         /*
    1438             :          * This file isn't being freed, so check if there are post-eof blocks
    1439             :          * to free.  @force is true because we are evicting an inode from the
    1440             :          * cache.  Post-eof blocks must be freed, lest we end up with broken
    1441             :          * free space accounting.
    1442             :          *
    1443             :          * Note: don't bother with iolock here since lockdep complains about
    1444             :          * acquiring it in reclaim context. We have the only reference to the
    1445             :          * inode at this point anyways.
    1446             :          */
    1447   478045089 :         return xfs_can_free_eofblocks(ip, true);
    1448             : }
    1449             : 
    1450             : /*
    1451             :  * Save health status somewhere, if we're dumping an inode with uncorrected
    1452             :  * errors and online repair isn't running.
    1453             :  */
    1454             : static inline void
    1455    75846129 : xfs_inactive_health(
    1456             :         struct xfs_inode        *ip)
    1457             : {
    1458    75846129 :         struct xfs_mount        *mp = ip->i_mount;
    1459    75846129 :         struct xfs_perag        *pag;
    1460    75846129 :         unsigned int            sick;
    1461    75846129 :         unsigned int            checked;
    1462             : 
    1463    75846129 :         xfs_inode_measure_sickness(ip, &sick, &checked);
    1464    75803613 :         if (!sick)
    1465    75803613 :                 return;
    1466             : 
    1467           0 :         trace_xfs_inode_unfixed_corruption(ip, sick);
    1468             : 
    1469           0 :         if (sick & XFS_SICK_INO_FORGET)
    1470             :                 return;
    1471             : 
    1472           0 :         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
    1473           0 :         if (!pag) {
    1474             :                 /* There had better still be a perag structure! */
    1475           0 :                 ASSERT(0);
    1476           0 :                 return;
    1477             :         }
    1478             : 
    1479           0 :         xfs_ag_mark_sick(pag, XFS_SICK_AG_INODES);
    1480           0 :         xfs_perag_put(pag);
    1481             : }
    1482             : 
    1483             : /*
    1484             :  * xfs_inactive
    1485             :  *
    1486             :  * This is called when the vnode reference count for the vnode
    1487             :  * goes to zero.  If the file has been unlinked, then it must
    1488             :  * now be truncated.  Also, we clear all of the read-ahead state
    1489             :  * kept for the inode here since the file is now closed.
    1490             :  */
    1491             : int
    1492    75754948 : xfs_inactive(
    1493             :         xfs_inode_t     *ip)
    1494             : {
    1495    75754948 :         struct xfs_mount        *mp;
    1496    75754948 :         int                     error = 0;
    1497    75754948 :         int                     truncate = 0;
    1498             : 
    1499             :         /*
    1500             :          * If the inode is already free, then there can be nothing
    1501             :          * to clean up here.
    1502             :          */
    1503    75754948 :         if (VFS_I(ip)->i_mode == 0) {
    1504           0 :                 ASSERT(ip->i_df.if_broot_bytes == 0);
    1505           0 :                 goto out;
    1506             :         }
    1507             : 
    1508    75754948 :         mp = ip->i_mount;
    1509   151676437 :         ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
    1510             : 
    1511    75921489 :         xfs_inactive_health(ip);
    1512             : 
    1513             :         /* If this is a read-only mount, don't do this (would generate I/O) */
    1514   151280214 :         if (xfs_is_readonly(mp))
    1515           0 :                 goto out;
    1516             : 
    1517             :         /* Metadata inodes require explicit resource cleanup. */
    1518    75640107 :         if (xfs_is_metadata_inode(ip))
    1519           0 :                 goto out;
    1520             : 
    1521             :         /* Try to clean out the cow blocks if there are any. */
    1522   151280214 :         if (xfs_inode_has_cow_data(ip))
    1523     1164785 :                 xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
    1524             : 
    1525    75639564 :         if (VFS_I(ip)->i_nlink != 0) {
    1526             :                 /*
    1527             :                  * force is true because we are evicting an inode from the
    1528             :                  * cache. Post-eof blocks must be freed, lest we end up with
    1529             :                  * broken free space accounting.
    1530             :                  *
    1531             :                  * Note: don't bother with iolock here since lockdep complains
    1532             :                  * about acquiring it in reclaim context. We have the only
    1533             :                  * reference to the inode at this point anyways.
    1534             :                  */
    1535      396996 :                 if (xfs_can_free_eofblocks(ip, true))
    1536      177034 :                         error = xfs_free_eofblocks(ip);
    1537             : 
    1538      396980 :                 goto out;
    1539             :         }
    1540             : 
    1541    75242568 :         if (S_ISREG(VFS_I(ip)->i_mode) &&
    1542    40973438 :             (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
    1543    28652536 :              ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
    1544             :                 truncate = 1;
    1545             : 
    1546    75242568 :         error = xfs_qm_dqattach(ip);
    1547    75205872 :         if (error)
    1548           7 :                 goto out;
    1549             : 
    1550    75205865 :         if (S_ISDIR(VFS_I(ip)->i_mode) && ip->i_df.if_nextents > 0) {
    1551           0 :                 xfs_inactive_dir(ip);
    1552           0 :                 truncate = 1;
    1553             :         }
    1554             : 
    1555    75205865 :         if (S_ISLNK(VFS_I(ip)->i_mode))
    1556    29490424 :                 error = xfs_inactive_symlink(ip);
    1557    45715441 :         else if (truncate)
    1558    12331776 :                 error = xfs_inactive_truncate(ip);
    1559    75315867 :         if (error)
    1560        2261 :                 goto out;
    1561             : 
    1562             :         /*
    1563             :          * If there are attributes associated with the file then blow them away
    1564             :          * now.  The code calls a routine that recursively deconstructs the
    1565             :          * attribute fork. If also blows away the in-core attribute fork.
    1566             :          */
    1567    75313606 :         if (xfs_inode_has_attr_fork(ip)) {
    1568    64157887 :                 error = xfs_attr_inactive(ip);
    1569    64286070 :                 if (error)
    1570         325 :                         goto out;
    1571             :         }
    1572             : 
    1573    75441464 :         ASSERT(ip->i_forkoff == 0);
    1574             : 
    1575             :         /*
    1576             :          * Free the inode.
    1577             :          */
    1578    75441464 :         error = xfs_inactive_ifree(ip);
    1579             : 
    1580    76006018 : out:
    1581             :         /*
    1582             :          * We're done making metadata updates for this inode, so we can release
    1583             :          * the attached dquots.
    1584             :          */
    1585    76006018 :         xfs_qm_dqdetach(ip);
    1586    75982849 :         return error;
    1587             : }
    1588             : 
    1589             : /*
    1590             :  * Find an inode on the unlinked list. This does not take references to the
    1591             :  * inode as we have existence guarantees by holding the AGI buffer lock and that
    1592             :  * only unlinked, referenced inodes can be on the unlinked inode list.  If we
    1593             :  * don't find the inode in cache, then let the caller handle the situation.
    1594             :  */
    1595             : struct xfs_inode *
    1596    59194229 : xfs_iunlink_lookup(
    1597             :         struct xfs_perag        *pag,
    1598             :         xfs_agino_t             agino)
    1599             : {
    1600    59194229 :         struct xfs_inode        *ip;
    1601             : 
    1602    59194229 :         rcu_read_lock();
    1603    58888166 :         ip = radix_tree_lookup(&pag->pag_ici_root, agino);
    1604             : 
    1605             :         /*
    1606             :          * Inode not in memory or in RCU freeing limbo should not happen.
    1607             :          * Warn about this and let the caller handle the failure.
    1608             :          */
    1609   118320641 :         if (WARN_ON_ONCE(!ip || !ip->i_ino)) {
    1610           0 :                 xfs_emerg(pag->pag_mount, "IUNLINK agno 0x%x agino 0x%x ino 0x%llx ip? %d", pag->pag_agno, agino, XFS_AGINO_TO_INO(pag->pag_mount, pag->pag_agno, agino), ip != NULL);
    1611           0 :                 rcu_read_unlock();
    1612           0 :                 return NULL;
    1613             :         }
    1614   118507715 :         ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM));
    1615   118922503 :         if (xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM))
    1616           0 :                 xfs_emerg(pag->pag_mount, "IUNLINK agno 0x%x agino 0x%x ino 0x%llx ipino 0x%llx", pag->pag_agno, agino, XFS_AGINO_TO_INO(pag->pag_mount, pag->pag_agno, agino), ip->i_ino);
    1617    59451681 :         rcu_read_unlock();
    1618    59451681 :         return ip;
    1619             : }
    1620             : 
    1621             : /*
    1622             :  * Look up the inode number specified and if it is not already marked XFS_ISTALE
    1623             :  * mark it stale. We should only find clean inodes in this lookup that aren't
    1624             :  * already stale.
    1625             :  */
    1626             : static void
    1627    21126749 : xfs_ifree_mark_inode_stale(
    1628             :         struct xfs_perag        *pag,
    1629             :         struct xfs_inode        *free_ip,
    1630             :         xfs_ino_t               inum)
    1631             : {
    1632    21126749 :         struct xfs_mount        *mp = pag->pag_mount;
    1633    21126749 :         struct xfs_inode_log_item *iip;
    1634    21126749 :         struct xfs_inode        *ip;
    1635             : 
    1636    21126749 : retry:
    1637    21126749 :         rcu_read_lock();
    1638    21126881 :         ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
    1639             : 
    1640             :         /* Inode not in memory, nothing to do */
    1641    21127253 :         if (!ip) {
    1642     2770308 :                 rcu_read_unlock();
    1643     2770308 :                 return;
    1644             :         }
    1645             : 
    1646             :         /*
    1647             :          * because this is an RCU protected lookup, we could find a recently
    1648             :          * freed or even reallocated inode during the lookup. We need to check
    1649             :          * under the i_flags_lock for a valid inode here. Skip it if it is not
    1650             :          * valid, the wrong inode or stale.
    1651             :          */
    1652    18356945 :         spin_lock(&ip->i_flags_lock);
    1653    18357400 :         if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
    1654       47074 :                 goto out_iflags_unlock;
    1655             : 
    1656             :         /*
    1657             :          * Don't try to lock/unlock the current inode, but we _cannot_ skip the
    1658             :          * other inodes that we did not find in the list attached to the buffer
    1659             :          * and are not already marked stale. If we can't lock it, back off and
    1660             :          * retry.
    1661             :          */
    1662    18310326 :         if (ip != free_ip) {
    1663    17955060 :                 if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
    1664           0 :                         spin_unlock(&ip->i_flags_lock);
    1665           0 :                         rcu_read_unlock();
    1666           0 :                         delay(1);
    1667           0 :                         goto retry;
    1668             :                 }
    1669             :         }
    1670    18309698 :         ip->i_flags |= XFS_ISTALE;
    1671             : 
    1672             :         /*
    1673             :          * If the inode is flushing, it is already attached to the buffer.  All
    1674             :          * we needed to do here is mark the inode stale so buffer IO completion
    1675             :          * will remove it from the AIL.
    1676             :          */
    1677    18309698 :         iip = ip->i_itemp;
    1678    18309698 :         if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
    1679        4287 :                 ASSERT(!list_empty(&iip->ili_item.li_bio_list));
    1680        4287 :                 ASSERT(iip->ili_last_fields);
    1681        4287 :                 goto out_iunlock;
    1682             :         }
    1683             : 
    1684             :         /*
    1685             :          * Inodes not attached to the buffer can be released immediately.
    1686             :          * Everything else has to go through xfs_iflush_abort() on journal
    1687             :          * commit as the flock synchronises removal of the inode from the
    1688             :          * cluster buffer against inode reclaim.
    1689             :          */
    1690    18305411 :         if (!iip || list_empty(&iip->ili_item.li_bio_list))
    1691      890834 :                 goto out_iunlock;
    1692             : 
    1693    17414577 :         __xfs_iflags_set(ip, XFS_IFLUSHING);
    1694    17414577 :         spin_unlock(&ip->i_flags_lock);
    1695    17416171 :         rcu_read_unlock();
    1696             : 
    1697             :         /* we have a dirty inode in memory that has not yet been flushed. */
    1698    17416135 :         spin_lock(&iip->ili_lock);
    1699    17415435 :         iip->ili_last_fields = iip->ili_fields;
    1700    17415435 :         iip->ili_fields = 0;
    1701    17415435 :         iip->ili_fsync_fields = 0;
    1702    17415435 :         spin_unlock(&iip->ili_lock);
    1703    17415516 :         ASSERT(iip->ili_last_fields);
    1704             : 
    1705    17415516 :         if (ip != free_ip)
    1706    17070258 :                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1707             :         return;
    1708             : 
    1709      895121 : out_iunlock:
    1710      895121 :         if (ip != free_ip)
    1711      884513 :                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1712       10608 : out_iflags_unlock:
    1713      942206 :         spin_unlock(&ip->i_flags_lock);
    1714      942253 :         rcu_read_unlock();
    1715             : }
    1716             : 
    1717             : /*
    1718             :  * A big issue when freeing the inode cluster is that we _cannot_ skip any
    1719             :  * inodes that are in memory - they all must be marked stale and attached to
    1720             :  * the cluster buffer.
    1721             :  */
    1722             : int
    1723      355846 : xfs_ifree_cluster(
    1724             :         struct xfs_trans        *tp,
    1725             :         struct xfs_perag        *pag,
    1726             :         struct xfs_inode        *free_ip,
    1727             :         struct xfs_icluster     *xic)
    1728             : {
    1729      355846 :         struct xfs_mount        *mp = free_ip->i_mount;
    1730      355846 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    1731      355846 :         struct xfs_buf          *bp;
    1732      355846 :         xfs_daddr_t             blkno;
    1733      355846 :         xfs_ino_t               inum = xic->first_ino;
    1734      355846 :         int                     nbufs;
    1735      355846 :         int                     i, j;
    1736      355846 :         int                     ioffset;
    1737      355846 :         int                     error;
    1738             : 
    1739      355846 :         nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
    1740             : 
    1741     1067617 :         for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
    1742             :                 /*
    1743             :                  * The allocation bitmap tells us which inodes of the chunk were
    1744             :                  * physically allocated. Skip the cluster if an inode falls into
    1745             :                  * a sparse region.
    1746             :                  */
    1747      711724 :                 ioffset = inum - xic->first_ino;
    1748      711724 :                 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
    1749       51440 :                         ASSERT(ioffset % igeo->inodes_per_cluster == 0);
    1750       51440 :                         continue;
    1751             :                 }
    1752             : 
    1753      660284 :                 blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
    1754             :                                          XFS_INO_TO_AGBNO(mp, inum));
    1755             : 
    1756             :                 /*
    1757             :                  * We obtain and lock the backing buffer first in the process
    1758             :                  * here to ensure dirty inodes attached to the buffer remain in
    1759             :                  * the flushing state while we mark them stale.
    1760             :                  *
    1761             :                  * If we scan the in-memory inodes first, then buffer IO can
    1762             :                  * complete before we get a lock on it, and hence we may fail
    1763             :                  * to mark all the active inodes on the buffer stale.
    1764             :                  */
    1765      660284 :                 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
    1766      660284 :                                 mp->m_bsize * igeo->blocks_per_cluster,
    1767             :                                 XBF_UNMAPPED, &bp);
    1768      660277 :                 if (error)
    1769           0 :                         return error;
    1770             : 
    1771             :                 /*
    1772             :                  * This buffer may not have been correctly initialised as we
    1773             :                  * didn't read it from disk. That's not important because we are
    1774             :                  * only using to mark the buffer as stale in the log, and to
    1775             :                  * attach stale cached inodes on it. That means it will never be
    1776             :                  * dispatched for IO. If it is, we want to know about it, and we
    1777             :                  * want it to fail. We can acheive this by adding a write
    1778             :                  * verifier to the buffer.
    1779             :                  */
    1780      660277 :                 bp->b_ops = &xfs_inode_buf_ops;
    1781             : 
    1782             :                 /*
    1783             :                  * Now we need to set all the cached clean inodes as XFS_ISTALE,
    1784             :                  * too. This requires lookups, and will skip inodes that we've
    1785             :                  * already marked XFS_ISTALE.
    1786             :                  */
    1787    21787210 :                 for (i = 0; i < igeo->inodes_per_cluster; i++)
    1788    21126860 :                         xfs_ifree_mark_inode_stale(pag, free_ip, inum + i);
    1789             : 
    1790      660350 :                 xfs_trans_stale_inode_buf(tp, bp);
    1791      660333 :                 xfs_trans_binval(tp, bp);
    1792             :         }
    1793             :         return 0;
    1794             : }
    1795             : 
    1796             : /*
    1797             :  * This is called to return an inode to the inode free list.  The inode should
    1798             :  * already be truncated to 0 length and have no pages associated with it.  This
    1799             :  * routine also assumes that the inode is already a part of the transaction.
    1800             :  *
    1801             :  * The on-disk copy of the inode will have been added to the list of unlinked
    1802             :  * inodes in the AGI. We need to remove the inode from that list atomically with
    1803             :  * respect to freeing it here.
    1804             :  */
    1805             : int
    1806    75393714 : xfs_ifree(
    1807             :         struct xfs_trans        *tp,
    1808             :         struct xfs_inode        *ip)
    1809             : {
    1810    75393714 :         struct xfs_mount        *mp = ip->i_mount;
    1811    75393714 :         struct xfs_perag        *pag;
    1812    75393714 :         struct xfs_icluster     xic = { 0 };
    1813    75393714 :         struct xfs_inode_log_item *iip = ip->i_itemp;
    1814    75393714 :         int                     error;
    1815             : 
    1816    75393714 :         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
    1817    75393714 :         ASSERT(VFS_I(ip)->i_nlink == 0);
    1818    75393714 :         ASSERT(ip->i_df.if_nextents == 0);
    1819    75393714 :         ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
    1820    75393714 :         ASSERT(ip->i_nblocks == 0);
    1821             : 
    1822    75393714 :         pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
    1823             : 
    1824    75496334 :         error = xfs_dir_ifree(tp, pag, ip, &xic);
    1825    75575317 :         if (error)
    1826         121 :                 goto out;
    1827             : 
    1828   151159781 :         if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
    1829           0 :                 xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
    1830             : 
    1831             :         /* Don't attempt to replay owner changes for a deleted inode */
    1832    75584585 :         spin_lock(&iip->ili_lock);
    1833    75556875 :         iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER);
    1834    75556875 :         spin_unlock(&iip->ili_lock);
    1835             : 
    1836    75577569 :         if (xic.deleted)
    1837      355870 :                 error = xfs_ifree_cluster(tp, pag, ip, &xic);
    1838    75221699 : out:
    1839    75577711 :         xfs_perag_put(pag);
    1840    75568081 :         return error;
    1841             : }
    1842             : 
    1843             : /*
    1844             :  * This is called to unpin an inode.  The caller must have the inode locked
    1845             :  * in at least shared mode so that the buffer cannot be subsequently pinned
    1846             :  * once someone is waiting for it to be unpinned.
    1847             :  */
    1848             : static void
    1849           9 : xfs_iunpin(
    1850             :         struct xfs_inode        *ip)
    1851             : {
    1852           9 :         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
    1853             : 
    1854           9 :         trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
    1855             : 
    1856             :         /* Give the log a push to start the unpinning I/O */
    1857           9 :         xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
    1858             : 
    1859           9 : }
    1860             : 
    1861             : static void
    1862           9 : __xfs_iunpin_wait(
    1863             :         struct xfs_inode        *ip)
    1864             : {
    1865           9 :         wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
    1866           9 :         DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
    1867             : 
    1868           9 :         xfs_iunpin(ip);
    1869             : 
    1870           9 :         do {
    1871           9 :                 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
    1872           9 :                 if (xfs_ipincount(ip))
    1873           0 :                         io_schedule();
    1874           9 :         } while (xfs_ipincount(ip));
    1875           9 :         finish_wait(wq, &wait.wq_entry);
    1876           9 : }
    1877             : 
    1878             : void
    1879   325411208 : xfs_iunpin_wait(
    1880             :         struct xfs_inode        *ip)
    1881             : {
    1882   325411208 :         if (xfs_ipincount(ip))
    1883           9 :                 __xfs_iunpin_wait(ip);
    1884   325411208 : }
    1885             : 
    1886             : /*
    1887             :  * Removing an inode from the namespace involves removing the directory entry
    1888             :  * and dropping the link count on the inode. Removing the directory entry can
    1889             :  * result in locking an AGF (directory blocks were freed) and removing a link
    1890             :  * count can result in placing the inode on an unlinked list which results in
    1891             :  * locking an AGI.
    1892             :  *
    1893             :  * The big problem here is that we have an ordering constraint on AGF and AGI
    1894             :  * locking - inode allocation locks the AGI, then can allocate a new extent for
    1895             :  * new inodes, locking the AGF after the AGI. Similarly, freeing the inode
    1896             :  * removes the inode from the unlinked list, requiring that we lock the AGI
    1897             :  * first, and then freeing the inode can result in an inode chunk being freed
    1898             :  * and hence freeing disk space requiring that we lock an AGF.
    1899             :  *
    1900             :  * Hence the ordering that is imposed by other parts of the code is AGI before
    1901             :  * AGF. This means we cannot remove the directory entry before we drop the inode
    1902             :  * reference count and put it on the unlinked list as this results in a lock
    1903             :  * order of AGF then AGI, and this can deadlock against inode allocation and
    1904             :  * freeing. Therefore we must drop the link counts before we remove the
    1905             :  * directory entry.
    1906             :  *
    1907             :  * This is still safe from a transactional point of view - it is not until we
    1908             :  * get to xfs_defer_finish() that we have the possibility of multiple
    1909             :  * transactions in this operation. Hence as long as we remove the directory
    1910             :  * entry and drop the link count in the first transaction of the remove
    1911             :  * operation, there are no transactional constraints on the ordering here.
    1912             :  */
    1913             : int
    1914    70633084 : xfs_remove(
    1915             :         struct xfs_inode        *dp,
    1916             :         struct xfs_name         *name,
    1917             :         struct xfs_inode        *ip)
    1918             : {
    1919    70633084 :         struct xfs_dir_update   du = {
    1920             :                 .dp             = dp,
    1921             :                 .name           = name,
    1922             :                 .ip             = ip,
    1923             :         };
    1924    70633084 :         struct xfs_mount        *mp = dp->i_mount;
    1925    70633084 :         struct xfs_trans        *tp = NULL;
    1926    70633084 :         int                     is_dir = S_ISDIR(VFS_I(ip)->i_mode);
    1927    70633084 :         int                     dontcare;
    1928    70633084 :         int                     error = 0;
    1929    70633084 :         uint                    resblks;
    1930             : 
    1931    70633084 :         trace_xfs_remove(dp, name);
    1932             : 
    1933   141251676 :         if (xfs_is_shutdown(mp))
    1934             :                 return -EIO;
    1935             : 
    1936    70625481 :         error = xfs_qm_dqattach(dp);
    1937    70580738 :         if (error)
    1938           5 :                 goto std_return;
    1939             : 
    1940    70580733 :         error = xfs_qm_dqattach(ip);
    1941    70612511 :         if (error)
    1942           0 :                 goto std_return;
    1943             : 
    1944    70612511 :         error = xfs_parent_start(mp, &du.parent);
    1945    70605326 :         if (error)
    1946           0 :                 goto std_return;
    1947             : 
    1948             :         /*
    1949             :          * We try to get the real space reservation first, allowing for
    1950             :          * directory btree deletion(s) implying possible bmap insert(s).  If we
    1951             :          * can't get the space reservation then we use 0 instead, and avoid the
    1952             :          * bmap btree insert(s) in the directory code by, if the bmap insert
    1953             :          * tries to happen, instead trimming the LAST block from the directory.
    1954             :          *
    1955             :          * Ignore EDQUOT and ENOSPC being returned via nospace_error because
    1956             :          * the directory code can handle a reservationless update and we don't
    1957             :          * want to prevent a user from trying to free space by deleting things.
    1958             :          */
    1959    70605326 :         resblks = xfs_remove_space_res(mp, name->len);
    1960    70595868 :         error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks,
    1961             :                         &tp, &dontcare);
    1962    70646018 :         if (error) {
    1963           1 :                 ASSERT(error != -ENOSPC);
    1964           1 :                 goto out_parent;
    1965             :         }
    1966             : 
    1967    70646017 :         error = xfs_dir_remove_child(tp, resblks, &du);
    1968    70627828 :         if (error)
    1969     3727354 :                 goto out_trans_cancel;
    1970             : 
    1971             :         /*
    1972             :          * If this is a synchronous mount, make sure that the
    1973             :          * remove transaction goes to disk before returning to
    1974             :          * the user.
    1975             :          */
    1976    66900474 :         if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
    1977        1199 :                 xfs_trans_set_sync(tp);
    1978             : 
    1979    66900474 :         error = xfs_trans_commit(tp);
    1980    66930237 :         if (error)
    1981           2 :                 goto out_unlock;
    1982             : 
    1983    66930235 :         if (is_dir && xfs_inode_is_filestream(ip))
    1984        4321 :                 xfs_filestream_deassociate(ip);
    1985             : 
    1986    66930234 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1987    66929412 :         xfs_iunlock(dp, XFS_ILOCK_EXCL);
    1988    66930465 :         xfs_parent_finish(mp, du.parent);
    1989             :         return 0;
    1990             : 
    1991             :  out_trans_cancel:
    1992     3727354 :         xfs_trans_cancel(tp);
    1993     3727244 :  out_unlock:
    1994     3727244 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1995     3727259 :         xfs_iunlock(dp, XFS_ILOCK_EXCL);
    1996     3727364 :  out_parent:
    1997     3727364 :         xfs_parent_finish(mp, du.parent);
    1998             :  std_return:
    1999             :         return error;
    2000             : }
    2001             : 
    2002             : static inline void
    2003    50654453 : xfs_iunlock_rename(
    2004             :         struct xfs_inode        **i_tab,
    2005             :         int                     num_inodes)
    2006             : {
    2007    50654453 :         int                     i;
    2008             : 
    2009   220903524 :         for (i = num_inodes - 1; i >= 0; i--) {
    2010             :                 /* Skip duplicate inodes if src and target dps are the same */
    2011   170249058 :                 if (!i_tab[i] || (i > 0 && i_tab[i] == i_tab[i - 1]))
    2012     3492794 :                         continue;
    2013   166756264 :                 xfs_iunlock(i_tab[i], XFS_ILOCK_EXCL);
    2014             :         }
    2015    50654466 : }
    2016             : 
    2017             : /*
    2018             :  * Enter all inodes for a rename transaction into a sorted array.
    2019             :  */
    2020             : #define __XFS_SORT_INODES       5
    2021             : STATIC void
    2022    51191827 : xfs_sort_for_rename(
    2023             :         struct xfs_inode        *dp1,   /* in: old (source) directory inode */
    2024             :         struct xfs_inode        *dp2,   /* in: new (target) directory inode */
    2025             :         struct xfs_inode        *ip1,   /* in: inode of old entry */
    2026             :         struct xfs_inode        *ip2,   /* in: inode of new entry */
    2027             :         struct xfs_inode        *wip,   /* in: whiteout inode */
    2028             :         struct xfs_inode        **i_tab,/* out: sorted array of inodes */
    2029             :         int                     *num_inodes)  /* in/out: inodes in array */
    2030             : {
    2031    51191827 :         int                     i;
    2032             : 
    2033    51191827 :         ASSERT(*num_inodes == __XFS_SORT_INODES);
    2034    51191827 :         memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
    2035             : 
    2036             :         /*
    2037             :          * i_tab contains a list of pointers to inodes.  We initialize
    2038             :          * the table here & we'll sort it.  We will then use it to
    2039             :          * order the acquisition of the inode locks.
    2040             :          *
    2041             :          * Note that the table may contain duplicates.  e.g., dp1 == dp2.
    2042             :          */
    2043    51191827 :         i = 0;
    2044    51191827 :         i_tab[i++] = dp1;
    2045    51191827 :         i_tab[i++] = dp2;
    2046    51191827 :         i_tab[i++] = ip1;
    2047    51191827 :         if (ip2)
    2048    14645620 :                 i_tab[i++] = ip2;
    2049    51191827 :         if (wip)
    2050     3867169 :                 i_tab[i++] = wip;
    2051    51191827 :         *num_inodes = i;
    2052             : 
    2053    51191827 :         xfs_sort_inodes(i_tab, *num_inodes);
    2054    51191734 : }
    2055             : 
    2056             : void
    2057    51191690 : xfs_sort_inodes(
    2058             :         struct xfs_inode        **i_tab,
    2059             :         unsigned int            num_inodes)
    2060             : {
    2061    51191690 :         int                     i, j;
    2062             : 
    2063    51191690 :         ASSERT(num_inodes <= __XFS_SORT_INODES);
    2064             : 
    2065             :         /*
    2066             :          * Sort the elements via bubble sort.  (Remember, there are at
    2067             :          * most 5 elements to sort, so this is adequate.)
    2068             :          */
    2069   223274080 :         for (i = 0; i < num_inodes; i++) {
    2070   590410182 :                 for (j = 1; j < num_inodes; j++) {
    2071   418327792 :                         if (i_tab[j]->i_ino < i_tab[j-1]->i_ino)
    2072   418327792 :                                 swap(i_tab[j], i_tab[j - 1]);
    2073             :                 }
    2074             :         }
    2075    51191690 : }
    2076             : 
    2077             : /*
    2078             :  * xfs_rename_alloc_whiteout()
    2079             :  *
    2080             :  * Return a referenced, unlinked, unlocked inode that can be used as a
    2081             :  * whiteout in a rename transaction. We use a tmpfile inode here so that if we
    2082             :  * crash between allocating the inode and linking it into the rename transaction
    2083             :  * recovery will free the inode and we won't leak it.
    2084             :  */
    2085             : static int
    2086     3948747 : xfs_rename_alloc_whiteout(
    2087             :         struct mnt_idmap        *idmap,
    2088             :         struct xfs_name         *src_name,
    2089             :         struct xfs_inode        *dp,
    2090             :         struct xfs_inode        **wip)
    2091             : {
    2092     3948747 :         struct xfs_icreate_args args = {
    2093             :                 .nlink          = 0,
    2094             :         };
    2095     3948747 :         struct xfs_inode        *tmpfile;
    2096     3948747 :         struct qstr             name;
    2097     3948747 :         int                     error;
    2098             : 
    2099     3948747 :         xfs_icreate_args_inherit(&args, dp, idmap, S_IFCHR | WHITEOUT_MODE,
    2100     3948747 :                         xfs_has_parent(dp->i_mount));
    2101             : 
    2102     3948682 :         error = xfs_create_tmpfile(dp, &args, &tmpfile);
    2103     3948897 :         if (error)
    2104             :                 return error;
    2105             : 
    2106     3867318 :         name.name = src_name->name;
    2107     3867318 :         name.len = src_name->len;
    2108     3867318 :         error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name);
    2109     3867211 :         if (error) {
    2110           0 :                 xfs_finish_inode_setup(tmpfile);
    2111           0 :                 xfs_irele(tmpfile);
    2112           0 :                 return error;
    2113             :         }
    2114             : 
    2115             :         /*
    2116             :          * Prepare the tmpfile inode as if it were created through the VFS.
    2117             :          * Complete the inode setup and flag it as linkable.  nlink is already
    2118             :          * zero, so we can skip the drop_nlink.
    2119             :          */
    2120     3867211 :         xfs_setup_iops(tmpfile);
    2121     3867017 :         xfs_finish_inode_setup(tmpfile);
    2122     3867279 :         VFS_I(tmpfile)->i_state |= I_LINKABLE;
    2123             : 
    2124     3867279 :         *wip = tmpfile;
    2125     3867279 :         return 0;
    2126             : }
    2127             : 
    2128             : /*
    2129             :  * xfs_rename
    2130             :  */
    2131             : int
    2132    51273383 : xfs_rename(
    2133             :         struct mnt_idmap                *idmap,
    2134             :         struct xfs_inode                *src_dp,
    2135             :         struct xfs_name                 *src_name,
    2136             :         struct xfs_inode                *src_ip,
    2137             :         struct xfs_inode                *target_dp,
    2138             :         struct xfs_name                 *target_name,
    2139             :         struct xfs_inode                *target_ip,
    2140             :         unsigned int                    flags)
    2141             : {
    2142    51273383 :         struct xfs_dir_update           src = {
    2143             :                 .dp                     = src_dp,
    2144             :                 .name                   = src_name,
    2145             :                 .ip                     = src_ip,
    2146             :         };
    2147    51273383 :         struct xfs_dir_update           tgt = {
    2148             :                 .dp                     = target_dp,
    2149             :                 .name                   = target_name,
    2150             :                 .ip                     = target_ip,
    2151             :         };
    2152    51273383 :         struct xfs_mount                *mp = src_dp->i_mount;
    2153    51273383 :         struct xfs_trans                *tp;
    2154    51273383 :         struct xfs_inode                *wip = NULL;    /* whiteout inode */
    2155    51273383 :         struct xfs_inode                *inodes[__XFS_SORT_INODES];
    2156    51273383 :         int                             i;
    2157    51273383 :         int                             num_inodes = __XFS_SORT_INODES;
    2158    51273383 :         bool                            new_parent = (src_dp != target_dp);
    2159    51273383 :         bool                            src_is_directory =
    2160    51273383 :                                                 S_ISDIR(VFS_I(src_ip)->i_mode);
    2161    51273383 :         int                             spaceres;
    2162    51273383 :         bool                            retried = false;
    2163    51273383 :         int                             error, nospace_error = 0;
    2164    51273383 :         struct xfs_parent_defer         *wip_pptr = NULL;
    2165             : 
    2166    51273383 :         trace_xfs_rename(src_dp, target_dp, src_name, target_name);
    2167             : 
    2168    51272862 :         if ((flags & RENAME_EXCHANGE) && !target_ip)
    2169             :                 return -EINVAL;
    2170             : 
    2171             :         /*
    2172             :          * If we are doing a whiteout operation, allocate the whiteout inode
    2173             :          * we will be placing at the target and ensure the type is set
    2174             :          * appropriately.
    2175             :          */
    2176    51272862 :         if (flags & RENAME_WHITEOUT) {
    2177     3948528 :                 error = xfs_rename_alloc_whiteout(idmap, src_name,
    2178             :                                                   target_dp, &wip);
    2179     3948768 :                 if (error)
    2180             :                         return error;
    2181             : 
    2182             :                 /* setup target dirent info as whiteout */
    2183     3867188 :                 src_name->type = XFS_DIR3_FT_CHRDEV;
    2184             :         }
    2185             : 
    2186    51191522 :         xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
    2187             :                                 inodes, &num_inodes);
    2188             : 
    2189    51191709 :         error = xfs_parent_start(mp, &src.parent);
    2190    51191728 :         if (error)
    2191           0 :                 goto out_release_wip;
    2192             : 
    2193    51191728 :         if (wip) {
    2194     3867128 :                 error = xfs_parent_start_locked(mp, &wip_pptr);
    2195     3867200 :                 if (error)
    2196           0 :                         goto out_src_ip_pptr;
    2197             :         }
    2198             : 
    2199    51191800 :         if (target_ip) {
    2200    14645637 :                 error = xfs_parent_start_locked(mp, &tgt.parent);
    2201    14645644 :                 if (error)
    2202           0 :                         goto out_wip_pptr;
    2203             :         }
    2204             : 
    2205    51191807 : retry:
    2206    51195258 :         nospace_error = 0;
    2207   102390293 :         spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL,
    2208    51195258 :                         target_name->len, wip != NULL);
    2209    51195035 :         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
    2210    51195290 :         if (error == -ENOSPC) {
    2211      546746 :                 nospace_error = error;
    2212      546746 :                 spaceres = 0;
    2213      546746 :                 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
    2214             :                                 &tp);
    2215             :         }
    2216    51195271 :         if (error)
    2217         220 :                 goto out_tgt_ip_pptr;
    2218             : 
    2219             :         /*
    2220             :          * We don't allow reservationless renaming when parent pointers are
    2221             :          * enabled because we can't back out if the xattrs must grow.
    2222             :          */
    2223    51195051 :         if (src.parent && nospace_error) {
    2224      540267 :                 error = nospace_error;
    2225      540267 :                 xfs_trans_cancel(tp);
    2226      540317 :                 goto out_tgt_ip_pptr;
    2227             :         }
    2228             : 
    2229             :         /*
    2230             :          * Attach the dquots to the inodes
    2231             :          */
    2232    50654784 :         error = xfs_qm_vop_rename_dqattach(inodes);
    2233    50654560 :         if (error) {
    2234         793 :                 xfs_trans_cancel(tp);
    2235         793 :                 goto out_tgt_ip_pptr;
    2236             :         }
    2237             : 
    2238             :         /*
    2239             :          * Lock all the participating inodes. Depending upon whether
    2240             :          * the target_name exists in the target directory, and
    2241             :          * whether the target directory is the same as the source
    2242             :          * directory, we can lock from 2 to 5 inodes.
    2243             :          */
    2244    50653767 :         xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
    2245             : 
    2246             :         /*
    2247             :          * Join all the inodes to the transaction.
    2248             :          */
    2249    50654216 :         xfs_trans_ijoin(tp, src_dp, 0);
    2250    50654151 :         if (new_parent)
    2251    47161727 :                 xfs_trans_ijoin(tp, target_dp, 0);
    2252    50654151 :         xfs_trans_ijoin(tp, src_ip, 0);
    2253    50654385 :         if (target_ip)
    2254    14503839 :                 xfs_trans_ijoin(tp, target_ip, 0);
    2255    50654364 :         if (wip)
    2256     3781904 :                 xfs_trans_ijoin(tp, wip, 0);
    2257             : 
    2258             :         /*
    2259             :          * If we are using project inheritance, we only allow renames
    2260             :          * into our tree when the project IDs are the same; else the
    2261             :          * tree quota mechanism would be circumvented.
    2262             :          */
    2263    50654376 :         if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
    2264             :                      target_dp->i_projid != src_ip->i_projid)) {
    2265           0 :                 error = -EXDEV;
    2266           0 :                 goto out_trans_cancel;
    2267             :         }
    2268             : 
    2269             :         /* RENAME_EXCHANGE is unique from here on. */
    2270    50654376 :         if (flags & RENAME_EXCHANGE) {
    2271    13839487 :                 error = xfs_dir_exchange_children(tp, &src, &tgt, spaceres);
    2272    13839466 :                 if (error)
    2273         186 :                         goto out_trans_cancel;
    2274    13839280 :                 goto out_commit;
    2275             :         }
    2276             : 
    2277             :         /*
    2278             :          * Try to reserve quota to handle an expansion of the target directory.
    2279             :          * We'll allow the rename to continue in reservationless mode if we hit
    2280             :          * a space usage constraint.  If we trigger reservationless mode, save
    2281             :          * the errno if there isn't any free space in the target directory.
    2282             :          */
    2283    36814889 :         if (spaceres != 0) {
    2284    36810160 :                 error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres,
    2285             :                                 0, false);
    2286    36810120 :                 if (error == -EDQUOT || error == -ENOSPC) {
    2287        6160 :                         if (!retried) {
    2288        3451 :                                 xfs_trans_cancel(tp);
    2289        3451 :                                 xfs_iunlock_rename(inodes, num_inodes);
    2290        3451 :                                 xfs_blockgc_free_quota(target_dp, 0);
    2291        3451 :                                 retried = true;
    2292        3451 :                                 goto retry;
    2293             :                         }
    2294             : 
    2295             :                         nospace_error = error;
    2296             :                         spaceres = 0;
    2297             :                         error = 0;
    2298             :                 }
    2299    36806669 :                 if (error)
    2300           0 :                         goto out_trans_cancel;
    2301             :         }
    2302             : 
    2303             :         /*
    2304             :          * We don't allow quotaless renaming when parent pointers are enabled
    2305             :          * because we can't back out if the xattrs must grow.
    2306             :          */
    2307    36811398 :         if (src.parent && nospace_error) {
    2308        2808 :                 error = nospace_error;
    2309        2808 :                 goto out_trans_cancel;
    2310             :         }
    2311             : 
    2312             :         /*
    2313             :          * Lock the AGI buffers we need to handle bumping the nlink of the
    2314             :          * whiteout inode off the unlinked list and to handle dropping the
    2315             :          * nlink of the target inode.  Per locking order rules, do this in
    2316             :          * increasing AG order and before directory block allocation tries to
    2317             :          * grab AGFs because we grab AGIs before AGFs.
    2318             :          *
    2319             :          * The (vfs) caller must ensure that if src is a directory then
    2320             :          * target_ip is either null or an empty directory.
    2321             :          */
    2322   151679746 :         for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
    2323   225960175 :                 if (inodes[i] == wip ||
    2324   111089205 :                     (inodes[i] == target_ip &&
    2325      664333 :                      (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
    2326     4419548 :                         struct xfs_perag        *pag;
    2327     4419548 :                         struct xfs_buf          *bp;
    2328             : 
    2329    13258582 :                         pag = xfs_perag_get(mp,
    2330     4419548 :                                         XFS_INO_TO_AGNO(mp, inodes[i]->i_ino));
    2331     4419664 :                         error = xfs_read_agi(pag, tp, &bp);
    2332     4419583 :                         xfs_perag_put(pag);
    2333     4419674 :                         if (error)
    2334           6 :                                 goto out_trans_cancel;
    2335             :                 }
    2336             :         }
    2337             : 
    2338    36808566 :         error = xfs_dir_rename_children(tp, &src, &tgt, spaceres, wip,
    2339             :                         wip_pptr);
    2340    36808342 :         if (error)
    2341         151 :                 goto out_trans_cancel;
    2342             : 
    2343    36808191 :         if (wip) {
    2344             :                 /*
    2345             :                  * Now we have a real link, clear the "I'm a tmpfile" state
    2346             :                  * flag from the inode so it doesn't accidentally get misused in
    2347             :                  * future.
    2348             :                  */
    2349     3781826 :                 VFS_I(wip)->i_state &= ~I_LINKABLE;
    2350             :         }
    2351             : 
    2352    33026365 : out_commit:
    2353             :         /*
    2354             :          * If this is a synchronous mount, make sure that the rename
    2355             :          * transaction goes to disk before returning to the user.
    2356             :          */
    2357    50647471 :         if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
    2358         152 :                 xfs_trans_set_sync(tp);
    2359             : 
    2360    50647471 :         error = xfs_trans_commit(tp);
    2361    50647883 :         nospace_error = 0;
    2362    50647883 :         goto out_unlock;
    2363             : 
    2364        3151 : out_trans_cancel:
    2365        3151 :         xfs_trans_cancel(tp);
    2366    50651034 : out_unlock:
    2367    50651034 :         xfs_iunlock_rename(inodes, num_inodes);
    2368    51192354 : out_tgt_ip_pptr:
    2369    51192354 :         xfs_parent_finish(mp, tgt.parent);
    2370    51192352 : out_wip_pptr:
    2371    51192352 :         xfs_parent_finish(mp, wip_pptr);
    2372    51192369 : out_src_ip_pptr:
    2373    51192369 :         xfs_parent_finish(mp, src.parent);
    2374    51192390 : out_release_wip:
    2375    51192390 :         if (wip)
    2376     3867401 :                 xfs_irele(wip);
    2377    51192350 :         if (error == -ENOSPC && nospace_error)
    2378      540325 :                 error = nospace_error;
    2379             :         return error;
    2380             : }
    2381             : 
    2382             : static int
    2383   363107965 : xfs_iflush(
    2384             :         struct xfs_inode        *ip,
    2385             :         struct xfs_buf          *bp)
    2386             : {
    2387   363107965 :         struct xfs_inode_log_item *iip = ip->i_itemp;
    2388   363107965 :         struct xfs_dinode       *dip;
    2389   363107965 :         struct xfs_mount        *mp = ip->i_mount;
    2390   363107965 :         int                     error;
    2391             : 
    2392   363107965 :         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
    2393   726215930 :         ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
    2394   363107965 :         ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
    2395             :                ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
    2396   363107965 :         ASSERT(iip->ili_item.li_buf == bp);
    2397             : 
    2398   363107965 :         dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
    2399             : 
    2400             :         /*
    2401             :          * We don't flush the inode if any of the following checks fail, but we
    2402             :          * do still update the log item and attach to the backing buffer as if
    2403             :          * the flush happened. This is a formality to facilitate predictable
    2404             :          * error handling as the caller will shutdown and fail the buffer.
    2405             :          */
    2406   363107965 :         error = -EFSCORRUPTED;
    2407   363107965 :         if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
    2408             :                                mp, XFS_ERRTAG_IFLUSH_1)) {
    2409           0 :                 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2410             :                         "%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
    2411             :                         __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
    2412           0 :                 goto flush_out;
    2413             :         }
    2414   363107965 :         if (ip->i_df.if_format == XFS_DINODE_FMT_RMAP) {
    2415       66641 :                 if (!S_ISREG(VFS_I(ip)->i_mode) ||
    2416       66641 :                     !(ip->i_diflags2 & XFS_DIFLAG2_METADIR)) {
    2417           0 :                         xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2418             :                                 "%s: Bad rt rmapbt inode %Lu, ptr "PTR_FMT,
    2419             :                                 __func__, ip->i_ino, ip);
    2420           0 :                         goto flush_out;
    2421             :                 }
    2422   363041324 :         } else if (ip->i_df.if_format == XFS_DINODE_FMT_REFCOUNT) {
    2423       45318 :                 if (!S_ISREG(VFS_I(ip)->i_mode) ||
    2424       45318 :                     !(ip->i_diflags2 & XFS_DIFLAG2_METADIR)) {
    2425           0 :                         xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2426             :                                 "%s: Bad rt refcountbt inode %Lu, ptr "PTR_FMT,
    2427             :                                 __func__, ip->i_ino, ip);
    2428           0 :                         goto flush_out;
    2429             :                 }
    2430   362996006 :         } else if (S_ISREG(VFS_I(ip)->i_mode)) {
    2431   239671050 :                 if (XFS_TEST_ERROR(
    2432             :                     ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
    2433             :                     ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
    2434             :                     mp, XFS_ERRTAG_IFLUSH_3)) {
    2435           0 :                         xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2436             :                                 "%s: Bad regular inode %llu, ptr "PTR_FMT,
    2437             :                                 __func__, ip->i_ino, ip);
    2438           0 :                         goto flush_out;
    2439             :                 }
    2440   123324956 :         } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
    2441    80761124 :                 if (XFS_TEST_ERROR(
    2442             :                     ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
    2443             :                     ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
    2444             :                     ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
    2445             :                     mp, XFS_ERRTAG_IFLUSH_4)) {
    2446           0 :                         xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2447             :                                 "%s: Bad directory inode %llu, ptr "PTR_FMT,
    2448             :                                 __func__, ip->i_ino, ip);
    2449           0 :                         goto flush_out;
    2450             :                 }
    2451             :         }
    2452   726215930 :         if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
    2453             :                                 ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
    2454           0 :                 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2455             :                         "%s: detected corrupt incore inode %llu, "
    2456             :                         "total extents = %llu nblocks = %lld, ptr "PTR_FMT,
    2457             :                         __func__, ip->i_ino,
    2458             :                         ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af),
    2459             :                         ip->i_nblocks, ip);
    2460           0 :                 goto flush_out;
    2461             :         }
    2462   363107965 :         if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
    2463             :                                 mp, XFS_ERRTAG_IFLUSH_6)) {
    2464           0 :                 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2465             :                         "%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
    2466             :                         __func__, ip->i_ino, ip->i_forkoff, ip);
    2467           0 :                 goto flush_out;
    2468             :         }
    2469             : 
    2470   363107965 :         if (xfs_inode_has_attr_fork(ip)) {
    2471   349466658 :                 if (ip->i_af.if_format == XFS_DINODE_FMT_RMAP) {
    2472           0 :                         xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2473             :                                 "%s: rt rmapbt in inode %Lu attr fork, ptr "PTR_FMT,
    2474             :                                 __func__, ip->i_ino, ip);
    2475           0 :                         goto flush_out;
    2476   349466658 :                 } else if (ip->i_af.if_format == XFS_DINODE_FMT_REFCOUNT) {
    2477           0 :                         xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
    2478             :                                 "%s: rt refcountbt in inode %Lu attr fork, ptr "PTR_FMT,
    2479             :                                 __func__, ip->i_ino, ip);
    2480           0 :                         goto flush_out;
    2481             :                 }
    2482             :         }
    2483             : 
    2484             :         /*
    2485             :          * Inode item log recovery for v2 inodes are dependent on the flushiter
    2486             :          * count for correct sequencing.  We bump the flush iteration count so
    2487             :          * we can detect flushes which postdate a log record during recovery.
    2488             :          * This is redundant as we now log every change and hence this can't
    2489             :          * happen but we need to still do it to ensure backwards compatibility
    2490             :          * with old kernels that predate logging all inode changes.
    2491             :          */
    2492   363107965 :         if (!xfs_has_v3inodes(mp))
    2493        1325 :                 ip->i_flushiter++;
    2494             : 
    2495             :         /*
    2496             :          * If there are inline format data / attr forks attached to this inode,
    2497             :          * make sure they are not corrupt.
    2498             :          */
    2499   442689763 :         if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
    2500    79581798 :             xfs_ifork_verify_local_data(ip))
    2501           0 :                 goto flush_out;
    2502   363107965 :         if (xfs_inode_has_attr_fork(ip) &&
    2503   611140476 :             ip->i_af.if_format == XFS_DINODE_FMT_LOCAL &&
    2504   261673818 :             xfs_ifork_verify_local_attr(ip))
    2505           0 :                 goto flush_out;
    2506             : 
    2507             :         /*
    2508             :          * Copy the dirty parts of the inode into the on-disk inode.  We always
    2509             :          * copy out the core of the inode, because if the inode is dirty at all
    2510             :          * the core must be.
    2511             :          */
    2512   363107965 :         xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
    2513             : 
    2514             :         /* Wrap, we never let the log put out DI_MAX_FLUSH */
    2515   363107965 :         if (!xfs_has_v3inodes(mp)) {
    2516        1325 :                 if (ip->i_flushiter == DI_MAX_FLUSH)
    2517           0 :                         ip->i_flushiter = 0;
    2518             :         }
    2519             : 
    2520   363107965 :         xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
    2521   363107965 :         if (xfs_inode_has_attr_fork(ip))
    2522   349466658 :                 xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
    2523             : 
    2524             :         /*
    2525             :          * We've recorded everything logged in the inode, so we'd like to clear
    2526             :          * the ili_fields bits so we don't log and flush things unnecessarily.
    2527             :          * However, we can't stop logging all this information until the data
    2528             :          * we've copied into the disk buffer is written to disk.  If we did we
    2529             :          * might overwrite the copy of the inode in the log with all the data
    2530             :          * after re-logging only part of it, and in the face of a crash we
    2531             :          * wouldn't have all the data we need to recover.
    2532             :          *
    2533             :          * What we do is move the bits to the ili_last_fields field.  When
    2534             :          * logging the inode, these bits are moved back to the ili_fields field.
    2535             :          * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
    2536             :          * we know that the information those bits represent is permanently on
    2537             :          * disk.  As long as the flush completes before the inode is logged
    2538             :          * again, then both ili_fields and ili_last_fields will be cleared.
    2539             :          */
    2540             :         error = 0;
    2541   363107965 : flush_out:
    2542   363107965 :         spin_lock(&iip->ili_lock);
    2543   363107965 :         iip->ili_last_fields = iip->ili_fields;
    2544   363107965 :         iip->ili_fields = 0;
    2545   363107965 :         iip->ili_fsync_fields = 0;
    2546   363107965 :         spin_unlock(&iip->ili_lock);
    2547             : 
    2548             :         /*
    2549             :          * Store the current LSN of the inode so that we can tell whether the
    2550             :          * item has moved in the AIL from xfs_buf_inode_iodone().
    2551             :          */
    2552   363107965 :         xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
    2553             :                                 &iip->ili_item.li_lsn);
    2554             : 
    2555             :         /* generate the checksum. */
    2556   363107965 :         xfs_dinode_calc_crc(mp, dip);
    2557   363107965 :         if (error)
    2558           0 :                 xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
    2559   363107965 :         return error;
    2560             : }
    2561             : 
    2562             : /*
    2563             :  * Non-blocking flush of dirty inode metadata into the backing buffer.
    2564             :  *
    2565             :  * The caller must have a reference to the inode and hold the cluster buffer
    2566             :  * locked. The function will walk across all the inodes on the cluster buffer it
    2567             :  * can find and lock without blocking, and flush them to the cluster buffer.
    2568             :  *
    2569             :  * On successful flushing of at least one inode, the caller must write out the
    2570             :  * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
    2571             :  * the caller needs to release the buffer. On failure, the filesystem will be
    2572             :  * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
    2573             :  * will be returned.
    2574             :  */
    2575             : int
    2576    55147167 : xfs_iflush_cluster(
    2577             :         struct xfs_buf          *bp)
    2578             : {
    2579    55147167 :         struct xfs_mount        *mp = bp->b_mount;
    2580    55147167 :         struct xfs_log_item     *lip, *n;
    2581    55147167 :         struct xfs_inode        *ip;
    2582    55147167 :         struct xfs_inode_log_item *iip;
    2583    55147167 :         int                     clcount = 0;
    2584    55147167 :         int                     error = 0;
    2585             : 
    2586             :         /*
    2587             :          * We must use the safe variant here as on shutdown xfs_iflush_abort()
    2588             :          * will remove itself from the list.
    2589             :          */
    2590   436014407 :         list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
    2591   380867240 :                 iip = (struct xfs_inode_log_item *)lip;
    2592   380867240 :                 ip = iip->ili_inode;
    2593             : 
    2594             :                 /*
    2595             :                  * Quick and dirty check to avoid locks if possible.
    2596             :                  */
    2597   380867240 :                 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
    2598      637837 :                         continue;
    2599   380229403 :                 if (xfs_ipincount(ip))
    2600    13402287 :                         continue;
    2601             : 
    2602             :                 /*
    2603             :                  * The inode is still attached to the buffer, which means it is
    2604             :                  * dirty but reclaim might try to grab it. Check carefully for
    2605             :                  * that, and grab the ilock while still holding the i_flags_lock
    2606             :                  * to guarantee reclaim will not be able to reclaim this inode
    2607             :                  * once we drop the i_flags_lock.
    2608             :                  */
    2609   366827116 :                 spin_lock(&ip->i_flags_lock);
    2610   366827116 :                 ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
    2611   366827116 :                 if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
    2612          12 :                         spin_unlock(&ip->i_flags_lock);
    2613          12 :                         continue;
    2614             :                 }
    2615             : 
    2616             :                 /*
    2617             :                  * ILOCK will pin the inode against reclaim and prevent
    2618             :                  * concurrent transactions modifying the inode while we are
    2619             :                  * flushing the inode. If we get the lock, set the flushing
    2620             :                  * state before we drop the i_flags_lock.
    2621             :                  */
    2622   366827104 :                 if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
    2623     1513773 :                         spin_unlock(&ip->i_flags_lock);
    2624     1513773 :                         continue;
    2625             :                 }
    2626   365313331 :                 __xfs_iflags_set(ip, XFS_IFLUSHING);
    2627   365313331 :                 spin_unlock(&ip->i_flags_lock);
    2628             : 
    2629             :                 /*
    2630             :                  * Abort flushing this inode if we are shut down because the
    2631             :                  * inode may not currently be in the AIL. This can occur when
    2632             :                  * log I/O failure unpins the inode without inserting into the
    2633             :                  * AIL, leaving a dirty/unpinned inode attached to the buffer
    2634             :                  * that otherwise looks like it should be flushed.
    2635             :                  */
    2636   730626662 :                 if (xlog_is_shutdown(mp->m_log)) {
    2637     2205366 :                         xfs_iunpin_wait(ip);
    2638     2205366 :                         xfs_iflush_abort(ip);
    2639     2205366 :                         xfs_iunlock(ip, XFS_ILOCK_SHARED);
    2640     2205366 :                         error = -EIO;
    2641     2205366 :                         continue;
    2642             :                 }
    2643             : 
    2644             :                 /* don't block waiting on a log force to unpin dirty inodes */
    2645   363107965 :                 if (xfs_ipincount(ip)) {
    2646           0 :                         xfs_iflags_clear(ip, XFS_IFLUSHING);
    2647           0 :                         xfs_iunlock(ip, XFS_ILOCK_SHARED);
    2648           0 :                         continue;
    2649             :                 }
    2650             : 
    2651   363107965 :                 if (!xfs_inode_clean(ip))
    2652   363107965 :                         error = xfs_iflush(ip, bp);
    2653             :                 else
    2654           0 :                         xfs_iflags_clear(ip, XFS_IFLUSHING);
    2655   363107965 :                 xfs_iunlock(ip, XFS_ILOCK_SHARED);
    2656   363107965 :                 if (error)
    2657             :                         break;
    2658   363107965 :                 clcount++;
    2659             :         }
    2660             : 
    2661    55147167 :         if (error) {
    2662             :                 /*
    2663             :                  * Shutdown first so we kill the log before we release this
    2664             :                  * buffer. If it is an INODE_ALLOC buffer and pins the tail
    2665             :                  * of the log, failing it before the _log_ is shut down can
    2666             :                  * result in the log tail being moved forward in the journal
    2667             :                  * on disk because log writes can still be taking place. Hence
    2668             :                  * unpinning the tail will allow the ICREATE intent to be
    2669             :                  * removed from the log an recovery will fail with uninitialised
    2670             :                  * inode cluster buffers.
    2671             :                  */
    2672      314789 :                 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
    2673      314789 :                 bp->b_flags |= XBF_ASYNC;
    2674      314789 :                 xfs_buf_ioend_fail(bp);
    2675      314789 :                 return error;
    2676             :         }
    2677             : 
    2678    54832378 :         if (!clcount)
    2679             :                 return -EAGAIN;
    2680             : 
    2681    54604137 :         XFS_STATS_INC(mp, xs_icluster_flushcnt);
    2682    54604137 :         XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
    2683    54604137 :         return 0;
    2684             : 
    2685             : }
    2686             : 
    2687             : /* Release an inode. */
    2688             : void
    2689 >13955*10^7 : xfs_irele(
    2690             :         struct xfs_inode        *ip)
    2691             : {
    2692 >13955*10^7 :         trace_xfs_irele(ip, _RET_IP_);
    2693 >13941*10^7 :         iput(VFS_I(ip));
    2694 >14114*10^7 : }
    2695             : 
    2696             : void
    2697     1623864 : xfs_imeta_irele(
    2698             :         struct xfs_inode        *ip)
    2699             : {
    2700     1623864 :         ASSERT(!xfs_has_metadir(ip->i_mount) || xfs_is_metadir_inode(ip));
    2701             : 
    2702     1623864 :         xfs_irele(ip);
    2703     1623864 : }
    2704             : 
    2705             : /*
    2706             :  * Ensure all commited transactions touching the inode are written to the log.
    2707             :  */
    2708             : int
    2709     1063489 : xfs_log_force_inode(
    2710             :         struct xfs_inode        *ip)
    2711             : {
    2712     1063489 :         xfs_csn_t               seq = 0;
    2713             : 
    2714     1063489 :         xfs_ilock(ip, XFS_ILOCK_SHARED);
    2715     1063499 :         if (xfs_ipincount(ip))
    2716      100272 :                 seq = ip->i_itemp->ili_commit_seq;
    2717     1063499 :         xfs_iunlock(ip, XFS_ILOCK_SHARED);
    2718             : 
    2719     1063500 :         if (!seq)
    2720             :                 return 0;
    2721      100274 :         return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
    2722             : }
    2723             : 
    2724             : /*
    2725             :  * Grab the exclusive iolock for a data copy from src to dest, making sure to
    2726             :  * abide vfs locking order (lowest pointer value goes first) and breaking the
    2727             :  * layout leases before proceeding.  The loop is needed because we cannot call
    2728             :  * the blocking break_layout() with the iolocks held, and therefore have to
    2729             :  * back out both locks.
    2730             :  */
    2731             : static int
    2732   390410120 : xfs_iolock_two_inodes_and_break_layout(
    2733             :         struct inode            *src,
    2734             :         struct inode            *dest)
    2735             : {
    2736   390410120 :         int                     error;
    2737             : 
    2738   390410120 :         if (src > dest)
    2739   191886309 :                 swap(src, dest);
    2740             : 
    2741   390410120 : retry:
    2742             :         /* Wait to break both inodes' layouts before we start locking. */
    2743   390410120 :         error = break_layout(src, true);
    2744   390417674 :         if (error)
    2745           0 :                 return error;
    2746   390417674 :         if (src != dest) {
    2747   380718524 :                 error = break_layout(dest, true);
    2748   380710268 :                 if (error)
    2749           0 :                         return error;
    2750             :         }
    2751             : 
    2752             :         /* Lock one inode and make sure nobody got in and leased it. */
    2753   390409418 :         inode_lock(src);
    2754   390408901 :         error = break_layout(src, false);
    2755   390409874 :         if (error) {
    2756           0 :                 inode_unlock(src);
    2757           0 :                 if (error == -EWOULDBLOCK)
    2758           0 :                         goto retry;
    2759           0 :                 return error;
    2760             :         }
    2761             : 
    2762   390409874 :         if (src == dest)
    2763             :                 return 0;
    2764             : 
    2765             :         /* Lock the other inode and make sure nobody got in and leased it. */
    2766   380710719 :         inode_lock_nested(dest, I_MUTEX_NONDIR2);
    2767   380715006 :         error = break_layout(dest, false);
    2768   380721135 :         if (error) {
    2769           0 :                 inode_unlock(src);
    2770           0 :                 inode_unlock(dest);
    2771           0 :                 if (error == -EWOULDBLOCK)
    2772           0 :                         goto retry;
    2773           0 :                 return error;
    2774             :         }
    2775             : 
    2776             :         return 0;
    2777             : }
    2778             : 
    2779             : static int
    2780           0 : xfs_mmaplock_two_inodes_and_break_dax_layout(
    2781             :         struct xfs_inode        *ip1,
    2782             :         struct xfs_inode        *ip2)
    2783             : {
    2784           0 :         int                     error;
    2785           0 :         bool                    retry;
    2786           0 :         struct page             *page;
    2787             : 
    2788           0 :         if (ip1->i_ino > ip2->i_ino)
    2789           0 :                 swap(ip1, ip2);
    2790             : 
    2791           0 : again:
    2792           0 :         retry = false;
    2793             :         /* Lock the first inode */
    2794           0 :         xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
    2795           0 :         error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
    2796           0 :         if (error || retry) {
    2797           0 :                 xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
    2798           0 :                 if (error == 0 && retry)
    2799           0 :                         goto again;
    2800           0 :                 return error;
    2801             :         }
    2802             : 
    2803           0 :         if (ip1 == ip2)
    2804             :                 return 0;
    2805             : 
    2806             :         /* Nested lock the second inode */
    2807           0 :         xfs_ilock(ip2, xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1));
    2808             :         /*
    2809             :          * We cannot use xfs_break_dax_layouts() directly here because it may
    2810             :          * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
    2811             :          * for this nested lock case.
    2812             :          */
    2813           0 :         page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
    2814           0 :         if (page && page_ref_count(page) != 1) {
    2815           0 :                 xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
    2816           0 :                 xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
    2817           0 :                 goto again;
    2818             :         }
    2819             : 
    2820             :         return 0;
    2821             : }
    2822             : 
    2823             : /*
    2824             :  * Lock two inodes so that userspace cannot initiate I/O via file syscalls or
    2825             :  * mmap activity.
    2826             :  */
    2827             : int
    2828   390379396 : xfs_ilock2_io_mmap(
    2829             :         struct xfs_inode        *ip1,
    2830             :         struct xfs_inode        *ip2)
    2831             : {
    2832   390379396 :         int                     ret;
    2833             : 
    2834   390379396 :         ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
    2835   390406081 :         if (ret)
    2836             :                 return ret;
    2837             : 
    2838   390406081 :         if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
    2839           0 :                 ret = xfs_mmaplock_two_inodes_and_break_dax_layout(ip1, ip2);
    2840           0 :                 if (ret) {
    2841           0 :                         inode_unlock(VFS_I(ip2));
    2842           0 :                         if (ip1 != ip2)
    2843           0 :                                 inode_unlock(VFS_I(ip1));
    2844           0 :                         return ret;
    2845             :                 }
    2846             :         } else
    2847   390406081 :                 filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
    2848             :                                             VFS_I(ip2)->i_mapping);
    2849             : 
    2850             :         return 0;
    2851             : }
    2852             : 
    2853             : /* Unlock both inodes to allow IO and mmap activity. */
    2854             : void
    2855   390343571 : xfs_iunlock2_io_mmap(
    2856             :         struct xfs_inode        *ip1,
    2857             :         struct xfs_inode        *ip2)
    2858             : {
    2859   390343571 :         if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
    2860           0 :                 xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
    2861           0 :                 if (ip1 != ip2)
    2862           0 :                         xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
    2863             :         } else
    2864   390343571 :                 filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
    2865             :                                               VFS_I(ip2)->i_mapping);
    2866             : 
    2867   390386188 :         inode_unlock(VFS_I(ip2));
    2868   390412936 :         if (ip1 != ip2)
    2869   380713806 :                 inode_unlock(VFS_I(ip1));
    2870   390411600 : }
    2871             : 
    2872             : /* Compute the number of data and realtime blocks used by a file. */
    2873             : void
    2874   430812685 : xfs_inode_count_blocks(
    2875             :         struct xfs_trans        *tp,
    2876             :         struct xfs_inode        *ip,
    2877             :         xfs_filblks_t           *dblocks,
    2878             :         xfs_filblks_t           *rblocks)
    2879             : {
    2880   430812685 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
    2881             : 
    2882   430812685 :         if (!XFS_IS_REALTIME_INODE(ip)) {
    2883   333639844 :                 *dblocks = ip->i_nblocks;
    2884   333639844 :                 *rblocks = 0;
    2885   333639844 :                 return;
    2886             :         }
    2887             : 
    2888    97172841 :         *rblocks = 0;
    2889    97172841 :         xfs_bmap_count_leaves(ifp, rblocks);
    2890    97170272 :         *dblocks = ip->i_nblocks - *rblocks;
    2891             : }
    2892             : 
    2893             : static void
    2894           0 : xfs_wait_dax_page(
    2895             :         struct inode            *inode)
    2896             : {
    2897           0 :         struct xfs_inode        *ip = XFS_I(inode);
    2898             : 
    2899           0 :         xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
    2900           0 :         schedule();
    2901           0 :         xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
    2902           0 : }
    2903             : 
    2904             : int
    2905    66587609 : xfs_break_dax_layouts(
    2906             :         struct inode            *inode,
    2907             :         bool                    *retry)
    2908             : {
    2909    66587609 :         struct page             *page;
    2910             : 
    2911    66587609 :         ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
    2912             : 
    2913    66587609 :         page = dax_layout_busy_page(inode->i_mapping);
    2914    66585919 :         if (!page)
    2915             :                 return 0;
    2916             : 
    2917           0 :         *retry = true;
    2918           0 :         return ___wait_var_event(&page->_refcount,
    2919             :                         atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
    2920             :                         0, 0, xfs_wait_dax_page(inode));
    2921             : }
    2922             : 
    2923             : int
    2924   433671536 : xfs_break_layouts(
    2925             :         struct inode            *inode,
    2926             :         uint                    *iolock,
    2927             :         enum layout_break_reason reason)
    2928             : {
    2929   433671536 :         bool                    retry;
    2930   433671536 :         int                     error;
    2931             : 
    2932   433671536 :         ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
    2933             : 
    2934   433663480 :         do {
    2935   433663480 :                 retry = false;
    2936   433663480 :                 switch (reason) {
    2937    66587822 :                 case BREAK_UNMAP:
    2938    66587822 :                         error = xfs_break_dax_layouts(inode, &retry);
    2939    66585879 :                         if (error || retry)
    2940             :                                 break;
    2941   433661506 :                         fallthrough;
    2942             :                 case BREAK_WRITE:
    2943   433661506 :                         error = xfs_break_leased_layouts(inode, iolock, &retry);
    2944   433661506 :                         break;
    2945             :                 default:
    2946           0 :                         WARN_ON_ONCE(1);
    2947           0 :                         error = -EINVAL;
    2948             :                 }
    2949   434678050 :         } while (error == 0 && retry);
    2950             : 
    2951   434686106 :         return error;
    2952             : }
    2953             : 
    2954             : /* Returns the size of fundamental allocation unit for a file, in bytes. */
    2955             : unsigned int
    2956   443597471 : xfs_inode_alloc_unitsize(
    2957             :         struct xfs_inode        *ip)
    2958             : {
    2959   443597471 :         unsigned int            blocks = 1;
    2960             : 
    2961   443597471 :         if (XFS_IS_REALTIME_INODE(ip))
    2962   235487936 :                 blocks = ip->i_mount->m_sb.sb_rextsize;
    2963             : 
    2964   443597471 :         return XFS_FSB_TO_B(ip->i_mount, blocks);
    2965             : }
    2966             : 
    2967             : /* Should we always be using copy on write for file writes? */
    2968             : bool
    2969  3622851472 : xfs_is_always_cow_inode(
    2970             :         struct xfs_inode        *ip)
    2971             : {
    2972  3622851472 :         return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
    2973             : }

Generated by: LCOV version 1.14