Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include <linux/iversion.h>
7 :
8 : #include "xfs.h"
9 : #include "xfs_fs.h"
10 : #include "xfs_shared.h"
11 : #include "xfs_format.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans_resv.h"
14 : #include "xfs_mount.h"
15 : #include "xfs_defer.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_dir2.h"
18 : #include "xfs_attr.h"
19 : #include "xfs_bit.h"
20 : #include "xfs_trans_space.h"
21 : #include "xfs_trans.h"
22 : #include "xfs_buf_item.h"
23 : #include "xfs_inode_item.h"
24 : #include "xfs_iunlink_item.h"
25 : #include "xfs_ialloc.h"
26 : #include "xfs_bmap.h"
27 : #include "xfs_bmap_util.h"
28 : #include "xfs_errortag.h"
29 : #include "xfs_error.h"
30 : #include "xfs_quota.h"
31 : #include "xfs_filestream.h"
32 : #include "xfs_trace.h"
33 : #include "xfs_icache.h"
34 : #include "xfs_symlink.h"
35 : #include "xfs_trans_priv.h"
36 : #include "xfs_log.h"
37 : #include "xfs_bmap_btree.h"
38 : #include "xfs_reflink.h"
39 : #include "xfs_ag.h"
40 : #include "xfs_log_priv.h"
41 : #include "xfs_health.h"
42 : #include "xfs_pnfs.h"
43 : #include "xfs_parent.h"
44 : #include "xfs_xattr.h"
45 : #include "xfs_inode_util.h"
46 : #include "xfs_imeta.h"
47 :
48 : struct kmem_cache *xfs_inode_cache;
49 :
50 : /*
51 : * These two are wrapper routines around the xfs_ilock() routine used to
52 : * centralize some grungy code. They are used in places that wish to lock the
53 : * inode solely for reading the extents. The reason these places can't just
54 : * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards
55 : * the reading in of the extents from disk for a file in b-tree format. If the
56 : * inode is in b-tree format, then we need to lock the inode exclusively until
57 : * the extents are read in. Locking it exclusively all the time would limit
58 : * our parallelism unnecessarily, though. What we do instead is check to see
59 : * if the extents have been read in yet, and only lock the inode exclusively
60 : * if they have not.
61 : *
62 : * The functions return a value which should be given to the corresponding
63 : * xfs_iunlock() call.
64 : */
65 : uint
66 868021026 : xfs_ilock_data_map_shared(
67 : struct xfs_inode *ip)
68 : {
69 868021026 : uint lock_mode = XFS_ILOCK_SHARED;
70 :
71 868021026 : if (xfs_need_iread_extents(&ip->i_df))
72 128511 : lock_mode = XFS_ILOCK_EXCL;
73 868140830 : xfs_ilock(ip, lock_mode);
74 868200874 : return lock_mode;
75 : }
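
/*
 * Illustrative sketch (hypothetical caller, not part of this file): the mode
 * returned by the helper must be handed back to the matching xfs_iunlock()
 * call, as the comment above describes.
 */
static void example_read_data_extents(struct xfs_inode *ip)
{
	uint	lock_mode;

	lock_mode = xfs_ilock_data_map_shared(ip);
	/* ... walk the data fork extent list here ... */
	xfs_iunlock(ip, lock_mode);
}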
76 :
77 : uint
78 1429220489 : xfs_ilock_attr_map_shared(
79 : struct xfs_inode *ip)
80 : {
81 1429220489 : uint lock_mode = XFS_ILOCK_SHARED;
82 :
83 2847261111 : if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
84 0 : lock_mode = XFS_ILOCK_EXCL;
85 1429291606 : xfs_ilock(ip, lock_mode);
86 1430224561 : return lock_mode;
87 : }
88 :
89 : /*
90 : * You can't set both SHARED and EXCL for the same lock,
91 : * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
92 : * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
93 : * to set in lock_flags.
94 : */
95 : static inline void
96 >16295*10^7 : xfs_lock_flags_assert(
97 : uint lock_flags)
98 : {
99 >16295*10^7 : ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
100 : (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
101 >16295*10^7 : ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
102 : (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
103 >16295*10^7 : ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
104 : (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
105 >16295*10^7 : ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
106 >16295*10^7 : ASSERT(lock_flags != 0);
107 >16295*10^7 : }
108 :
109 : /*
110 : * In addition to i_rwsem in the VFS inode, the xfs inode contains 2
111 : * multi-reader locks: invalidate_lock and the i_lock. This routine allows
112 : * various combinations of the locks to be obtained.
113 : *
114 : * The 3 locks should always be ordered so that the IO lock is obtained first,
115 : * the mmap lock second and the ilock last in order to prevent deadlock.
116 : *
117 : * Basic locking order:
118 : *
119 : * i_rwsem -> invalidate_lock -> page_lock -> i_ilock
120 : *
121 : * mmap_lock locking order:
122 : *
123 : * i_rwsem -> page lock -> mmap_lock
124 : * mmap_lock -> invalidate_lock -> page_lock
125 : *
126 : * The difference in mmap_lock locking order means that we cannot hold the
127 : * invalidate_lock over syscall based read(2)/write(2) based IO. These IO paths
128 : * can fault in pages during copy in/out (for buffered IO) or require the
129 : * mmap_lock in get_user_pages() to map the user pages into the kernel address
130 : * space for direct IO. Similarly the i_rwsem cannot be taken inside a page
131 : * fault because page faults already hold the mmap_lock.
132 : *
133 : * Hence to serialise fully against both syscall and mmap based IO, we need to
134 : * take both the i_rwsem and the invalidate_lock. These locks should *only* be
135 : * both taken in places where we need to invalidate the page cache in a race
136 : * free manner (e.g. truncate, hole punch and other extent manipulation
137 : * functions).
138 : */
139 : void
140 79994967413 : xfs_ilock(
141 : xfs_inode_t *ip,
142 : uint lock_flags)
143 : {
144 79994967413 : trace_xfs_ilock(ip, lock_flags, _RET_IP_);
145 :
146 80287709722 : xfs_lock_flags_assert(lock_flags);
147 :
148 80191928910 : if (lock_flags & XFS_IOLOCK_EXCL) {
149 578453725 : down_write_nested(&VFS_I(ip)->i_rwsem,
150 : XFS_IOLOCK_DEP(lock_flags));
151 79613475185 : } else if (lock_flags & XFS_IOLOCK_SHARED) {
152 685332849 : down_read_nested(&VFS_I(ip)->i_rwsem,
153 : XFS_IOLOCK_DEP(lock_flags));
154 : }
155 :
156 80192001235 : if (lock_flags & XFS_MMAPLOCK_EXCL) {
157 64947689 : down_write_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
158 : XFS_MMAPLOCK_DEP(lock_flags));
159 80127053546 : } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
160 39757766 : down_read_nested(&VFS_I(ip)->i_mapping->invalidate_lock,
161 : XFS_MMAPLOCK_DEP(lock_flags));
162 : }
163 :
164 80192001567 : if (lock_flags & XFS_ILOCK_EXCL)
165 2854612947 : mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
166 77337388620 : else if (lock_flags & XFS_ILOCK_SHARED)
167 76166075459 : mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
168 80228552765 : }
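
/*
 * Illustrative sketch (hypothetical caller, assumptions noted in the names):
 * to serialise fully against both syscall and mmap based IO as described
 * above, take the i_rwsem and invalidate_lock together in a single call,
 * honouring the documented ordering.
 */
static void example_invalidate_pagecache(struct xfs_inode *ip)
{
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
	/* ... flush and invalidate the page cache race-free ... */
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
}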
169 :
170 : /*
171 : * This is just like xfs_ilock(), except that the caller
172 : * is guaranteed not to sleep. It returns 1 if it gets
173 : * the requested locks and 0 otherwise. If the IO lock is
174 : * obtained but the inode lock cannot be, then the IO lock
175 : * is dropped before returning.
176 : *
177 : * ip -- the inode being locked
178 : * lock_flags -- this parameter indicates the inode's locks
179 : * to be locked. See the comment for xfs_ilock() for a list
180 : * of valid values.
181 : */
182 : int
183 2915411866 : xfs_ilock_nowait(
184 : xfs_inode_t *ip,
185 : uint lock_flags)
186 : {
187 2915411866 : trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
188 :
189 2915734827 : xfs_lock_flags_assert(lock_flags);
190 :
191 2918644576 : if (lock_flags & XFS_IOLOCK_EXCL) {
192 576956131 : if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
193 296158 : goto out;
194 2341688445 : } else if (lock_flags & XFS_IOLOCK_SHARED) {
195 169200405 : if (!down_read_trylock(&VFS_I(ip)->i_rwsem))
196 22120776 : goto out;
197 : }
198 :
199 2896246232 : if (lock_flags & XFS_MMAPLOCK_EXCL) {
200 150181 : if (!down_write_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
201 0 : goto out_undo_iolock;
202 2896096051 : } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
203 0 : if (!down_read_trylock(&VFS_I(ip)->i_mapping->invalidate_lock))
204 0 : goto out_undo_iolock;
205 : }
206 :
207 2896246271 : if (lock_flags & XFS_ILOCK_EXCL) {
208 1272108959 : if (!mrtryupdate(&ip->i_lock))
209 19950 : goto out_undo_mmaplock;
210 1624137312 : } else if (lock_flags & XFS_ILOCK_SHARED) {
211 900324159 : if (!mrtryaccess(&ip->i_lock))
212 990393 : goto out_undo_mmaplock;
213 : }
214 : return 1;
215 :
216 1010343 : out_undo_mmaplock:
217 1010343 : if (lock_flags & XFS_MMAPLOCK_EXCL)
218 0 : up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
219 1010343 : else if (lock_flags & XFS_MMAPLOCK_SHARED)
220 0 : up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
221 1010343 : out_undo_iolock:
222 1010343 : if (lock_flags & XFS_IOLOCK_EXCL)
223 0 : up_write(&VFS_I(ip)->i_rwsem);
224 1010343 : else if (lock_flags & XFS_IOLOCK_SHARED)
225 0 : up_read(&VFS_I(ip)->i_rwsem);
226 1010343 : out:
227 : return 0;
228 : }
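
/*
 * Illustrative sketch (hypothetical caller): non-blocking contexts such as
 * scans back off and retry rather than sleeping on a contended lock.
 */
static int example_try_inspect(struct xfs_inode *ip)
{
	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return -EAGAIN;		/* caller retries later */
	/* ... inspect the inode ... */
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return 0;
}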
229 :
230 : /*
231 : * xfs_iunlock() is used to drop the inode locks acquired with
232 : * xfs_ilock() and xfs_ilock_nowait(). The caller must pass
233 : * in the flags given to xfs_ilock() or xfs_ilock_nowait() so
234 : * that we know which locks to drop.
235 : *
236 : * ip -- the inode being unlocked
237 : * lock_flags -- this parameter indicates the inode's locks
238 : * to be unlocked. See the comment for xfs_ilock() for a list
239 : * of valid values for this parameter.
240 : *
241 : */
242 : void
243 82281383845 : xfs_iunlock(
244 : xfs_inode_t *ip,
245 : uint lock_flags)
246 : {
247 82281383845 : xfs_lock_flags_assert(lock_flags);
248 :
249 82482297370 : if (lock_flags & XFS_IOLOCK_EXCL)
250 1155061904 : up_write(&VFS_I(ip)->i_rwsem);
251 81327235466 : else if (lock_flags & XFS_IOLOCK_SHARED)
252 832516651 : up_read(&VFS_I(ip)->i_rwsem);
253 :
254 82482295348 : if (lock_flags & XFS_MMAPLOCK_EXCL)
255 65097752 : up_write(&VFS_I(ip)->i_mapping->invalidate_lock);
256 82417197596 : else if (lock_flags & XFS_MMAPLOCK_SHARED)
257 39757793 : up_read(&VFS_I(ip)->i_mapping->invalidate_lock);
258 :
259 82482295175 : if (lock_flags & XFS_ILOCK_EXCL)
260 4125316788 : mrunlock_excl(&ip->i_lock);
261 78356978387 : else if (lock_flags & XFS_ILOCK_SHARED)
262 77035615522 : mrunlock_shared(&ip->i_lock);
263 :
264 82563342891 : trace_xfs_iunlock(ip, lock_flags, _RET_IP_);
265 82559800636 : }
266 :
267 : /*
268 : * Give up write locks. The i/o lock cannot be held nested
269 : * if it is being demoted.
270 : */
271 : void
272 140954 : xfs_ilock_demote(
273 : xfs_inode_t *ip,
274 : uint lock_flags)
275 : {
276 140954 : ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
277 140954 : ASSERT((lock_flags &
278 : ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
279 :
280 140954 : if (lock_flags & XFS_ILOCK_EXCL)
281 0 : mrdemote(&ip->i_lock);
282 140954 : if (lock_flags & XFS_MMAPLOCK_EXCL)
283 0 : downgrade_write(&VFS_I(ip)->i_mapping->invalidate_lock);
284 140954 : if (lock_flags & XFS_IOLOCK_EXCL)
285 140957 : downgrade_write(&VFS_I(ip)->i_rwsem);
286 :
287 140952 : trace_xfs_ilock_demote(ip, lock_flags, _RET_IP_);
288 140954 : }
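
/*
 * Illustrative sketch (hypothetical caller): do the exclusive-only setup,
 * then demote so concurrent shared lockers can proceed. Note the unlock
 * must then pass the shared flag.
 */
static void example_demote_iolock(struct xfs_inode *ip)
{
	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	/* ... work that requires exclusive access ... */
	xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
	/* ... work that only needs shared access ... */
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
}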
289 :
290 : #if defined(DEBUG) || defined(XFS_WARN)
291 : static inline bool
292 : __xfs_rwsem_islocked(
293 : struct rw_semaphore *rwsem,
294 : bool shared)
295 : {
296 1331109329 : if (!debug_locks)
297 0 : return rwsem_is_locked(rwsem);
298 :
299 : if (!shared)
300 : return lockdep_is_held_type(rwsem, 0);
301 :
302 : /*
303 : * We are checking that the lock is held at least in shared
304 : * mode but don't care that it might be held exclusively
305 : * (i.e. shared | excl). Hence we check if the lock is held
306 : * in any mode rather than an explicit shared mode.
307 : */
308 : return lockdep_is_held_type(rwsem, -1);
309 : }
310 :
311 : bool
312 23594027791 : xfs_isilocked(
313 : struct xfs_inode *ip,
314 : uint lock_flags)
315 : {
316 23594027791 : if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
317 22397995780 : if (!(lock_flags & XFS_ILOCK_SHARED))
318 12226953786 : return !!ip->i_lock.mr_writer;
319 10171041994 : return rwsem_is_locked(&ip->i_lock.mr_lock);
320 : }
321 :
322 1196032011 : if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
323 7423529 : return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
324 : (lock_flags & XFS_MMAPLOCK_SHARED));
325 : }
326 :
327 1188608482 : if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
328 1197630291 : return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
329 : (lock_flags & XFS_IOLOCK_SHARED));
330 : }
331 :
332 0 : ASSERT(0);
333 0 : return false;
334 : }
335 : #endif
336 :
337 : /*
338 : * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
339 : * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
340 : * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build
341 : * errors and warnings.
342 : */
343 : #if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
344 : static bool
345 : xfs_lockdep_subclass_ok(
346 : int subclass)
347 : {
348 : return subclass < MAX_LOCKDEP_SUBCLASSES;
349 : }
350 : #else
351 : #define xfs_lockdep_subclass_ok(subclass) (true)
352 : #endif
353 :
354 : /*
355 : * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
356 : * value. This can be called for any type of inode lock combination, including
357 : * parent locking. Care must be taken to ensure we don't overrun the subclass
358 : * storage fields in the class mask we build.
359 : */
360 : static inline uint
361 383635028 : xfs_lock_inumorder(
362 : uint lock_mode,
363 : uint subclass)
364 : {
365 383635028 : uint class = 0;
366 :
367 383635028 : ASSERT(!(lock_mode & XFS_ILOCK_PARENT));
368 383635028 : ASSERT(xfs_lockdep_subclass_ok(subclass));
369 :
370 383635028 : if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
371 0 : ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
372 0 : class += subclass << XFS_IOLOCK_SHIFT;
373 : }
374 :
375 383635028 : if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
376 0 : ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
377 0 : class += subclass << XFS_MMAPLOCK_SHIFT;
378 : }
379 :
380 383635028 : if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
381 383637018 : ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
382 383637018 : class += subclass << XFS_ILOCK_SHIFT;
383 : }
384 :
385 383635028 : return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
386 : }
387 :
388 : /*
389 : * The following routine will lock n inodes in exclusive mode. We assume the
390 : * caller calls us with the inodes in i_ino order.
391 : *
392 : * We need to detect deadlock where an inode that we lock is in the AIL and we
393 : * start waiting for another inode that is locked by a thread in a long running
394 : * transaction (such as truncate). This can result in deadlock since the long
395 : * running trans might need to wait for the inode we just locked in order to
396 : * push the tail and free space in the log.
397 : *
398 : * xfs_lock_inodes() can only be used to lock one type of lock at a time -
399 : * the iolock, the mmaplock or the ilock. If we lock more than one type at a
400 : * time, lockdep will report false positives saying we
401 : * have violated locking orders.
402 : */
403 : void
404 31330219 : xfs_lock_inodes(
405 : struct xfs_inode **ips,
406 : int inodes,
407 : uint lock_mode)
408 : {
409 31330219 : int attempts = 0;
410 31330219 : uint i;
411 31330219 : int j;
412 31330219 : bool try_lock;
413 31330219 : struct xfs_log_item *lp;
414 :
415 : /*
416 : * Currently supports between 2 and 5 inodes with exclusive locking. We
417 : * support an arbitrary depth of locking here, but absolute limits on
418 : * inodes depend on the type of locking and the limits placed by
419 : * lockdep annotations in xfs_lock_inumorder. These are all checked by
420 : * the asserts.
421 : */
422 31330219 : ASSERT(ips && inodes >= 2 && inodes <= 5);
423 31330219 : ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
424 : XFS_ILOCK_EXCL));
425 31330219 : ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
426 : XFS_ILOCK_SHARED)));
427 31330219 : ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
428 : inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
429 31330219 : ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
430 : inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
431 :
432 31330219 : if (lock_mode & XFS_IOLOCK_EXCL) {
433 0 : ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
434 31330219 : } else if (lock_mode & XFS_MMAPLOCK_EXCL)
435 0 : ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
436 :
437 31330219 : again:
438 31343070 : try_lock = false;
439 31343070 : i = 0;
440 136606232 : for (; i < inodes; i++) {
441 105276013 : ASSERT(ips[i]);
442 :
443 105276013 : if (i && (ips[i] == ips[i - 1])) /* Already locked */
444 1217999 : continue;
445 :
446 : /*
447 : * If try_lock is not set yet, make sure all locked inodes are
448 : * not in the AIL. If any are, set try_lock to be used later.
449 : */
450 104058014 : if (!try_lock) {
451 158291398 : for (j = (i - 1); j >= 0 && !try_lock; j--) {
452 74799271 : lp = &ips[j]->i_itemp->ili_item;
453 142023789 : if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
454 18707717 : try_lock = true;
455 : }
456 : }
457 :
458 : /*
459 : * If any of the previous locks we have locked is in the AIL,
460 : * we must TRY to get the second and subsequent locks. If
461 : * we can't get any, we must release all we have
462 : * and try again.
463 : */
464 104058014 : if (!try_lock) {
465 64784432 : xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
466 64784433 : continue;
467 : }
468 :
469 : /* try_lock means we have an inode locked that is in the AIL. */
470 39273582 : ASSERT(i != 0);
471 39273582 : if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
472 39260730 : continue;
473 :
474 : /*
475 : * Unlock all previously locked inodes and try again. xfs_iunlock will try
476 : * to push the tail if the inode is in the AIL.
477 : */
478 12851 : attempts++;
479 37860 : for (j = i - 1; j >= 0; j--) {
480 : /*
481 : * Check to see if we've already unlocked this one. Not
482 : * the first one going back, and the inode ptr is the
483 : * same.
484 : */
485 25009 : if (j != (i - 1) && ips[j] == ips[j + 1])
486 6285 : continue;
487 :
488 18724 : xfs_iunlock(ips[j], lock_mode);
489 : }
490 :
491 12851 : if ((attempts % 5) == 0) {
492 2483 : delay(1); /* Don't just spin the CPU */
493 : }
494 12851 : goto again;
495 : }
496 31330219 : }
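
/*
 * Illustrative sketch (hypothetical caller): xfs_lock_inodes() assumes the
 * array is already sorted by i_ino, so order the inodes before locking.
 */
static void example_lock_pair(struct xfs_inode *dp, struct xfs_inode *ip)
{
	struct xfs_inode	*ips[2] = { dp, ip };

	if (ips[0]->i_ino > ips[1]->i_ino)
		swap(ips[0], ips[1]);
	xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
}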
497 :
498 : /*
499 : * xfs_lock_two_inodes() can only be used to lock the ilock. The iolock and
500 : * mmaplock must be double-locked separately since we use i_rwsem and
501 : * invalidate_lock for that. We now support taking one lock EXCL and the
502 : * other SHARED.
503 : */
504 : void
505 139788203 : xfs_lock_two_inodes(
506 : struct xfs_inode *ip0,
507 : uint ip0_mode,
508 : struct xfs_inode *ip1,
509 : uint ip1_mode)
510 : {
511 139788203 : int attempts = 0;
512 139788203 : struct xfs_log_item *lp;
513 :
514 279583724 : ASSERT(hweight32(ip0_mode) == 1);
515 279586424 : ASSERT(hweight32(ip1_mode) == 1);
516 139794820 : ASSERT(!(ip0_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
517 139794820 : ASSERT(!(ip1_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)));
518 139794820 : ASSERT(!(ip0_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
519 139794820 : ASSERT(!(ip1_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
520 139794820 : ASSERT(ip0->i_ino != ip1->i_ino);
521 :
522 139794820 : if (ip0->i_ino > ip1->i_ino) {
523 8955901 : swap(ip0, ip1);
524 8955901 : swap(ip0_mode, ip1_mode);
525 : }
526 :
527 139794820 : again:
528 139801910 : xfs_ilock(ip0, xfs_lock_inumorder(ip0_mode, 0));
529 :
530 : /*
531 : * If the first lock we have locked is in the AIL, we must TRY to get
532 : * the second lock. If we can't get it, we must release the first one
533 : * and try again.
534 : */
535 139789995 : lp = &ip0->i_itemp->ili_item;
536 139789995 : if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
537 103332989 : if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
538 7090 : xfs_iunlock(ip0, ip0_mode);
539 7090 : if ((++attempts % 5) == 0)
540 1407 : delay(1); /* Don't just spin the CPU */
541 7090 : goto again;
542 : }
543 : } else {
544 36457006 : xfs_ilock(ip1, xfs_lock_inumorder(ip1_mode, 1));
545 : }
546 139795372 : }
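
/*
 * Illustrative sketch (hypothetical caller): one inode taken shared, the
 * other exclusive; the two inodes must differ, and the helper handles
 * inumber ordering and AIL backoff itself.
 */
static void example_lock_two(struct xfs_inode *src, struct xfs_inode *dst)
{
	xfs_lock_two_inodes(src, XFS_ILOCK_SHARED, dst, XFS_ILOCK_EXCL);
	/* ... */
	xfs_iunlock(dst, XFS_ILOCK_EXCL);
	xfs_iunlock(src, XFS_ILOCK_SHARED);
}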
547 :
548 : /*
549 : * Looks up an inode from "name". If ci_name is not NULL, then a CI match
550 : * is allowed, otherwise it has to be an exact match. If a CI match is found,
551 : * ci_name->name will point to the actual name (caller must free) or
552 : * will be set to NULL if an exact match is found.
553 : */
554 : int
555 229987639 : xfs_lookup(
556 : struct xfs_inode *dp,
557 : const struct xfs_name *name,
558 : struct xfs_inode **ipp,
559 : struct xfs_name *ci_name)
560 : {
561 229987639 : xfs_ino_t inum;
562 229987639 : int error;
563 :
564 229987639 : trace_xfs_lookup(dp, name);
565 :
566 459975078 : if (xfs_is_shutdown(dp->i_mount))
567 : return -EIO;
568 :
569 229924237 : error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
570 229931053 : if (error)
571 80748422 : goto out_unlock;
572 :
573 149182631 : error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
574 149156773 : if (error)
575 2255 : goto out_free_name;
576 :
577 : /*
578 : * Make sure that a corrupt directory cannot accidentally link to a
579 : * metadata file.
580 : */
581 149154518 : if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) {
582 0 : xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR);
583 0 : error = -EFSCORRUPTED;
584 0 : goto out_irele;
585 : }
586 :
587 : return 0;
588 :
589 : out_irele:
590 0 : xfs_irele(*ipp);
591 2255 : out_free_name:
592 2255 : if (ci_name)
593 0 : kmem_free(ci_name->name);
594 2255 : out_unlock:
595 80750677 : *ipp = NULL;
596 80750677 : return error;
597 : }
598 :
599 : /*
600 : * Initialise a newly allocated inode and return the in-core inode to the
601 : * caller locked exclusively.
602 : *
603 : * Caller is responsible for unlocking the inode manually upon return
604 : */
605 : int
606 64362329 : xfs_icreate(
607 : struct xfs_trans *tp,
608 : xfs_ino_t ino,
609 : const struct xfs_icreate_args *args,
610 : struct xfs_inode **ipp)
611 : {
612 64362329 : struct xfs_mount *mp = tp->t_mountp;
613 64362329 : struct xfs_inode *ip = NULL;
614 64362329 : int error;
615 :
616 : /*
617 : * Get the in-core inode with the lock held exclusively to prevent
618 : * others from looking at until we're done.
619 : */
620 64362329 : error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
621 64361311 : if (error)
622 : return error;
623 :
624 64361309 : ASSERT(ip != NULL);
625 64361309 : xfs_trans_ijoin(tp, ip, 0);
626 64360934 : xfs_inode_init(tp, args, ip);
627 :
628 : /* now that we have an i_mode we can setup the inode structure */
629 64363094 : xfs_setup_inode(ip);
630 :
631 64362772 : *ipp = ip;
632 64362772 : return 0;
633 : }
634 :
635 : /* Set up inode attributes for newly created children of a directory. */
636 : void
637 64316393 : xfs_icreate_args_inherit(
638 : struct xfs_icreate_args *args,
639 : struct xfs_inode *dp,
640 : struct mnt_idmap *idmap,
641 : umode_t mode,
642 : bool init_xattrs)
643 : {
644 64316393 : args->idmap = idmap;
645 64316393 : args->pip = dp;
646 64316393 : args->uid = mapped_fsuid(idmap, &init_user_ns);
647 64319202 : args->gid = mapped_fsgid(idmap, &init_user_ns);
648 64315881 : args->prid = xfs_get_initial_prid(dp);
649 64316666 : args->mode = mode;
650 :
651 : /* Don't clobber the caller's flags */
652 64316666 : if (init_xattrs)
653 29610054 : args->flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
654 64316666 : }
655 :
656 : /* Set up inode attributes for newly created internal files. */
657 : void
658 571696 : xfs_icreate_args_rootfile(
659 : struct xfs_icreate_args *args,
660 : struct xfs_mount *mp,
661 : umode_t mode,
662 : bool init_xattrs)
663 : {
664 571696 : args->idmap = &nop_mnt_idmap;
665 571696 : args->uid = GLOBAL_ROOT_UID;
666 571696 : args->gid = GLOBAL_ROOT_GID;
667 571696 : args->prid = 0;
668 571696 : args->mode = mode;
669 571696 : args->flags = XFS_ICREATE_ARGS_FORCE_UID |
670 : XFS_ICREATE_ARGS_FORCE_GID |
671 : XFS_ICREATE_ARGS_FORCE_MODE;
672 571696 : if (init_xattrs)
673 16502 : args->flags |= XFS_ICREATE_ARGS_INIT_XATTRS;
674 571696 : }
675 :
676 : int
677 64869269 : xfs_icreate_dqalloc(
678 : const struct xfs_icreate_args *args,
679 : struct xfs_dquot **udqpp,
680 : struct xfs_dquot **gdqpp,
681 : struct xfs_dquot **pdqpp)
682 : {
683 64869269 : unsigned int flags = XFS_QMOPT_QUOTALL;
684 :
685 64869269 : *udqpp = *gdqpp = *pdqpp = NULL;
686 :
687 64869269 : if (!(args->flags & XFS_ICREATE_ARGS_FORCE_GID))
688 64315015 : flags |= XFS_QMOPT_INHERIT;
689 :
690 64869269 : return xfs_qm_vop_dqalloc(args->pip, args->uid, args->gid, args->prid,
691 : flags, udqpp, gdqpp, pdqpp);
692 : }
693 :
694 : int
695 33727690 : xfs_create(
696 : struct xfs_inode *dp,
697 : struct xfs_name *name,
698 : const struct xfs_icreate_args *args,
699 : struct xfs_inode **ipp)
700 : {
701 33727690 : struct xfs_dir_update du = {
702 : .dp = dp,
703 : .name = name,
704 : };
705 33727690 : struct xfs_mount *mp = dp->i_mount;
706 33727690 : struct xfs_trans *tp = NULL;
707 33727690 : struct xfs_dquot *udqp;
708 33727690 : struct xfs_dquot *gdqp;
709 33727690 : struct xfs_dquot *pdqp;
710 33727690 : struct xfs_trans_res *tres;
711 33727690 : xfs_ino_t ino;
712 33727690 : bool unlock_dp_on_error = false;
713 33727690 : bool is_dir = S_ISDIR(args->mode);
714 33727690 : uint resblks;
715 33727690 : int error;
716 :
717 33727690 : ASSERT(args->pip == dp);
718 33727690 : trace_xfs_create(dp, name);
719 :
720 67455272 : if (xfs_is_shutdown(mp))
721 : return -EIO;
722 :
723 : /*
724 : * Make sure that we have allocated dquot(s) on disk.
725 : */
726 33727633 : error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
727 33731845 : if (error)
728 : return error;
729 :
730 33730308 : if (is_dir) {
731 6903724 : resblks = xfs_mkdir_space_res(mp, name->len);
732 6903749 : tres = &M_RES(mp)->tr_mkdir;
733 : } else {
734 26826584 : resblks = xfs_create_space_res(mp, name->len);
735 26826015 : tres = &M_RES(mp)->tr_create;
736 : }
737 :
738 33729764 : error = xfs_parent_start(mp, &du.parent);
739 33728391 : if (error)
740 101 : goto out_release_dquots;
741 :
742 : /*
743 : * Initially assume that the file does not exist and
744 : * reserve the resources for that case. If that is not
745 : * the case we'll drop the one we have and get a more
746 : * appropriate transaction later.
747 : */
748 33728290 : error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
749 : &tp);
750 33730243 : if (error == -ENOSPC) {
751 : /* flush outstanding delalloc blocks and retry */
752 330614 : xfs_flush_inodes(mp);
753 330519 : error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp,
754 : resblks, &tp);
755 : }
756 33730238 : if (error)
757 318311 : goto out_parent;
758 :
759 33411927 : xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
760 33412307 : unlock_dp_on_error = true;
761 :
762 : /*
763 : * A newly created regular or special file just has one directory
764 : * entry pointing to it, but a directory also has the "." entry
765 : * pointing to itself.
766 : */
767 33412307 : error = xfs_dialloc(&tp, dp, args->mode, &ino);
768 33411617 : if (!error)
769 33264620 : error = xfs_icreate(tp, ino, args, &du.ip);
770 33411611 : if (error)
771 146909 : goto out_trans_cancel;
772 :
773 : /*
774 : * Now we join the directory inode to the transaction. We do not do it
775 : * earlier because xfs_dialloc might commit the previous transaction
776 : * (and release all the locks). An error from here on will result in
777 : * the transaction cancel unlocking dp so don't do it explicitly in the
778 : * error path.
779 : */
780 33264702 : xfs_trans_ijoin(tp, dp, 0);
781 :
782 33263699 : error = xfs_dir_create_child(tp, resblks, &du);
783 33264014 : if (error)
784 369 : goto out_trans_cancel;
785 :
786 : /*
787 : * If this is a synchronous mount, make sure that the
788 : * create transaction goes to disk before returning to
789 : * the user.
790 : */
791 33263645 : if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
792 236 : xfs_trans_set_sync(tp);
793 :
794 : /*
795 : * Attach the dquot(s) to the inodes and modify them incore.
796 : * These ids of the inode couldn't have changed since the new
797 : * inode has been locked ever since it was created.
798 : */
799 33263645 : xfs_qm_vop_create_dqattach(tp, du.ip, udqp, gdqp, pdqp);
800 :
801 33264773 : error = xfs_trans_commit(tp);
802 33265086 : if (error)
803 257 : goto out_release_inode;
804 :
805 33264829 : xfs_qm_dqrele(udqp);
806 33264685 : xfs_qm_dqrele(gdqp);
807 33264893 : xfs_qm_dqrele(pdqp);
808 :
809 33264868 : *ipp = du.ip;
810 33264868 : xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
811 33264611 : xfs_iunlock(dp, XFS_ILOCK_EXCL);
812 33264864 : xfs_parent_finish(mp, du.parent);
813 : return 0;
814 :
815 147278 : out_trans_cancel:
816 147278 : xfs_trans_cancel(tp);
817 147533 : out_release_inode:
818 : /*
819 : * Wait until after the current transaction is aborted to finish the
820 : * setup of the inode and release the inode. This prevents recursive
821 : * transactions and deadlocks from xfs_inactive.
822 : */
823 147533 : if (du.ip) {
824 626 : xfs_iunlock(du.ip, XFS_ILOCK_EXCL);
825 626 : xfs_finish_inode_setup(du.ip);
826 626 : xfs_irele(du.ip);
827 : }
828 146907 : out_parent:
829 465844 : xfs_parent_finish(mp, du.parent);
830 465952 : out_release_dquots:
831 465952 : xfs_qm_dqrele(udqp);
832 465960 : xfs_qm_dqrele(gdqp);
833 465524 : xfs_qm_dqrele(pdqp);
834 :
835 465958 : if (unlock_dp_on_error)
836 147532 : xfs_iunlock(dp, XFS_ILOCK_EXCL);
837 : return error;
838 : }
839 :
840 : int
841 3435098 : xfs_create_tmpfile(
842 : struct xfs_inode *dp,
843 : const struct xfs_icreate_args *args,
844 : struct xfs_inode **ipp)
845 : {
846 3435098 : struct xfs_mount *mp = dp->i_mount;
847 3435098 : struct xfs_inode *ip = NULL;
848 3435098 : struct xfs_trans *tp = NULL;
849 3435098 : struct xfs_dquot *udqp;
850 3435098 : struct xfs_dquot *gdqp;
851 3435098 : struct xfs_dquot *pdqp;
852 3435098 : struct xfs_trans_res *tres;
853 3435098 : xfs_ino_t ino;
854 3435098 : uint resblks;
855 3435098 : int error;
856 :
857 3435098 : ASSERT(args->nlink == 0);
858 3435098 : ASSERT(args->pip == dp);
859 :
860 6870196 : if (xfs_is_shutdown(mp))
861 : return -EIO;
862 :
863 : /*
864 : * Make sure that we have allocated dquot(s) on disk.
865 : */
866 3435098 : error = xfs_icreate_dqalloc(args, &udqp, &gdqp, &pdqp);
867 3435177 : if (error)
868 : return error;
869 :
870 3435177 : resblks = XFS_IALLOC_SPACE_RES(mp);
871 3435177 : tres = &M_RES(mp)->tr_create_tmpfile;
872 :
873 3435177 : error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
874 : &tp);
875 3435176 : if (error)
876 37778 : goto out_release_dquots;
877 :
878 3397398 : error = xfs_dialloc(&tp, dp, args->mode, &ino);
879 3396877 : if (!error)
880 3397350 : error = xfs_icreate(tp, ino, args, &ip);
881 3396912 : if (error)
882 1 : goto out_trans_cancel;
883 :
884 3396911 : if (xfs_has_wsync(mp))
885 0 : xfs_trans_set_sync(tp);
886 :
887 : /*
888 : * Attach the dquot(s) to the inodes and modify them incore.
889 : * These ids of the inode couldn't have changed since the new
890 : * inode has been locked ever since it was created.
891 : */
892 3396911 : xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
893 :
894 3397327 : error = xfs_iunlink(tp, ip);
895 3397371 : if (error)
896 4 : goto out_trans_cancel;
897 :
898 3397367 : error = xfs_trans_commit(tp);
899 3397390 : if (error)
900 1 : goto out_release_inode;
901 :
902 3397389 : xfs_qm_dqrele(udqp);
903 3394465 : xfs_qm_dqrele(gdqp);
904 3397393 : xfs_qm_dqrele(pdqp);
905 :
906 3397394 : *ipp = ip;
907 3397394 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
908 3397394 : return 0;
909 :
910 5 : out_trans_cancel:
911 5 : xfs_trans_cancel(tp);
912 6 : out_release_inode:
913 : /*
914 : * Wait until after the current transaction is aborted to finish the
915 : * setup of the inode and release the inode. This prevents recursive
916 : * transactions and deadlocks from xfs_inactive.
917 : */
918 6 : if (ip) {
919 5 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
920 5 : xfs_finish_inode_setup(ip);
921 5 : xfs_irele(ip);
922 : }
923 1 : out_release_dquots:
924 37784 : xfs_qm_dqrele(udqp);
925 37784 : xfs_qm_dqrele(gdqp);
926 37784 : xfs_qm_dqrele(pdqp);
927 :
928 37784 : return error;
929 : }
930 :
931 : int
932 6208188 : xfs_link(
933 : struct xfs_inode *tdp,
934 : struct xfs_inode *sip,
935 : struct xfs_name *target_name)
936 : {
937 6208188 : struct xfs_dir_update du = {
938 : .dp = tdp,
939 : .name = target_name,
940 : .ip = sip,
941 : };
942 6208188 : struct xfs_mount *mp = tdp->i_mount;
943 6208188 : struct xfs_trans *tp;
944 6208188 : int error, nospace_error = 0;
945 6208188 : int resblks;
946 :
947 6208188 : trace_xfs_link(tdp, target_name);
948 :
949 6208210 : ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
950 :
951 12416420 : if (xfs_is_shutdown(mp))
952 : return -EIO;
953 :
954 6208209 : error = xfs_qm_dqattach(sip);
955 6208209 : if (error)
956 0 : goto std_return;
957 :
958 6208209 : error = xfs_qm_dqattach(tdp);
959 6208174 : if (error)
960 2 : goto std_return;
961 :
962 6208172 : error = xfs_parent_start(mp, &du.parent);
963 6208216 : if (error)
964 0 : goto std_return;
965 :
966 6208216 : resblks = xfs_link_space_res(mp, target_name->len);
967 6208198 : error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks,
968 : &tp, &nospace_error);
969 6208294 : if (error)
970 1 : goto out_parent;
971 :
972 : /*
973 : * We don't allow reservationless or quotaless hardlinking when parent
974 : * pointers are enabled because we can't back out if the xattrs must
975 : * grow.
976 : */
977 6208293 : if (du.parent && nospace_error) {
978 33199 : error = nospace_error;
979 33199 : goto error_return;
980 : }
981 :
982 : /*
983 : * If we are using project inheritance, we only allow hard link
984 : * creation in our tree when the project IDs are the same; else
985 : * the tree quota mechanism could be circumvented.
986 : */
987 6175094 : if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
988 : tdp->i_projid != sip->i_projid)) {
989 0 : error = -EXDEV;
990 0 : goto error_return;
991 : }
992 :
993 6175094 : error = xfs_dir_add_child(tp, resblks, &du);
994 6175032 : if (error)
995 1 : goto error_return;
996 :
997 : /*
998 : * If this is a synchronous mount, make sure that the
999 : * link transaction goes to disk before returning to
1000 : * the user.
1001 : */
1002 6175031 : if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
1003 0 : xfs_trans_set_sync(tp);
1004 :
1005 6175031 : error = xfs_trans_commit(tp);
1006 6175098 : xfs_iunlock(tdp, XFS_ILOCK_EXCL);
1007 6175089 : xfs_iunlock(sip, XFS_ILOCK_EXCL);
1008 6175088 : xfs_parent_finish(mp, du.parent);
1009 : return error;
1010 :
1011 33200 : error_return:
1012 33200 : xfs_trans_cancel(tp);
1013 33200 : xfs_iunlock(tdp, XFS_ILOCK_EXCL);
1014 33200 : xfs_iunlock(sip, XFS_ILOCK_EXCL);
1015 33201 : out_parent:
1016 33201 : xfs_parent_finish(mp, du.parent);
1017 33203 : std_return:
1018 33203 : if (error == -ENOSPC && nospace_error)
1019 32701 : error = nospace_error;
1020 : return error;
1021 : }
1022 :
1023 : /* Clear the reflink flag and the cowblocks tag if possible. */
1024 : static void
1025 11940335 : xfs_itruncate_clear_reflink_flags(
1026 : struct xfs_inode *ip)
1027 : {
1028 11940335 : struct xfs_ifork *dfork;
1029 11940335 : struct xfs_ifork *cfork;
1030 :
1031 11940335 : if (!xfs_is_reflink_inode(ip))
1032 : return;
1033 5383715 : dfork = xfs_ifork_ptr(ip, XFS_DATA_FORK);
1034 5383715 : cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
1035 5383715 : if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
1036 1002308 : ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1037 5383715 : if (cfork->if_bytes == 0)
1038 5160113 : xfs_inode_clear_cowblocks_tag(ip);
1039 : }
1040 :
1041 : /*
1042 : * Free up the underlying blocks past new_size. The new size must be smaller
1043 : * than the current size. This routine can be used both for the attribute and
1044 : * data fork, and does not modify the inode size, which is left to the caller.
1045 : *
1046 : * The transaction passed to this routine must have made a permanent log
1047 : * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
1048 : * given transaction and start new ones, so make sure everything involved in
1049 : * the transaction is tidy before calling here. Some transaction will be
1050 : * returned to the caller to be committed. The incoming transaction must
1051 : * already include the inode, and both inode locks must be held exclusively.
1052 : * The inode must also be "held" within the transaction. On return the inode
1053 : * will be "held" within the returned transaction. This routine does NOT
1054 : * require any disk space to be reserved for it within the transaction.
1055 : *
1056 : * If we get an error, we must return with the inode locked and linked into the
1057 : * current transaction. This keeps things simple for the higher level code,
1058 : * because it always knows that the inode is locked and held in the transaction
1059 : * that returns to it whether errors occur or not. We don't mark the inode
1060 : * dirty on error so that transactions can be easily aborted if possible.
1061 : */
1062 : int
1063 13998573 : xfs_itruncate_extents_flags(
1064 : struct xfs_trans **tpp,
1065 : struct xfs_inode *ip,
1066 : int whichfork,
1067 : xfs_fsize_t new_size,
1068 : int flags)
1069 : {
1070 13998573 : struct xfs_mount *mp = ip->i_mount;
1071 13998573 : struct xfs_trans *tp = *tpp;
1072 13998573 : xfs_fileoff_t first_unmap_block;
1073 13998573 : int error = 0;
1074 :
1075 13998573 : ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1076 23020382 : ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
1077 : xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1078 27997146 : ASSERT(new_size <= XFS_ISIZE(ip));
1079 13998573 : ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1080 13998573 : ASSERT(ip->i_itemp != NULL);
1081 13998573 : ASSERT(ip->i_itemp->ili_lock_flags == 0);
1082 13998573 : ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1083 :
1084 13998573 : trace_xfs_itruncate_extents_start(ip, new_size);
1085 :
1086 13998295 : flags |= xfs_bmapi_aflag(whichfork);
1087 :
1088 : /*
1089 : * Since it is possible for space to become allocated beyond
1090 : * the end of the file (in a crash where the space is allocated
1091 : * but the inode size is not yet updated), simply remove any
1092 : * blocks which show up between the new EOF and the maximum
1093 : * possible file size.
1094 : *
1095 : * We have to free all the blocks to the bmbt maximum offset, even if
1096 : * the page cache can't scale that far.
1097 : */
1098 13998295 : first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1099 13998295 : if (!xfs_verify_fileoff(mp, first_unmap_block)) {
1100 0 : WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
1101 0 : return 0;
1102 : }
1103 :
1104 13998045 : error = xfs_bunmapi_range(&tp, ip, flags, first_unmap_block,
1105 : XFS_MAX_FILEOFF);
1106 13997952 : if (error)
1107 1922 : goto out;
1108 :
1109 13996030 : if (whichfork == XFS_DATA_FORK) {
1110 : /* Remove all pending CoW reservations. */
1111 11939268 : error = xfs_reflink_cancel_cow_blocks(ip, &tp,
1112 : first_unmap_block, XFS_MAX_FILEOFF, true);
1113 11939340 : if (error)
1114 0 : goto out;
1115 :
1116 11939340 : xfs_itruncate_clear_reflink_flags(ip);
1117 : }
1118 :
1119 : /*
1120 : * Always re-log the inode so that our permanent transaction can keep
1121 : * on rolling it forward in the log.
1122 : */
1123 13996120 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1124 :
1125 13997709 : trace_xfs_itruncate_extents_end(ip, new_size);
1126 :
1127 13999645 : out:
1128 13999645 : *tpp = tp;
1129 13999645 : return error;
1130 : }
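
/*
 * Illustrative sketch (hypothetical caller, mirroring the contract above):
 * the inode is joined and held, the transaction carries a permanent log
 * reservation, and whatever transaction comes back is committed or
 * cancelled by the caller. xfs_itruncate_extents() is the flags==0 wrapper.
 */
static int example_truncate_to_zero(
	struct xfs_trans	*tp,	/* permanent reservation */
	struct xfs_inode	*ip)	/* ILOCK_EXCL held, joined to tp */
{
	int			error;

	ip->i_disk_size = 0;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}
	return xfs_trans_commit(tp);
	/* caller still holds ILOCK_EXCL and must drop it */
}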
1131 :
1132 : int
1133 457255308 : xfs_release(
1134 : struct xfs_inode *ip,
1135 : bool want_free_eofblocks)
1136 : {
1137 457255308 : struct xfs_mount *mp = ip->i_mount;
1138 457255308 : int error = 0;
1139 :
1140 457255308 : if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
1141 : return 0;
1142 :
1143 : /* If this is a read-only mount, don't do this (would generate I/O) */
1144 914516654 : if (xfs_is_readonly(mp))
1145 : return 0;
1146 :
1147 913683576 : if (!xfs_is_shutdown(mp)) {
1148 456659187 : int truncated;
1149 :
1150 : /*
1151 : * If we previously truncated this file and removed old data
1152 : * in the process, we want to initiate "early" writeout on
1153 : * the last close. This is an attempt to combat the notorious
1154 : * NULL files problem which is particularly noticeable from a
1155 : * truncate down, buffered (re-)write (delalloc), followed by
1156 : * a crash. What we are effectively doing here is
1157 : * significantly reducing the time window where we'd otherwise
1158 : * be exposed to that problem.
1159 : */
1160 456659187 : truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED);
1161 456674603 : if (truncated) {
1162 1958282 : xfs_iflags_clear(ip, XFS_IDIRTY_RELEASE);
1163 1958282 : if (ip->i_delayed_blks > 0) {
1164 171902 : error = filemap_flush(VFS_I(ip)->i_mapping);
1165 171904 : if (error)
1166 : return error;
1167 : }
1168 : }
1169 : }
1170 :
1171 456857183 : if (VFS_I(ip)->i_nlink == 0)
1172 : return 0;
1173 :
1174 : /*
1175 : * If we can't get the iolock just skip truncating the blocks past EOF
1176 : * because we could deadlock with the mmap_lock otherwise. We'll get
1177 : * another chance to drop them once the last reference to the inode is
1178 : * dropped, so we'll never leak blocks permanently.
1179 : */
1180 455859584 : if (!want_free_eofblocks || !xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
1181 57427717 : return 0;
1182 :
1183 398434787 : if (xfs_can_free_eofblocks(ip, false)) {
1184 : /*
1185 : * Check if the inode is being opened, written and closed
1186 : * frequently and we have delayed allocation blocks outstanding
1187 : * (e.g. streaming writes from the NFS server), truncating the
1188 : * blocks past EOF will cause fragmentation to occur.
1189 : *
1190 : * In this case don't do the truncation, but we have to be
1191 : * careful how we detect this case. Blocks beyond EOF show up as
1192 : * i_delayed_blks even when the inode is clean, so we need to
1193 : * truncate them away first before checking for a dirty release.
1194 : * Hence on the first dirty close we will still remove the
1195 : * speculative allocation, but after that we will leave it in
1196 : * place.
1197 : */
1198 46820182 : if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
1199 17817985 : goto out_unlock;
1200 :
1201 5592291 : error = xfs_free_eofblocks(ip);
1202 5593480 : if (error)
1203 2 : goto out_unlock;
1204 :
1205 5593478 : xfs_iflags_set(ip, XFS_IDIRTY_RELEASE);
1206 : }
1207 :
1208 375018241 : out_unlock:
1209 398429795 : xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1210 398429795 : return error;
1211 : }
1212 :
1213 : /*
1214 : * Mark all the buffers attached to this directory stale. In theory we should
1215 : * never be freeing a directory with any blocks at all, but this covers the
1216 : * case where we've recovered a directory swap with a "temporary" directory
1217 : * created by online repair and now need to dump it.
1218 : */
1219 : STATIC void
1220 0 : xfs_inactive_dir(
1221 : struct xfs_inode *dp)
1222 : {
1223 0 : struct xfs_iext_cursor icur;
1224 0 : struct xfs_bmbt_irec got;
1225 0 : struct xfs_mount *mp = dp->i_mount;
1226 0 : struct xfs_da_geometry *geo = mp->m_dir_geo;
1227 0 : struct xfs_ifork *ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
1228 0 : xfs_fileoff_t off;
1229 :
1230 : /*
1231 : * Invalidate each directory block. All directory blocks are of
1232 : * fsbcount length and alignment, so we only need to walk those same
1233 : * offsets. We hold the only reference to this inode, so we must wait
1234 : * for the buffer locks.
1235 : */
1236 0 : for_each_xfs_iext(ifp, &icur, &got) {
1237 0 : for (off = round_up(got.br_startoff, geo->fsbcount);
1238 0 : off < got.br_startoff + got.br_blockcount;
1239 0 : off += geo->fsbcount) {
1240 0 : struct xfs_buf *bp = NULL;
1241 0 : xfs_fsblock_t fsbno;
1242 0 : int error;
1243 :
1244 0 : fsbno = (off - got.br_startoff) + got.br_startblock;
1245 0 : error = xfs_buf_incore(mp->m_ddev_targp,
1246 0 : XFS_FSB_TO_DADDR(mp, fsbno),
1247 0 : XFS_FSB_TO_BB(mp, geo->fsbcount),
1248 : XBF_LIVESCAN, &bp);
1249 0 : if (error)
1250 0 : continue;
1251 :
1252 0 : xfs_buf_stale(bp);
1253 0 : xfs_buf_relse(bp);
1254 : }
1255 : }
1256 0 : }
1257 :
1258 : /*
1259 : * xfs_inactive_truncate
1260 : *
1261 : * Called to perform a truncate when an inode becomes unlinked.
1262 : */
1263 : STATIC int
1264 2701842 : xfs_inactive_truncate(
1265 : struct xfs_inode *ip)
1266 : {
1267 2701842 : struct xfs_mount *mp = ip->i_mount;
1268 2701842 : struct xfs_trans *tp;
1269 2701842 : int error;
1270 :
1271 2701842 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
1272 2702871 : if (error) {
1273 1220 : ASSERT(xfs_is_shutdown(mp));
1274 610 : return error;
1275 : }
1276 2702261 : xfs_ilock(ip, XFS_ILOCK_EXCL);
1277 2702194 : xfs_trans_ijoin(tp, ip, 0);
1278 :
1279 : /*
1280 : * Log the inode size first to prevent stale data exposure in the event
1281 : * of a system crash before the truncate completes. See the related
1282 : * comment in xfs_vn_setattr_size() for details.
1283 : */
1284 2701307 : ip->i_disk_size = 0;
1285 2701307 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1286 :
1287 2702250 : error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
1288 2702282 : if (error)
1289 1334 : goto error_trans_cancel;
1290 :
1291 2700948 : ASSERT(ip->i_df.if_nextents == 0);
1292 :
1293 2700948 : error = xfs_trans_commit(tp);
1294 2700950 : if (error)
1295 0 : goto error_unlock;
1296 :
1297 2700950 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
1298 2700950 : return 0;
1299 :
1300 : error_trans_cancel:
1301 1334 : xfs_trans_cancel(tp);
1302 1334 : error_unlock:
1303 1334 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
1304 1334 : return error;
1305 : }
1306 :
1307 : /*
1308 : * xfs_inactive_ifree()
1309 : *
1310 : * Perform the inode free when an inode is unlinked.
1311 : */
1312 : STATIC int
1313 38025807 : xfs_inactive_ifree(
1314 : struct xfs_inode *ip)
1315 : {
1316 38025807 : struct xfs_mount *mp = ip->i_mount;
1317 38025807 : struct xfs_trans *tp;
1318 38025807 : int error;
1319 :
1320 : /*
1321 : * We try to use a per-AG reservation for any block needed by the finobt
1322 : * tree, but as the finobt feature predates the per-AG reservation
1323 : * support a degraded file system might not have enough space for the
1324 : * reservation at mount time. In that case try to dip into the reserved
1325 : * pool and pray.
1326 : *
1327 : * Send a warning if the reservation does happen to fail, as the inode
1328 : * now remains allocated and sits on the unlinked list until the fs is
1329 : * repaired.
1330 : */
1331 38025807 : if (unlikely(mp->m_finobt_nores)) {
1332 0 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
1333 : XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
1334 : &tp);
1335 : } else {
1336 38025807 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
1337 : }
1338 38026740 : if (error) {
1339 0 : if (error == -ENOSPC) {
1340 0 : xfs_warn_ratelimited(mp,
1341 : "Failed to remove inode(s) from unlinked list. "
1342 : "Please free space, unmount and run xfs_repair.");
1343 : } else {
1344 0 : ASSERT(xfs_is_shutdown(mp));
1345 : }
1346 0 : return error;
1347 : }
1348 :
1349 : /*
1350 : * We do not hold the inode locked across the entire rolling transaction
1351 : * here. We only need to hold it for the first transaction that
1352 : * xfs_ifree() builds, which may mark the inode XFS_ISTALE if the
1353 : * underlying cluster buffer is freed. Relogging an XFS_ISTALE inode
1354 : * here breaks the relationship between cluster buffer invalidation and
1355 : * stale inode invalidation on cluster buffer item journal commit
1356 : * completion, and can result in leaving dirty stale inodes hanging
1357 : * around in memory.
1358 : *
1359 : * We have no need for serialising this inode operation against other
1360 : * operations - we freed the inode and hence reallocation is required
1361 : * and that will serialise on reallocating the space the deferops need
1362 : * to free. Hence we can unlock the inode on the first commit of
1363 : * the transaction rather than roll it right through the deferops. This
1364 : * avoids relogging the XFS_ISTALE inode.
1365 : *
1366 : * We check that xfs_ifree() hasn't grown an internal transaction roll
1367 : * by asserting that the inode is still locked when it returns.
1368 : */
1369 38026740 : xfs_ilock(ip, XFS_ILOCK_EXCL);
1370 38026767 : xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1371 :
1372 38024697 : error = xfs_ifree(tp, ip);
1373 38026010 : ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1374 38026010 : if (error) {
1375 : /*
1376 : * If we fail to free the inode, shut down. The cancel
1377 : * might do that, we need to make sure. Otherwise the
1378 : * inode might be lost for a long time or forever.
1379 : */
1380 306 : if (!xfs_is_shutdown(mp)) {
1381 4 : xfs_notice(mp, "%s: xfs_ifree returned error %d",
1382 : __func__, error);
1383 4 : xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1384 : }
1385 153 : xfs_trans_cancel(tp);
1386 153 : return error;
1387 : }
1388 :
1389 : /*
1390 : * Credit the quota account(s). The inode is gone.
1391 : */
1392 38025857 : xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
1393 :
1394 38025049 : return xfs_trans_commit(tp);
1395 : }
1396 :
1397 : /*
1398 : * Returns true if we need to update the on-disk metadata before we can free
1399 : * the memory used by this inode. Updates include freeing post-eof
1400 : * preallocations; freeing COW staging extents; and marking the inode free in
1401 : * the inobt if it is on the unlinked list.
1402 : */
1403 : bool
1404 1075622036 : xfs_inode_needs_inactive(
1405 : struct xfs_inode *ip)
1406 : {
1407 1075622036 : struct xfs_mount *mp = ip->i_mount;
1408 1075622036 : struct xfs_ifork *cow_ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
1409 :
1410 : /*
1411 : * If the inode is already free, then there can be nothing
1412 : * to clean up here.
1413 : */
1414 1075622036 : if (VFS_I(ip)->i_mode == 0)
1415 : return false;
1416 :
1417 : /* If this is a read-only mount, don't do this (would generate I/O) */
1418 2151244072 : if (xfs_is_readonly(mp))
1419 : return false;
1420 :
1421 : /* If the log isn't running, push inodes straight to reclaim. */
1422 2148755648 : if (xfs_is_shutdown(mp) || xfs_has_norecovery(mp))
1423 : return false;
1424 :
1425 : /* Metadata inodes require explicit resource cleanup. */
1426 769531591 : if (xfs_is_metadata_inode(ip))
1427 : return false;
1428 :
1429 : /* Want to clean out the cow blocks if there are any. */
1430 769259603 : if (cow_ifp && cow_ifp->if_bytes > 0)
1431 : return true;
1432 :
1433 : /* Unlinked files must be freed. */
1434 769223672 : if (VFS_I(ip)->i_nlink == 0)
1435 : return true;
1436 :
1437 : /*
1438 : * This file isn't being freed, so check if there are post-eof blocks
1439 : * to free. @force is true because we are evicting an inode from the
1440 : * cache. Post-eof blocks must be freed, lest we end up with broken
1441 : * free space accounting.
1442 : *
1443 : * Note: don't bother with iolock here since lockdep complains about
1444 : * acquiring it in reclaim context. We have the only reference to the
1445 : * inode at this point anyways.
1446 : */
1447 731284321 : return xfs_can_free_eofblocks(ip, true);
1448 : }
1449 :
1450 : /*
1451 : * Save health status somewhere, if we're dumping an inode with uncorrected
1452 : * errors and online repair isn't running.
1453 : */
1454 : static inline void
1455 38262893 : xfs_inactive_health(
1456 : struct xfs_inode *ip)
1457 : {
1458 38262893 : struct xfs_mount *mp = ip->i_mount;
1459 38262893 : struct xfs_perag *pag;
1460 38262893 : unsigned int sick;
1461 38262893 : unsigned int checked;
1462 :
1463 38262893 : xfs_inode_measure_sickness(ip, &sick, &checked);
1464 38256714 : if (!sick)
1465 38256714 : return;
1466 :
1467 0 : trace_xfs_inode_unfixed_corruption(ip, sick);
1468 :
1469 0 : if (sick & XFS_SICK_INO_FORGET)
1470 : return;
1471 :
1472 0 : pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
1473 0 : if (!pag) {
1474 : /* There had better still be a perag structure! */
1475 0 : ASSERT(0);
1476 0 : return;
1477 : }
1478 :
1479 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_INODES);
1480 0 : xfs_perag_put(pag);
1481 : }
1482 :
1483 : /*
1484 : * xfs_inactive
1485 : *
1486 : * This is called when the reference count for the vnode
1487 : * goes to zero. If the file has been unlinked, then it must
1488 : * now be truncated. Also, we clear all of the read-ahead state
1489 : * kept for the inode here since the file is now closed.
1490 : */
1491 : int
1492 38268533 : xfs_inactive(
1493 : xfs_inode_t *ip)
1494 : {
1495 38268533 : struct xfs_mount *mp;
1496 38268533 : int error = 0;
1497 38268533 : int truncate = 0;
1498 :
1499 : /*
1500 : * If the inode is already free, then there can be nothing
1501 : * to clean up here.
1502 : */
1503 38268533 : if (VFS_I(ip)->i_mode == 0) {
1504 0 : ASSERT(ip->i_df.if_broot_bytes == 0);
1505 0 : goto out;
1506 : }
1507 :
1508 38268533 : mp = ip->i_mount;
1509 76531861 : ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
1510 :
1511 38263328 : xfs_inactive_health(ip);
1512 :
1513 : /* If this is a read-only mount, don't do this (would generate I/O) */
1514 76536986 : if (xfs_is_readonly(mp))
1515 0 : goto out;
1516 :
1517 : /* Metadata inodes require explicit resource cleanup. */
1518 38268493 : if (xfs_is_metadata_inode(ip))
1519 0 : goto out;
1520 :
1521 : /* Try to clean out the cow blocks if there are any. */
1522 76536986 : if (xfs_inode_has_cow_data(ip))
1523 35931 : xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
1524 :
1525 38268493 : if (VFS_I(ip)->i_nlink != 0) {
1526 : /*
1527 : * force is true because we are evicting an inode from the
1528 : * cache. Post-eof blocks must be freed, lest we end up with
1529 : * broken free space accounting.
1530 : *
1531 : * Note: don't bother with iolock here since lockdep complains
1532 : * about acquiring it in reclaim context. We have the only
1533 : * reference to the inode at this point anyways.
1534 : */
1535 246113 : if (xfs_can_free_eofblocks(ip, true))
1536 218875 : error = xfs_free_eofblocks(ip);
1537 :
1538 246117 : goto out;
1539 : }
1540 :
1541 38022380 : if (S_ISREG(VFS_I(ip)->i_mode) &&
1542 10030855 : (ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
1543 7344939 : ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
1544 : truncate = 1;
1545 :
1546 38022380 : error = xfs_qm_dqattach(ip);
1547 38019085 : if (error)
1548 10 : goto out;
1549 :
1550 38019075 : if (S_ISDIR(VFS_I(ip)->i_mode) && ip->i_df.if_nextents > 0) {
1551 0 : xfs_inactive_dir(ip);
1552 0 : truncate = 1;
1553 : }
1554 :
1555 38019075 : if (S_ISLNK(VFS_I(ip)->i_mode))
1556 25830040 : error = xfs_inactive_symlink(ip);
1557 12189035 : else if (truncate)
1558 2701672 : error = xfs_inactive_truncate(ip);
1559 38022546 : if (error)
1560 1958 : goto out;
1561 :
1562 : /*
1563 : * If there are attributes associated with the file then blow them away
1564 : * now. The code calls a routine that recursively deconstructs the
1565 : * attribute fork. It also blows away the in-core attribute fork.
1566 : */
1567 38020588 : if (xfs_inode_has_attr_fork(ip)) {
1568 37370720 : error = xfs_attr_inactive(ip);
1569 37374709 : if (error)
1570 197 : goto out;
1571 : }
1572 :
1573 38024380 : ASSERT(ip->i_forkoff == 0);
1574 :
1575 : /*
1576 : * Free the inode.
1577 : */
1578 38024380 : error = xfs_inactive_ifree(ip);
1579 :
1580 38275261 : out:
1581 : /*
1582 : * We're done making metadata updates for this inode, so we can release
1583 : * the attached dquots.
1584 : */
1585 38275261 : xfs_qm_dqdetach(ip);
1586 38275266 : return error;
1587 : }
1588 :
1589 : /*
1590 : * Find an inode on the unlinked list. This does not take references to the
1591 : * inode as we have existence guarantees by holding the AGI buffer lock, and
1592 : * only unlinked, referenced inodes can be on the unlinked inode list. If we
1593 : * don't find the inode in cache, then let the caller handle the situation.
1594 : */
1595 : struct xfs_inode *
1596 18170580 : xfs_iunlink_lookup(
1597 : struct xfs_perag *pag,
1598 : xfs_agino_t agino)
1599 : {
1600 18170580 : struct xfs_inode *ip;
1601 :
1602 18170580 : rcu_read_lock();
1603 18169306 : ip = radix_tree_lookup(&pag->pag_ici_root, agino);
1604 :
1605 : /*
1606 : * An inode missing from memory or stuck in RCU freeing limbo should not happen.
1607 : * Warn about this and let the caller handle the failure.
1608 : */
1609 36323353 : if (WARN_ON_ONCE(!ip || !ip->i_ino)) {
1610 0 : xfs_emerg(pag->pag_mount, "IUNLINK agno 0x%x agino 0x%x ino 0x%llx ip? %d", pag->pag_agno, agino, XFS_AGINO_TO_INO(pag->pag_mount, pag->pag_agno, agino), ip != NULL);
1611 0 : rcu_read_unlock();
1612 0 : return NULL;
1613 : }
1614 36331743 : ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM));
1615 36343673 : if (xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM))
1616 0 : xfs_emerg(pag->pag_mount, "IUNLINK agno 0x%x agino 0x%x ino 0x%llx ipino 0x%llx", pag->pag_agno, agino, XFS_AGINO_TO_INO(pag->pag_mount, pag->pag_agno, agino), ip->i_ino);
1617 18172772 : rcu_read_unlock();
1618 18172772 : return ip;
1619 : }
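
A minimal caller sketch (hypothetical fragment, not part of this file) for the
contract above, assuming the AGI buffer lock is already held so the looked-up
inode cannot be freed underneath us:

	struct xfs_inode	*ip;

	/* AGI is locked: unlinked-list membership keeps the inode alive */
	ip = xfs_iunlink_lookup(pag, agino);
	if (!ip) {
		/* cache miss; the recovery policy is the caller's choice */
		return -EFSCORRUPTED;
	}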
1620 :
1621 : /*
1622 : * Look up the inode number specified and if it is not already marked XFS_ISTALE
1623 : * mark it stale. We should only find clean inodes in this lookup that aren't
1624 : * already stale.
1625 : */
1626 : static void
1627 4113878 : xfs_ifree_mark_inode_stale(
1628 : struct xfs_perag *pag,
1629 : struct xfs_inode *free_ip,
1630 : xfs_ino_t inum)
1631 : {
1632 4113878 : struct xfs_mount *mp = pag->pag_mount;
1633 4113878 : struct xfs_inode_log_item *iip;
1634 4113878 : struct xfs_inode *ip;
1635 :
1636 4113878 : retry:
1637 4113878 : rcu_read_lock();
1638 4113816 : ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
1639 :
1640 : /* Inode not in memory, nothing to do */
1641 4114085 : if (!ip) {
1642 762952 : rcu_read_unlock();
1643 762952 : return;
1644 : }
1645 :
1646 : /*
 1647 : * Because this is an RCU-protected lookup, we could find a recently
 1648 : * freed or even reallocated inode during the lookup. We need to check
 1649 : * under the i_flags_lock for a valid inode here. Skip it if it is not
 1650 : * valid, is the wrong inode, or is stale.
1651 : */
1652 3351133 : spin_lock(&ip->i_flags_lock);
1653 3351132 : if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE))
1654 10964 : goto out_iflags_unlock;
1655 :
1656 : /*
 1657 : * Don't try to lock/unlock the current inode, but we _cannot_ skip any
 1658 : * other inode that we did not find in the list attached to the buffer
 1659 : * and that is not already marked stale. If we can't lock it, back off
 1660 : * and retry.
1661 : */
1662 3340168 : if (ip != free_ip) {
1663 3267619 : if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
1664 0 : spin_unlock(&ip->i_flags_lock);
1665 0 : rcu_read_unlock();
1666 0 : delay(1);
1667 0 : goto retry;
1668 : }
1669 : }
1670 3339999 : ip->i_flags |= XFS_ISTALE;
1671 :
1672 : /*
1673 : * If the inode is flushing, it is already attached to the buffer. All
 1674 : * we need to do here is mark the inode stale so buffer IO completion
1675 : * will remove it from the AIL.
1676 : */
1677 3339999 : iip = ip->i_itemp;
1678 3339999 : if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
1679 563 : ASSERT(!list_empty(&iip->ili_item.li_bio_list));
1680 563 : ASSERT(iip->ili_last_fields);
1681 563 : goto out_iunlock;
1682 : }
1683 :
1684 : /*
1685 : * Inodes not attached to the buffer can be released immediately.
1686 : * Everything else has to go through xfs_iflush_abort() on journal
1687 : * commit as the flock synchronises removal of the inode from the
1688 : * cluster buffer against inode reclaim.
1689 : */
1690 3339436 : if (!iip || list_empty(&iip->ili_item.li_bio_list))
1691 164020 : goto out_iunlock;
1692 :
1693 3175416 : __xfs_iflags_set(ip, XFS_IFLUSHING);
1694 3175416 : spin_unlock(&ip->i_flags_lock);
1695 3175469 : rcu_read_unlock();
1696 :
1697 : /* we have a dirty inode in memory that has not yet been flushed. */
1698 3175489 : spin_lock(&iip->ili_lock);
1699 3175546 : iip->ili_last_fields = iip->ili_fields;
1700 3175546 : iip->ili_fields = 0;
1701 3175546 : iip->ili_fsync_fields = 0;
1702 3175546 : spin_unlock(&iip->ili_lock);
1703 3175521 : ASSERT(iip->ili_last_fields);
1704 :
1705 3175521 : if (ip != free_ip)
1706 3102938 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
1707 : return;
1708 :
1709 164583 : out_iunlock:
1710 164583 : if (ip != free_ip)
1711 164562 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
1712 21 : out_iflags_unlock:
1713 175595 : spin_unlock(&ip->i_flags_lock);
1714 175604 : rcu_read_unlock();
1715 : }
1716 :
1717 : /*
1718 : * A big issue when freeing the inode cluster is that we _cannot_ skip any
1719 : * inodes that are in memory - they all must be marked stale and attached to
1720 : * the cluster buffer.
1721 : */
1722 : int
1723 72604 : xfs_ifree_cluster(
1724 : struct xfs_trans *tp,
1725 : struct xfs_perag *pag,
1726 : struct xfs_inode *free_ip,
1727 : struct xfs_icluster *xic)
1728 : {
1729 72604 : struct xfs_mount *mp = free_ip->i_mount;
1730 72604 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
1731 72604 : struct xfs_buf *bp;
1732 72604 : xfs_daddr_t blkno;
1733 72604 : xfs_ino_t inum = xic->first_ino;
1734 72604 : int nbufs;
1735 72604 : int i, j;
1736 72604 : int ioffset;
1737 72604 : int error;
1738 :
1739 72604 : nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
1740 :
1741 217812 : for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
1742 : /*
1743 : * The allocation bitmap tells us which inodes of the chunk were
1744 : * physically allocated. Skip the cluster if an inode falls into
1745 : * a sparse region.
1746 : */
1747 145207 : ioffset = inum - xic->first_ino;
1748 145207 : if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
1749 16602 : ASSERT(ioffset % igeo->inodes_per_cluster == 0);
1750 16602 : continue;
1751 : }
1752 :
1753 128605 : blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
1754 : XFS_INO_TO_AGBNO(mp, inum));
1755 :
1756 : /*
1757 : * We obtain and lock the backing buffer first in the process
1758 : * here to ensure dirty inodes attached to the buffer remain in
1759 : * the flushing state while we mark them stale.
1760 : *
1761 : * If we scan the in-memory inodes first, then buffer IO can
1762 : * complete before we get a lock on it, and hence we may fail
1763 : * to mark all the active inodes on the buffer stale.
1764 : */
1765 128605 : error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
1766 128605 : mp->m_bsize * igeo->blocks_per_cluster,
1767 : XBF_UNMAPPED, &bp);
1768 128606 : if (error)
1769 0 : return error;
1770 :
1771 : /*
1772 : * This buffer may not have been correctly initialised as we
1773 : * didn't read it from disk. That's not important because we are
 1774 : * only using it to mark the buffer as stale in the log, and to
1775 : * attach stale cached inodes on it. That means it will never be
1776 : * dispatched for IO. If it is, we want to know about it, and we
 1777 : * want it to fail. We can achieve this by adding a write
1778 : * verifier to the buffer.
1779 : */
1780 128606 : bp->b_ops = &xfs_inode_buf_ops;
1781 :
1782 : /*
1783 : * Now we need to set all the cached clean inodes as XFS_ISTALE,
1784 : * too. This requires lookups, and will skip inodes that we've
1785 : * already marked XFS_ISTALE.
1786 : */
1787 4242511 : for (i = 0; i < igeo->inodes_per_cluster; i++)
1788 4113910 : xfs_ifree_mark_inode_stale(pag, free_ip, inum + i);
1789 :
1790 128601 : xfs_trans_stale_inode_buf(tp, bp);
1791 128595 : xfs_trans_binval(tp, bp);
1792 : }
1793 : return 0;
1794 : }
1795 :
1796 : /*
1797 : * This is called to return an inode to the inode free list. The inode should
1798 : * already be truncated to 0 length and have no pages associated with it. This
1799 : * routine also assumes that the inode is already a part of the transaction.
1800 : *
1801 : * The on-disk copy of the inode will have been added to the list of unlinked
1802 : * inodes in the AGI. We need to remove the inode from that list atomically with
1803 : * respect to freeing it here.
1804 : */
1805 : int
1806 38025306 : xfs_ifree(
1807 : struct xfs_trans *tp,
1808 : struct xfs_inode *ip)
1809 : {
1810 38025306 : struct xfs_mount *mp = ip->i_mount;
1811 38025306 : struct xfs_perag *pag;
1812 38025306 : struct xfs_icluster xic = { 0 };
1813 38025306 : struct xfs_inode_log_item *iip = ip->i_itemp;
1814 38025306 : int error;
1815 :
1816 38025306 : ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1817 38025306 : ASSERT(VFS_I(ip)->i_nlink == 0);
1818 38025306 : ASSERT(ip->i_df.if_nextents == 0);
1819 38025306 : ASSERT(ip->i_disk_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
1820 38025306 : ASSERT(ip->i_nblocks == 0);
1821 :
1822 38025306 : pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
1823 :
1824 38020887 : error = xfs_dir_ifree(tp, pag, ip, &xic);
1825 38025420 : if (error)
1826 153 : goto out;
1827 :
1828 76051067 : if (xfs_iflags_test(ip, XFS_IPRESERVE_DM_FIELDS))
1829 0 : xfs_iflags_clear(ip, XFS_IPRESERVE_DM_FIELDS);
1830 :
1831 : /* Don't attempt to replay owner changes for a deleted inode */
1832 38025800 : spin_lock(&iip->ili_lock);
1833 38025712 : iip->ili_fields &= ~(XFS_ILOG_AOWNER | XFS_ILOG_DOWNER);
1834 38025712 : spin_unlock(&iip->ili_lock);
1835 :
1836 38025281 : if (xic.deleted)
1837 72603 : error = xfs_ifree_cluster(tp, pag, ip, &xic);
1838 37952678 : out:
1839 38025436 : xfs_perag_put(pag);
1840 38026422 : return error;
1841 : }
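
A hypothetical caller sketch of the preconditions documented above (the
out_cancel label is invented for illustration): the inode must be ILOCKed,
truncated to zero length, unlinked, and already joined to the transaction:

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(VFS_I(ip)->i_nlink == 0);

	error = xfs_ifree(tp, ip);	/* also pulls ip off the AGI unlinked list */
	if (error)
		goto out_cancel;
	error = xfs_trans_commit(tp);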
1842 :
1843 : /*
1844 : * This is called to unpin an inode. The caller must have the inode locked
1845 : * in at least shared mode so that the buffer cannot be subsequently pinned
1846 : * once someone is waiting for it to be unpinned.
1847 : */
1848 : static void
1849 17 : xfs_iunpin(
1850 : struct xfs_inode *ip)
1851 : {
1852 17 : ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
1853 :
1854 17 : trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
1855 :
1856 : /* Give the log a push to start the unpinning I/O */
1857 17 : xfs_log_force_seq(ip->i_mount, ip->i_itemp->ili_commit_seq, 0, NULL);
1858 :
1859 17 : }
1860 :
1861 : static void
1862 17 : __xfs_iunpin_wait(
1863 : struct xfs_inode *ip)
1864 : {
1865 17 : wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
1866 17 : DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
1867 :
1868 17 : xfs_iunpin(ip);
1869 :
1870 17 : do {
1871 17 : prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
1872 17 : if (xfs_ipincount(ip))
1873 7 : io_schedule();
1874 17 : } while (xfs_ipincount(ip));
1875 17 : finish_wait(wq, &wait.wq_entry);
1876 17 : }
1877 :
1878 : void
1879 305761830 : xfs_iunpin_wait(
1880 : struct xfs_inode *ip)
1881 : {
1882 305761830 : if (xfs_ipincount(ip))
1883 17 : __xfs_iunpin_wait(ip);
1884 305761830 : }
1885 :
1886 : /*
1887 : * Removing an inode from the namespace involves removing the directory entry
1888 : * and dropping the link count on the inode. Removing the directory entry can
 1889 : * result in locking an AGF (directory blocks were freed), and dropping the
 1890 : * link count can result in placing the inode on an unlinked list, which results in
1891 : * locking an AGI.
1892 : *
1893 : * The big problem here is that we have an ordering constraint on AGF and AGI
1894 : * locking - inode allocation locks the AGI, then can allocate a new extent for
1895 : * new inodes, locking the AGF after the AGI. Similarly, freeing the inode
1896 : * removes the inode from the unlinked list, requiring that we lock the AGI
1897 : * first, and then freeing the inode can result in an inode chunk being freed
1898 : * and hence freeing disk space requiring that we lock an AGF.
1899 : *
1900 : * Hence the ordering that is imposed by other parts of the code is AGI before
1901 : * AGF. This means we cannot remove the directory entry before we drop the inode
1902 : * reference count and put it on the unlinked list as this results in a lock
1903 : * order of AGF then AGI, and this can deadlock against inode allocation and
1904 : * freeing. Therefore we must drop the link counts before we remove the
1905 : * directory entry.
1906 : *
1907 : * This is still safe from a transactional point of view - it is not until we
1908 : * get to xfs_defer_finish() that we have the possibility of multiple
1909 : * transactions in this operation. Hence as long as we remove the directory
1910 : * entry and drop the link count in the first transaction of the remove
1911 : * operation, there are no transactional constraints on the ordering here.
1912 : */
1913 : int
1914 40996098 : xfs_remove(
1915 : struct xfs_inode *dp,
1916 : struct xfs_name *name,
1917 : struct xfs_inode *ip)
1918 : {
1919 40996098 : struct xfs_dir_update du = {
1920 : .dp = dp,
1921 : .name = name,
1922 : .ip = ip,
1923 : };
1924 40996098 : struct xfs_mount *mp = dp->i_mount;
1925 40996098 : struct xfs_trans *tp = NULL;
1926 40996098 : int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
1927 40996098 : int dontcare;
1928 40996098 : int error = 0;
1929 40996098 : uint resblks;
1930 :
1931 40996098 : trace_xfs_remove(dp, name);
1932 :
1933 81993580 : if (xfs_is_shutdown(mp))
1934 : return -EIO;
1935 :
1936 40996202 : error = xfs_qm_dqattach(dp);
1937 40995113 : if (error)
1938 3 : goto std_return;
1939 :
1940 40995110 : error = xfs_qm_dqattach(ip);
1941 40994769 : if (error)
1942 0 : goto std_return;
1943 :
1944 40994769 : error = xfs_parent_start(mp, &du.parent);
1945 40995673 : if (error)
1946 0 : goto std_return;
1947 :
1948 : /*
1949 : * We try to get the real space reservation first, allowing for
1950 : * directory btree deletion(s) implying possible bmap insert(s). If we
1951 : * can't get the space reservation then we use 0 instead, and avoid the
1952 : * bmap btree insert(s) in the directory code by, if the bmap insert
1953 : * tries to happen, instead trimming the LAST block from the directory.
1954 : *
1955 : * Ignore EDQUOT and ENOSPC being returned via nospace_error because
1956 : * the directory code can handle a reservationless update and we don't
1957 : * want to prevent a user from trying to free space by deleting things.
1958 : */
1959 40995673 : resblks = xfs_remove_space_res(mp, name->len);
1960 40995268 : error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks,
1961 : &tp, &dontcare);
1962 40996265 : if (error) {
1963 1 : ASSERT(error != -ENOSPC);
1964 1 : goto out_parent;
1965 : }
1966 :
1967 40996264 : error = xfs_dir_remove_child(tp, resblks, &du);
1968 40994857 : if (error)
1969 2055215 : goto out_trans_cancel;
1970 :
1971 : /*
1972 : * If this is a synchronous mount, make sure that the
1973 : * remove transaction goes to disk before returning to
1974 : * the user.
1975 : */
1976 38939642 : if (xfs_has_wsync(mp) || xfs_has_dirsync(mp))
1977 218 : xfs_trans_set_sync(tp);
1978 :
1979 38939642 : error = xfs_trans_commit(tp);
1980 38940957 : if (error)
1981 1 : goto out_unlock;
1982 :
1983 38940956 : if (is_dir && xfs_inode_is_filestream(ip))
1984 802 : xfs_filestream_deassociate(ip);
1985 :
1986 38940956 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
1987 38941858 : xfs_iunlock(dp, XFS_ILOCK_EXCL);
1988 38941807 : xfs_parent_finish(mp, du.parent);
1989 : return 0;
1990 :
1991 : out_trans_cancel:
1992 2055215 : xfs_trans_cancel(tp);
1993 2055231 : out_unlock:
1994 2055231 : xfs_iunlock(ip, XFS_ILOCK_EXCL);
1995 2055227 : xfs_iunlock(dp, XFS_ILOCK_EXCL);
1996 2055231 : out_parent:
1997 2055231 : xfs_parent_finish(mp, du.parent);
1998 : std_return:
1999 : return error;
2000 : }
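
The AGI-before-AGF rule discussed above also appears directly in xfs_rename()
below, which reads all the AGIs it needs before any directory block allocation
can touch an AGF. A condensed, hypothetical sketch of that ordering (agibp is
an invented name):

	/* AGI first: unlinked list manipulation locks the AGI buffer */
	error = xfs_read_agi(pag, tp, &agibp);
	if (error)
		goto out_trans_cancel;
	/* only now may freeing directory blocks lock an AGF */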
2001 :
2002 : static inline void
2003 31330229 : xfs_iunlock_rename(
2004 : struct xfs_inode **i_tab,
2005 : int num_inodes)
2006 : {
2007 31330229 : int i;
2008 :
2009 136568394 : for (i = num_inodes - 1; i >= 0; i--) {
2010 : /* Skip duplicate inodes if src and target dps are the same */
2011 105238171 : if (!i_tab[i] || (i > 0 && i_tab[i] == i_tab[i - 1]))
2012 1211717 : continue;
2013 104026454 : xfs_iunlock(i_tab[i], XFS_ILOCK_EXCL);
2014 : }
2015 31330223 : }
2016 :
2017 : /*
2018 : * Enter all inodes for a rename transaction into a sorted array.
2019 : */
2020 : #define __XFS_SORT_INODES 5
2021 : STATIC void
2022 31569503 : xfs_sort_for_rename(
2023 : struct xfs_inode *dp1, /* in: old (source) directory inode */
2024 : struct xfs_inode *dp2, /* in: new (target) directory inode */
2025 : struct xfs_inode *ip1, /* in: inode of old entry */
2026 : struct xfs_inode *ip2, /* in: inode of new entry */
2027 : struct xfs_inode *wip, /* in: whiteout inode */
2028 : struct xfs_inode **i_tab,/* out: sorted array of inodes */
2029 : int *num_inodes) /* in/out: inodes in array */
2030 : {
2031 31569503 : int i;
2032 :
2033 31569503 : ASSERT(*num_inodes == __XFS_SORT_INODES);
2034 31569503 : memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2035 :
2036 : /*
2037 : * i_tab contains a list of pointers to inodes. We initialize
2038 : * the table here & we'll sort it. We will then use it to
2039 : * order the acquisition of the inode locks.
2040 : *
2041 : * Note that the table may contain duplicates. e.g., dp1 == dp2.
2042 : */
2043 31569503 : i = 0;
2044 31569503 : i_tab[i++] = dp1;
2045 31569503 : i_tab[i++] = dp2;
2046 31569503 : i_tab[i++] = ip1;
2047 31569503 : if (ip2)
2048 8926538 : i_tab[i++] = ip2;
2049 31569503 : if (wip)
2050 2420819 : i_tab[i++] = wip;
2051 31569503 : *num_inodes = i;
2052 :
2053 31569503 : xfs_sort_inodes(i_tab, *num_inodes);
2054 31569489 : }
2055 :
2056 : void
2057 31569496 : xfs_sort_inodes(
2058 : struct xfs_inode **i_tab,
2059 : unsigned int num_inodes)
2060 : {
2061 31569496 : int i, j;
2062 :
2063 31569496 : ASSERT(num_inodes <= __XFS_SORT_INODES);
2064 :
2065 : /*
2066 : * Sort the elements via bubble sort. (Remember, there are at
2067 : * most 5 elements to sort, so this is adequate.)
2068 : */
2069 137625242 : for (i = 0; i < num_inodes; i++) {
2070 363594181 : for (j = 1; j < num_inodes; j++) {
2071 257538435 : if (i_tab[j]->i_ino < i_tab[j-1]->i_ino)
2072 257538435 : swap(i_tab[j], i_tab[j - 1]);
2073 : }
2074 : }
2075 31569496 : }
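
A hypothetical usage sketch of the sort-then-lock pattern, assuming three
distinct inodes; xfs_lock_inodes() is the same helper xfs_rename() calls below:

	struct xfs_inode	*tab[3] = { dp1, dp2, ip1 };

	xfs_sort_inodes(tab, 3);			/* ascending inode number */
	xfs_lock_inodes(tab, 3, XFS_ILOCK_EXCL);	/* lock in that order */
	/* ... modify all three inodes in one transaction ... */
	xfs_iunlock_rename(tab, 3);			/* unlock in reverse order */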
2076 :
2077 : /*
2078 : * xfs_rename_alloc_whiteout()
2079 : *
2080 : * Return a referenced, unlinked, unlocked inode that can be used as a
2081 : * whiteout in a rename transaction. We use a tmpfile inode here so that if we
2082 : * crash between allocating the inode and linking it into the rename transaction
2083 : * recovery will free the inode and we won't leak it.
2084 : */
2085 : static int
2086 2458602 : xfs_rename_alloc_whiteout(
2087 : struct mnt_idmap *idmap,
2088 : struct xfs_name *src_name,
2089 : struct xfs_inode *dp,
2090 : struct xfs_inode **wip)
2091 : {
2092 2458602 : struct xfs_icreate_args args = {
2093 : .nlink = 0,
2094 : };
2095 2458602 : struct xfs_inode *tmpfile;
2096 2458602 : struct qstr name;
2097 2458602 : int error;
2098 :
2099 2458602 : xfs_icreate_args_inherit(&args, dp, idmap, S_IFCHR | WHITEOUT_MODE,
2100 : xfs_has_parent(dp->i_mount));
2101 :
2102 2458598 : error = xfs_create_tmpfile(dp, &args, &tmpfile);
2103 2458606 : if (error)
2104 : return error;
2105 :
2106 2420822 : name.name = src_name->name;
2107 2420822 : name.len = src_name->len;
2108 2420822 : error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name);
2109 2420822 : if (error) {
2110 0 : xfs_finish_inode_setup(tmpfile);
2111 0 : xfs_irele(tmpfile);
2112 0 : return error;
2113 : }
2114 :
2115 : /*
2116 : * Prepare the tmpfile inode as if it were created through the VFS.
2117 : * Complete the inode setup and flag it as linkable. nlink is already
2118 : * zero, so we can skip the drop_nlink.
2119 : */
2120 2420822 : xfs_setup_iops(tmpfile);
2121 2420820 : xfs_finish_inode_setup(tmpfile);
2122 2420821 : VFS_I(tmpfile)->i_state |= I_LINKABLE;
2123 :
2124 2420821 : *wip = tmpfile;
2125 2420821 : return 0;
2126 : }
2127 :
2128 : /*
2129 : * xfs_rename
2130 : */
2131 : int
2132 31607277 : xfs_rename(
2133 : struct mnt_idmap *idmap,
2134 : struct xfs_inode *src_dp,
2135 : struct xfs_name *src_name,
2136 : struct xfs_inode *src_ip,
2137 : struct xfs_inode *target_dp,
2138 : struct xfs_name *target_name,
2139 : struct xfs_inode *target_ip,
2140 : unsigned int flags)
2141 : {
2142 31607277 : struct xfs_dir_update src = {
2143 : .dp = src_dp,
2144 : .name = src_name,
2145 : .ip = src_ip,
2146 : };
2147 31607277 : struct xfs_dir_update tgt = {
2148 : .dp = target_dp,
2149 : .name = target_name,
2150 : .ip = target_ip,
2151 : };
2152 31607277 : struct xfs_mount *mp = src_dp->i_mount;
2153 31607277 : struct xfs_trans *tp;
2154 31607277 : struct xfs_inode *wip = NULL; /* whiteout inode */
2155 31607277 : struct xfs_inode *inodes[__XFS_SORT_INODES];
2156 31607277 : int i;
2157 31607277 : int num_inodes = __XFS_SORT_INODES;
2158 31607277 : bool new_parent = (src_dp != target_dp);
2159 31607277 : bool src_is_directory =
2160 31607277 : S_ISDIR(VFS_I(src_ip)->i_mode);
2161 31607277 : int spaceres;
2162 31607277 : bool retried = false;
2163 31607277 : int error, nospace_error = 0;
2164 31607277 : struct xfs_parent_defer *wip_pptr = NULL;
2165 :
2166 31607277 : trace_xfs_rename(src_dp, target_dp, src_name, target_name);
2167 :
2168 31607269 : if ((flags & RENAME_EXCHANGE) && !target_ip)
2169 : return -EINVAL;
2170 :
2171 : /*
2172 : * If we are doing a whiteout operation, allocate the whiteout inode
2173 : * we will be placing at the target and ensure the type is set
2174 : * appropriately.
2175 : */
2176 31607269 : if (flags & RENAME_WHITEOUT) {
2177 2458599 : error = xfs_rename_alloc_whiteout(idmap, src_name,
2178 : target_dp, &wip);
2179 2458604 : if (error)
2180 : return error;
2181 :
2182 : /* setup target dirent info as whiteout */
2183 2420820 : src_name->type = XFS_DIR3_FT_CHRDEV;
2184 : }
2185 :
2186 31569490 : xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
2187 : inodes, &num_inodes);
2188 :
2189 31569488 : error = xfs_parent_start(mp, &src.parent);
2190 31569494 : if (error)
2191 0 : goto out_release_wip;
2192 :
2193 31569494 : if (wip) {
2194 2420813 : error = xfs_parent_start_locked(mp, &wip_pptr);
2195 2420813 : if (error)
2196 0 : goto out_src_ip_pptr;
2197 : }
2198 :
2199 31569494 : if (target_ip) {
2200 8926530 : error = xfs_parent_start_locked(mp, &tgt.parent);
2201 8926530 : if (error)
2202 0 : goto out_wip_pptr;
2203 : }
2204 :
2205 31569494 : retry:
2206 31570675 : nospace_error = 0;
2207 63141335 : spaceres = xfs_rename_space_res(mp, src_name->len, target_ip != NULL,
2208 31570675 : target_name->len, wip != NULL);
2209 31570660 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
2210 31570690 : if (error == -ENOSPC) {
2211 239323 : nospace_error = error;
2212 239323 : spaceres = 0;
2213 239323 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
2214 : &tp);
2215 : }
2216 31570690 : if (error)
2217 337 : goto out_tgt_ip_pptr;
2218 :
2219 : /*
2220 : * We don't allow reservationless renaming when parent pointers are
2221 : * enabled because we can't back out if the xattrs must grow.
2222 : */
2223 31570353 : if (src.parent && nospace_error) {
2224 239323 : error = nospace_error;
2225 239323 : xfs_trans_cancel(tp);
2226 239323 : goto out_tgt_ip_pptr;
2227 : }
2228 :
2229 : /*
2230 : * Attach the dquots to the inodes
2231 : */
2232 31331030 : error = xfs_qm_vop_rename_dqattach(inodes);
2233 31331022 : if (error) {
2234 797 : xfs_trans_cancel(tp);
2235 797 : goto out_tgt_ip_pptr;
2236 : }
2237 :
2238 : /*
2239 : * Lock all the participating inodes. Depending upon whether
2240 : * the target_name exists in the target directory, and
2241 : * whether the target directory is the same as the source
2242 : * directory, we can lock from 2 to 5 inodes.
2243 : */
2244 31330225 : xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
2245 :
2246 : /*
2247 : * Join all the inodes to the transaction.
2248 : */
2249 31330217 : xfs_trans_ijoin(tp, src_dp, 0);
2250 31330227 : if (new_parent)
2251 30118515 : xfs_trans_ijoin(tp, target_dp, 0);
2252 31330227 : xfs_trans_ijoin(tp, src_ip, 0);
2253 31330222 : if (target_ip)
2254 8861851 : xfs_trans_ijoin(tp, target_ip, 0);
2255 31330213 : if (wip)
2256 2385662 : xfs_trans_ijoin(tp, wip, 0);
2257 :
2258 : /*
2259 : * If we are using project inheritance, we only allow renames
2260 : * into our tree when the project IDs are the same; else the
2261 : * tree quota mechanism would be circumvented.
2262 : */
2263 31330226 : if (unlikely((target_dp->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
2264 : target_dp->i_projid != src_ip->i_projid)) {
2265 0 : error = -EXDEV;
2266 0 : goto out_trans_cancel;
2267 : }
2268 :
2269 : /* RENAME_EXCHANGE is unique from here on. */
2270 31330226 : if (flags & RENAME_EXCHANGE) {
2271 8608838 : error = xfs_dir_exchange_children(tp, &src, &tgt, spaceres);
2272 8608839 : if (error)
2273 127 : goto out_trans_cancel;
2274 8608712 : goto out_commit;
2275 : }
2276 :
2277 : /*
2278 : * Try to reserve quota to handle an expansion of the target directory.
2279 : * We'll allow the rename to continue in reservationless mode if we hit
2280 : * a space usage constraint. If we trigger reservationless mode, save
2281 : * the errno if there isn't any free space in the target directory.
2282 : */
2283 22721388 : if (spaceres != 0) {
2284 22721388 : error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres,
2285 : 0, false);
2286 22721391 : if (error == -EDQUOT || error == -ENOSPC) {
2287 2122 : if (!retried) {
2288 1181 : xfs_trans_cancel(tp);
2289 1181 : xfs_iunlock_rename(inodes, num_inodes);
2290 1181 : xfs_blockgc_free_quota(target_dp, 0);
2291 1181 : retried = true;
2292 1181 : goto retry;
2293 : }
2294 :
2295 : nospace_error = error;
2296 : spaceres = 0;
2297 : error = 0;
2298 : }
2299 22720210 : if (error)
2300 0 : goto out_trans_cancel;
2301 : }
2302 :
2303 : /*
2304 : * We don't allow quotaless renaming when parent pointers are enabled
2305 : * because we can't back out if the xattrs must grow.
2306 : */
2307 22720210 : if (src.parent && nospace_error) {
2308 942 : error = nospace_error;
2309 942 : goto out_trans_cancel;
2310 : }
2311 :
2312 : /*
2313 : * Lock the AGI buffers we need to handle bumping the nlink of the
2314 : * whiteout inode off the unlinked list and to handle dropping the
2315 : * nlink of the target inode. Per locking order rules, do this in
2316 : * increasing AG order and before directory block allocation tries to
2317 : * grab AGFs because we grab AGIs before AGFs.
2318 : *
2319 : * The (vfs) caller must ensure that if src is a directory then
2320 : * target_ip is either null or an empty directory.
2321 : */
2322 93515738 : for (i = 0; i < num_inodes && inodes[i] != NULL; i++) {
2323 70796480 : if (inodes[i] == wip ||
2324 253016 : (inodes[i] == target_ip &&
2325 253016 : (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) {
2326 2613842 : struct xfs_perag *pag;
2327 2613842 : struct xfs_buf *bp;
2328 :
2329 2613842 : pag = xfs_perag_get(mp,
2330 2613842 : XFS_INO_TO_AGNO(mp, inodes[i]->i_ino));
2331 2613841 : error = xfs_read_agi(pag, tp, &bp);
2332 2613841 : xfs_perag_put(pag);
2333 2613841 : if (error)
2334 9 : goto out_trans_cancel;
2335 : }
2336 : }
2337 :
2338 22719258 : error = xfs_dir_rename_children(tp, &src, &tgt, spaceres, wip,
2339 : wip_pptr);
2340 22719258 : if (error)
2341 41 : goto out_trans_cancel;
2342 :
2343 22719217 : if (wip) {
2344 : /*
2345 : * Now we have a real link, clear the "I'm a tmpfile" state
2346 : * flag from the inode so it doesn't accidentally get misused in
2347 : * future.
2348 : */
2349 2385660 : VFS_I(wip)->i_state &= ~I_LINKABLE;
2350 : }
2351 :
2352 20333557 : out_commit:
2353 : /*
2354 : * If this is a synchronous mount, make sure that the rename
2355 : * transaction goes to disk before returning to the user.
2356 : */
2357 31327929 : if (xfs_has_wsync(tp->t_mountp) || xfs_has_dirsync(tp->t_mountp))
2358 4 : xfs_trans_set_sync(tp);
2359 :
2360 31327929 : error = xfs_trans_commit(tp);
2361 31327928 : nospace_error = 0;
2362 31327928 : goto out_unlock;
2363 :
2364 1119 : out_trans_cancel:
2365 1119 : xfs_trans_cancel(tp);
2366 31329047 : out_unlock:
2367 31329047 : xfs_iunlock_rename(inodes, num_inodes);
2368 31569508 : out_tgt_ip_pptr:
2369 31569508 : xfs_parent_finish(mp, tgt.parent);
2370 31569509 : out_wip_pptr:
2371 31569509 : xfs_parent_finish(mp, wip_pptr);
2372 31569509 : out_src_ip_pptr:
2373 31569509 : xfs_parent_finish(mp, src.parent);
2374 31569507 : out_release_wip:
2375 31569507 : if (wip)
2376 2420822 : xfs_irele(wip);
2377 31569501 : if (error == -ENOSPC && nospace_error)
2378 239321 : error = nospace_error;
2379 : return error;
2380 : }
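
The quota reserve/retry pattern used by xfs_rename() above, condensed into a
hypothetical sketch: on the first EDQUOT/ENOSPC we cancel, unlock, flush
speculative preallocations to reclaim quota, and retry exactly once:

retry:
	/* (re)allocate the transaction and lock the inodes, then: */
	error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres, 0, false);
	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
		xfs_trans_cancel(tp);
		xfs_iunlock_rename(inodes, num_inodes);
		xfs_blockgc_free_quota(target_dp, 0);	/* reclaim blocks */
		retried = true;
		goto retry;
	}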
2381 :
2382 : static int
2383 285535257 : xfs_iflush(
2384 : struct xfs_inode *ip,
2385 : struct xfs_buf *bp)
2386 : {
2387 285535257 : struct xfs_inode_log_item *iip = ip->i_itemp;
2388 285535257 : struct xfs_dinode *dip;
2389 285535257 : struct xfs_mount *mp = ip->i_mount;
2390 285535257 : int error;
2391 :
2392 285535257 : ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2393 571070514 : ASSERT(xfs_iflags_test(ip, XFS_IFLUSHING));
2394 285535257 : ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
2395 : ip->i_df.if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
2396 285535257 : ASSERT(iip->ili_item.li_buf == bp);
2397 :
2398 285535257 : dip = xfs_buf_offset(bp, ip->i_imap.im_boffset);
2399 :
2400 : /*
2401 : * We don't flush the inode if any of the following checks fail, but we
2402 : * do still update the log item and attach to the backing buffer as if
2403 : * the flush happened. This is a formality to facilitate predictable
 2404 : * error handling as the caller will shut down and fail the buffer.
2405 : */
2406 285535257 : error = -EFSCORRUPTED;
2407 285535257 : if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2408 : mp, XFS_ERRTAG_IFLUSH_1)) {
2409 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2410 : "%s: Bad inode %llu magic number 0x%x, ptr "PTR_FMT,
2411 : __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
2412 0 : goto flush_out;
2413 : }
2414 285535257 : if (ip->i_df.if_format == XFS_DINODE_FMT_RMAP) {
2415 36728 : if (!S_ISREG(VFS_I(ip)->i_mode) ||
2416 36728 : !(ip->i_diflags2 & XFS_DIFLAG2_METADIR)) {
2417 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2418 : "%s: Bad rt rmapbt inode %Lu, ptr "PTR_FMT,
2419 : __func__, ip->i_ino, ip);
2420 0 : goto flush_out;
2421 : }
2422 285498529 : } else if (ip->i_df.if_format == XFS_DINODE_FMT_REFCOUNT) {
2423 28277 : if (!S_ISREG(VFS_I(ip)->i_mode) ||
2424 28277 : !(ip->i_diflags2 & XFS_DIFLAG2_METADIR)) {
2425 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2426 : "%s: Bad rt refcountbt inode %Lu, ptr "PTR_FMT,
2427 : __func__, ip->i_ino, ip);
2428 0 : goto flush_out;
2429 : }
2430 285470252 : } else if (S_ISREG(VFS_I(ip)->i_mode)) {
2431 198750449 : if (XFS_TEST_ERROR(
2432 : ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
2433 : ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
2434 : mp, XFS_ERRTAG_IFLUSH_3)) {
2435 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2436 : "%s: Bad regular inode %llu, ptr "PTR_FMT,
2437 : __func__, ip->i_ino, ip);
2438 0 : goto flush_out;
2439 : }
2440 86719803 : } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
2441 59910059 : if (XFS_TEST_ERROR(
2442 : ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
2443 : ip->i_df.if_format != XFS_DINODE_FMT_BTREE &&
2444 : ip->i_df.if_format != XFS_DINODE_FMT_LOCAL,
2445 : mp, XFS_ERRTAG_IFLUSH_4)) {
2446 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2447 : "%s: Bad directory inode %llu, ptr "PTR_FMT,
2448 : __func__, ip->i_ino, ip);
2449 0 : goto flush_out;
2450 : }
2451 : }
2452 571070514 : if (XFS_TEST_ERROR(ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af) >
2453 : ip->i_nblocks, mp, XFS_ERRTAG_IFLUSH_5)) {
2454 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2455 : "%s: detected corrupt incore inode %llu, "
2456 : "total extents = %llu nblocks = %lld, ptr "PTR_FMT,
2457 : __func__, ip->i_ino,
2458 : ip->i_df.if_nextents + xfs_ifork_nextents(&ip->i_af),
2459 : ip->i_nblocks, ip);
2460 0 : goto flush_out;
2461 : }
2462 285535257 : if (XFS_TEST_ERROR(ip->i_forkoff > mp->m_sb.sb_inodesize,
2463 : mp, XFS_ERRTAG_IFLUSH_6)) {
2464 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2465 : "%s: bad inode %llu, forkoff 0x%x, ptr "PTR_FMT,
2466 : __func__, ip->i_ino, ip->i_forkoff, ip);
2467 0 : goto flush_out;
2468 : }
2469 :
2470 285535257 : if (xfs_inode_has_attr_fork(ip)) {
2471 283523801 : if (ip->i_af.if_format == XFS_DINODE_FMT_RMAP) {
2472 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2473 : "%s: rt rmapbt in inode %Lu attr fork, ptr "PTR_FMT,
2474 : __func__, ip->i_ino, ip);
2475 0 : goto flush_out;
2476 283523801 : } else if (ip->i_af.if_format == XFS_DINODE_FMT_REFCOUNT) {
2477 0 : xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2478 : "%s: rt refcountbt in inode %Lu attr fork, ptr "PTR_FMT,
2479 : __func__, ip->i_ino, ip);
2480 0 : goto flush_out;
2481 : }
2482 : }
2483 :
2484 : /*
 2485 : * Inode item log recovery for v2 inodes is dependent on the flushiter
2486 : * count for correct sequencing. We bump the flush iteration count so
2487 : * we can detect flushes which postdate a log record during recovery.
2488 : * This is redundant as we now log every change and hence this can't
 2489 : * happen, but we still need to do it to ensure backwards compatibility
2490 : * with old kernels that predate logging all inode changes.
2491 : */
2492 285535257 : if (!xfs_has_v3inodes(mp))
2493 242 : ip->i_flushiter++;
2494 :
2495 : /*
2496 : * If there are inline format data / attr forks attached to this inode,
2497 : * make sure they are not corrupt.
2498 : */
2499 344193412 : if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL &&
2500 58658155 : xfs_ifork_verify_local_data(ip))
2501 0 : goto flush_out;
2502 285535257 : if (xfs_inode_has_attr_fork(ip) &&
2503 472029400 : ip->i_af.if_format == XFS_DINODE_FMT_LOCAL &&
2504 188505599 : xfs_ifork_verify_local_attr(ip))
2505 0 : goto flush_out;
2506 :
2507 : /*
2508 : * Copy the dirty parts of the inode into the on-disk inode. We always
2509 : * copy out the core of the inode, because if the inode is dirty at all
2510 : * the core must be.
2511 : */
2512 285535257 : xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
2513 :
2514 : /* Wrap, we never let the log put out DI_MAX_FLUSH */
2515 285535257 : if (!xfs_has_v3inodes(mp)) {
2516 242 : if (ip->i_flushiter == DI_MAX_FLUSH)
2517 0 : ip->i_flushiter = 0;
2518 : }
2519 :
2520 285535257 : xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);
2521 285535257 : if (xfs_inode_has_attr_fork(ip))
2522 283523801 : xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);
2523 :
2524 : /*
2525 : * We've recorded everything logged in the inode, so we'd like to clear
2526 : * the ili_fields bits so we don't log and flush things unnecessarily.
2527 : * However, we can't stop logging all this information until the data
2528 : * we've copied into the disk buffer is written to disk. If we did we
2529 : * might overwrite the copy of the inode in the log with all the data
2530 : * after re-logging only part of it, and in the face of a crash we
2531 : * wouldn't have all the data we need to recover.
2532 : *
2533 : * What we do is move the bits to the ili_last_fields field. When
2534 : * logging the inode, these bits are moved back to the ili_fields field.
2535 : * In the xfs_buf_inode_iodone() routine we clear ili_last_fields, since
2536 : * we know that the information those bits represent is permanently on
2537 : * disk. As long as the flush completes before the inode is logged
2538 : * again, then both ili_fields and ili_last_fields will be cleared.
2539 : */
2540 : error = 0;
2541 285535257 : flush_out:
2542 285535257 : spin_lock(&iip->ili_lock);
2543 285535257 : iip->ili_last_fields = iip->ili_fields;
2544 285535257 : iip->ili_fields = 0;
2545 285535257 : iip->ili_fsync_fields = 0;
2546 285535257 : spin_unlock(&iip->ili_lock);
2547 :
2548 : /*
2549 : * Store the current LSN of the inode so that we can tell whether the
2550 : * item has moved in the AIL from xfs_buf_inode_iodone().
2551 : */
2552 285535257 : xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2553 : &iip->ili_item.li_lsn);
2554 :
2555 : /* generate the checksum. */
2556 285535257 : xfs_dinode_calc_crc(mp, dip);
2557 285535257 : if (error)
2558 0 : xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
2559 285535257 : return error;
2560 : }
2561 :
2562 : /*
2563 : * Non-blocking flush of dirty inode metadata into the backing buffer.
2564 : *
2565 : * The caller must have a reference to the inode and hold the cluster buffer
 2566 : * locked. The function will walk across all the inodes on the cluster buffer
 2567 : * that it can find and lock without blocking, and flush them to the cluster buffer.
2568 : *
2569 : * On successful flushing of at least one inode, the caller must write out the
2570 : * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
2571 : * the caller needs to release the buffer. On failure, the filesystem will be
2572 : * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
2573 : * will be returned.
2574 : */
2575 : int
2576 47283597 : xfs_iflush_cluster(
2577 : struct xfs_buf *bp)
2578 : {
2579 47283597 : struct xfs_mount *mp = bp->b_mount;
2580 47283597 : struct xfs_log_item *lip, *n;
2581 47283597 : struct xfs_inode *ip;
2582 47283597 : struct xfs_inode_log_item *iip;
2583 47283597 : int clcount = 0;
2584 47283597 : int error = 0;
2585 :
2586 : /*
2587 : * We must use the safe variant here as on shutdown xfs_iflush_abort()
2588 : * will remove itself from the list.
2589 : */
2590 342895505 : list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
2591 295611908 : iip = (struct xfs_inode_log_item *)lip;
2592 295611908 : ip = iip->ili_inode;
2593 :
2594 : /*
2595 : * Quick and dirty check to avoid locks if possible.
2596 : */
2597 295611908 : if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING))
2598 158553 : continue;
2599 295453355 : if (xfs_ipincount(ip))
2600 8362654 : continue;
2601 :
2602 : /*
2603 : * The inode is still attached to the buffer, which means it is
2604 : * dirty but reclaim might try to grab it. Check carefully for
2605 : * that, and grab the ilock while still holding the i_flags_lock
2606 : * to guarantee reclaim will not be able to reclaim this inode
2607 : * once we drop the i_flags_lock.
2608 : */
2609 287090701 : spin_lock(&ip->i_flags_lock);
2610 287090701 : ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
2611 287090701 : if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLUSHING)) {
2612 3 : spin_unlock(&ip->i_flags_lock);
2613 3 : continue;
2614 : }
2615 :
2616 : /*
2617 : * ILOCK will pin the inode against reclaim and prevent
2618 : * concurrent transactions modifying the inode while we are
2619 : * flushing the inode. If we get the lock, set the flushing
2620 : * state before we drop the i_flags_lock.
2621 : */
2622 287090698 : if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
2623 541304 : spin_unlock(&ip->i_flags_lock);
2624 541304 : continue;
2625 : }
2626 286549394 : __xfs_iflags_set(ip, XFS_IFLUSHING);
2627 286549394 : spin_unlock(&ip->i_flags_lock);
2628 :
2629 : /*
2630 : * Abort flushing this inode if we are shut down because the
2631 : * inode may not currently be in the AIL. This can occur when
2632 : * log I/O failure unpins the inode without inserting into the
2633 : * AIL, leaving a dirty/unpinned inode attached to the buffer
2634 : * that otherwise looks like it should be flushed.
2635 : */
2636 573098788 : if (xlog_is_shutdown(mp->m_log)) {
2637 1014133 : xfs_iunpin_wait(ip);
2638 1014133 : xfs_iflush_abort(ip);
2639 1014133 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
2640 1014133 : error = -EIO;
2641 1014133 : continue;
2642 : }
2643 :
2644 : /* don't block waiting on a log force to unpin dirty inodes */
2645 285535261 : if (xfs_ipincount(ip)) {
2646 4 : xfs_iflags_clear(ip, XFS_IFLUSHING);
2647 4 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
2648 4 : continue;
2649 : }
2650 :
2651 285535257 : if (!xfs_inode_clean(ip))
2652 285535257 : error = xfs_iflush(ip, bp);
2653 : else
2654 0 : xfs_iflags_clear(ip, XFS_IFLUSHING);
2655 285535257 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
2656 285535257 : if (error)
2657 : break;
2658 285535257 : clcount++;
2659 : }
2660 :
2661 47283597 : if (error) {
2662 : /*
 2663 : * Shut down first so we kill the log before we release this
2664 : * buffer. If it is an INODE_ALLOC buffer and pins the tail
2665 : * of the log, failing it before the _log_ is shut down can
2666 : * result in the log tail being moved forward in the journal
2667 : * on disk because log writes can still be taking place. Hence
2668 : * unpinning the tail will allow the ICREATE intent to be
2669 : * removed from the log an recovery will fail with uninitialised
2670 : * inode cluster buffers.
2671 : */
2672 269557 : xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
2673 269557 : bp->b_flags |= XBF_ASYNC;
2674 269557 : xfs_buf_ioend_fail(bp);
2675 269557 : return error;
2676 : }
2677 :
2678 47014040 : if (!clcount)
2679 : return -EAGAIN;
2680 :
2681 46941300 : XFS_STATS_INC(mp, xs_icluster_flushcnt);
2682 46941300 : XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
2683 46941300 : return 0;
2684 :
2685 : }
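
A hypothetical caller sketch of the buffer-ownership contract spelled out
above xfs_iflush_cluster():

	error = xfs_iflush_cluster(bp);
	switch (error) {
	case 0:
		/* at least one inode flushed: write out, then release bp */
		break;
	case -EAGAIN:
		/* nothing flushed: caller still owns bp and must release it */
		break;
	default:
		/* filesystem has shut down; bp has already been released */
		break;
	}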
2686 :
2687 : /* Release an inode. */
2688 : void
2689 75819402452 : xfs_irele(
2690 : struct xfs_inode *ip)
2691 : {
2692 75819402452 : trace_xfs_irele(ip, _RET_IP_);
2693 75880114508 : iput(VFS_I(ip));
2694 75772796917 : }
2695 :
2696 : void
2697 394539 : xfs_imeta_irele(
2698 : struct xfs_inode *ip)
2699 : {
2700 394539 : ASSERT(!xfs_has_metadir(ip->i_mount) || xfs_is_metadir_inode(ip));
2701 :
2702 394539 : xfs_irele(ip);
2703 394539 : }
2704 :
2705 : /*
 2706 : * Ensure all committed transactions touching the inode are written to the log.
2707 : */
2708 : int
2709 622572 : xfs_log_force_inode(
2710 : struct xfs_inode *ip)
2711 : {
2712 622572 : xfs_csn_t seq = 0;
2713 :
2714 622572 : xfs_ilock(ip, XFS_ILOCK_SHARED);
2715 622573 : if (xfs_ipincount(ip))
2716 30840 : seq = ip->i_itemp->ili_commit_seq;
2717 622573 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
2718 :
2719 622572 : if (!seq)
2720 : return 0;
2721 30840 : return xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC, NULL);
2722 : }
2723 :
2724 : /*
2725 : * Grab the exclusive iolock for a data copy from src to dest, making sure to
 2726 : * abide by the vfs locking order (lowest pointer value goes first) and to break
 2727 : * the layout leases before proceeding. The loop is needed because we cannot call
2728 : * the blocking break_layout() with the iolocks held, and therefore have to
2729 : * back out both locks.
2730 : */
2731 : static int
2732 272982566 : xfs_iolock_two_inodes_and_break_layout(
2733 : struct inode *src,
2734 : struct inode *dest)
2735 : {
2736 272982566 : int error;
2737 :
2738 272982566 : if (src > dest)
2739 135899659 : swap(src, dest);
2740 :
2741 272982566 : retry:
2742 : /* Wait to break both inodes' layouts before we start locking. */
2743 272982566 : error = break_layout(src, true);
2744 272980442 : if (error)
2745 0 : return error;
2746 272980442 : if (src != dest) {
2747 270673456 : error = break_layout(dest, true);
2748 270676539 : if (error)
2749 0 : return error;
2750 : }
2751 :
2752 : /* Lock one inode and make sure nobody got in and leased it. */
2753 272983525 : inode_lock(src);
2754 272985239 : error = break_layout(src, false);
2755 272984500 : if (error) {
2756 0 : inode_unlock(src);
2757 0 : if (error == -EWOULDBLOCK)
2758 0 : goto retry;
2759 0 : return error;
2760 : }
2761 :
2762 272984500 : if (src == dest)
2763 : return 0;
2764 :
2765 : /* Lock the other inode and make sure nobody got in and leased it. */
2766 270677515 : inode_lock_nested(dest, I_MUTEX_NONDIR2);
2767 270682209 : error = break_layout(dest, false);
2768 270672708 : if (error) {
2769 0 : inode_unlock(src);
2770 0 : inode_unlock(dest);
2771 0 : if (error == -EWOULDBLOCK)
2772 0 : goto retry;
2773 0 : return error;
2774 : }
2775 :
2776 : return 0;
2777 : }
2778 :
2779 : static int
2780 : xfs_mmaplock_two_inodes_and_break_dax_layout(
2781 : struct xfs_inode *ip1,
2782 : struct xfs_inode *ip2)
2783 : {
2784 : int error;
2785 : bool retry;
2786 : struct page *page;
2787 :
2788 : if (ip1->i_ino > ip2->i_ino)
2789 : swap(ip1, ip2);
2790 :
2791 : again:
2792 : retry = false;
2793 : /* Lock the first inode */
2794 : xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
2795 : error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
2796 : if (error || retry) {
2797 : xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
2798 : if (error == 0 && retry)
2799 : goto again;
2800 : return error;
2801 : }
2802 :
2803 : if (ip1 == ip2)
2804 : return 0;
2805 :
2806 : /* Nested lock the second inode */
2807 : xfs_ilock(ip2, xfs_lock_inumorder(XFS_MMAPLOCK_EXCL, 1));
2808 : /*
2809 : * We cannot use xfs_break_dax_layouts() directly here because it may
2810 : * need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
2811 : * for this nested lock case.
2812 : */
2813 : page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
2814 : if (page && page_ref_count(page) != 1) {
2815 : xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
2816 : xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
2817 : goto again;
2818 : }
2819 :
2820 : return 0;
2821 : }
2822 :
2823 : /*
2824 : * Lock two inodes so that userspace cannot initiate I/O via file syscalls or
2825 : * mmap activity.
2826 : */
2827 : int
2828 272967984 : xfs_ilock2_io_mmap(
2829 : struct xfs_inode *ip1,
2830 : struct xfs_inode *ip2)
2831 : {
2832 272967984 : int ret;
2833 :
2834 272967984 : ret = xfs_iolock_two_inodes_and_break_layout(VFS_I(ip1), VFS_I(ip2));
2835 272990528 : if (ret)
2836 : return ret;
2837 :
2838 272990338 : if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
2839 : ret = xfs_mmaplock_two_inodes_and_break_dax_layout(ip1, ip2);
2840 : if (ret) {
2841 : inode_unlock(VFS_I(ip2));
2842 : if (ip1 != ip2)
2843 : inode_unlock(VFS_I(ip1));
2844 : return ret;
2845 : }
2846 : } else
2847 272990338 : filemap_invalidate_lock_two(VFS_I(ip1)->i_mapping,
2848 : VFS_I(ip2)->i_mapping);
2849 :
2850 272990338 : return 0;
2851 : }
2852 :
2853 : /* Unlock both inodes to allow IO and mmap activity. */
2854 : void
2855 272967480 : xfs_iunlock2_io_mmap(
2856 : struct xfs_inode *ip1,
2857 : struct xfs_inode *ip2)
2858 : {
2859 272967480 : if (IS_DAX(VFS_I(ip1)) && IS_DAX(VFS_I(ip2))) {
2860 : xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
2861 : if (ip1 != ip2)
2862 : xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
2863 : } else
2864 272967480 : filemap_invalidate_unlock_two(VFS_I(ip1)->i_mapping,
2865 : VFS_I(ip2)->i_mapping);
2866 :
2867 272968928 : inode_unlock(VFS_I(ip2));
2868 272966111 : if (ip1 != ip2)
2869 270659128 : inode_unlock(VFS_I(ip1));
2870 272970701 : }
2871 :
2872 : /* Compute the number of data and realtime blocks used by a file. */
2873 : void
2874 99408380 : xfs_inode_count_blocks(
2875 : struct xfs_trans *tp,
2876 : struct xfs_inode *ip,
2877 : xfs_filblks_t *dblocks,
2878 : xfs_filblks_t *rblocks)
2879 : {
2880 99408380 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
2881 :
2882 99408380 : if (!XFS_IS_REALTIME_INODE(ip)) {
2883 84876371 : *dblocks = ip->i_nblocks;
2884 84876371 : *rblocks = 0;
2885 84876371 : return;
2886 : }
2887 :
2888 14532009 : *rblocks = 0;
2889 14532009 : xfs_bmap_count_leaves(ifp, rblocks);
2890 14532021 : *dblocks = ip->i_nblocks - *rblocks;
2891 : }
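
A minimal usage sketch; per the function body, the two outputs always
partition the inode's block count:

	xfs_filblks_t	dblocks, rblocks;

	xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
	ASSERT(dblocks + rblocks == ip->i_nblocks);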
2892 :
2893 : static void
2894 : xfs_wait_dax_page(
2895 : struct inode *inode)
2896 : {
2897 : struct xfs_inode *ip = XFS_I(inode);
2898 :
2899 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
2900 : schedule();
2901 : xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
2902 : }
2903 :
2904 : int
2905 20429784 : xfs_break_dax_layouts(
2906 : struct inode *inode,
2907 : bool *retry)
2908 : {
2909 20429784 : struct page *page;
2910 :
2911 20429784 : ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
2912 :
2913 20429784 : page = dax_layout_busy_page(inode->i_mapping);
2914 20429784 : if (!page)
2915 20429784 : return 0;
2916 :
2917 : *retry = true;
2918 : return ___wait_var_event(&page->_refcount,
2919 : atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
2920 : 0, 0, xfs_wait_dax_page(inode));
2921 : }
2922 :
2923 : int
2924 105625725 : xfs_break_layouts(
2925 : struct inode *inode,
2926 : uint *iolock,
2927 : enum layout_break_reason reason)
2928 : {
2929 105625725 : bool retry;
2930 105625725 : int error;
2931 :
2932 105625725 : ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
2933 :
2934 105625367 : do {
2935 105625367 : retry = false;
2936 105625367 : switch (reason) {
2937 20429869 : case BREAK_UNMAP:
2938 20429869 : error = xfs_break_dax_layouts(inode, &retry);
2939 20429784 : if (error || retry)
2940 : break;
2941 105625257 : fallthrough;
2942 : case BREAK_WRITE:
2943 105625257 : error = xfs_break_leased_layouts(inode, iolock, &retry);
2944 105625257 : break;
2945 : default:
2946 0 : WARN_ON_ONCE(1);
2947 0 : error = -EINVAL;
2948 : }
2949 105662003 : } while (error == 0 && retry);
2950 :
2951 105662361 : return error;
2952 : }
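
A hypothetical caller sketch; the iolock is passed by reference because
breaking leased layouts may cycle it:

	uint	iolock = XFS_IOLOCK_EXCL;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(VFS_I(ip), &iolock, BREAK_WRITE);
	if (error) {
		xfs_iunlock(ip, iolock);
		return error;
	}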
2953 :
2954 : /* Returns the size of fundamental allocation unit for a file, in bytes. */
2955 : unsigned int
2956 274781598 : xfs_inode_alloc_unitsize(
2957 : struct xfs_inode *ip)
2958 : {
2959 274781598 : unsigned int blocks = 1;
2960 :
2961 274781598 : if (XFS_IS_REALTIME_INODE(ip))
2962 115616933 : blocks = ip->i_mount->m_sb.sb_rextsize;
2963 :
2964 274781598 : return XFS_FSB_TO_B(ip->i_mount, blocks);
2965 : }
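
A hypothetical helper built on this (name invented for illustration). Note the
realtime allocation unit is sb_rextsize blocks and need not be a power of two,
so a plain modulo, not a mask, is required:

	static inline bool
	example_len_is_allocunit_aligned(
		struct xfs_inode	*ip,
		unsigned int		len)
	{
		return len % xfs_inode_alloc_unitsize(ip) == 0;
	}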
2966 :
2967 : /* Should we always be using copy on write for file writes? */
2968 : bool
2969 1181738134 : xfs_is_always_cow_inode(
2970 : struct xfs_inode *ip)
2971 : {
2972 1181738134 : return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
2973 : }
|