1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_inode.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_inode_item.h"
16 : #include "xfs_bmap.h"
17 : #include "xfs_bmap_util.h"
18 : #include "xfs_dir2.h"
19 : #include "xfs_dir2_priv.h"
20 : #include "xfs_ioctl.h"
21 : #include "xfs_trace.h"
22 : #include "xfs_log.h"
23 : #include "xfs_icache.h"
24 : #include "xfs_pnfs.h"
25 : #include "xfs_iomap.h"
26 : #include "xfs_reflink.h"
27 :
28 : #include <linux/dax.h>
29 : #include <linux/falloc.h>
30 : #include <linux/backing-dev.h>
31 : #include <linux/mman.h>
32 : #include <linux/fadvise.h>
33 : #include <linux/mount.h>
34 :
35 : static const struct vm_operations_struct xfs_file_vm_ops;
36 :
37 : /*
38 : * Decide if the given file range is aligned to the size of the fundamental
39 : * allocation unit for the file.
40 : */
41 : static bool
42 4939099 : xfs_is_falloc_aligned(
43 : struct xfs_inode *ip,
44 : loff_t pos,
45 : long long int len)
46 : {
47 4939099 : struct xfs_mount *mp = ip->i_mount;
48 4939099 : uint64_t mask;
49 :
50 4939099 : if (XFS_IS_REALTIME_INODE(ip)) {
51 3773610 : if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
52 265827 : u64 rextbytes;
53 265827 : u32 mod;
54 :
55 265827 : rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
56 265827 : div_u64_rem(pos, rextbytes, &mod);
57 265827 : if (mod)
58 : return false;
59 246562 : div_u64_rem(len, rextbytes, &mod);
60 246562 : return mod == 0;
61 : }
62 1620978 : mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
63 : } else {
64 3052294 : mask = mp->m_sb.sb_blocksize - 1;
65 : }
66 :
67 4673272 : return !((pos | len) & mask);
68 : }
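
/*
 * Worked example for the power-of-two path above (illustrative, assuming
 * a 4096 byte allocation unit, i.e. mask = 0xfff):
 *
 *	pos = 8192, len = 4096:	(0x2000 | 0x1000) & 0xfff == 0	-> aligned
 *	pos = 8192, len = 1000:	(0x2000 | 0x03e8) & 0xfff != 0	-> not aligned
 */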
69 :
70 : /*
71 : * Fsync operations on directories are much simpler than on regular files,
72 : * as there is no file data to flush, and thus also no need for explicit
73 : * cache flush operations; nor are there any non-transaction metadata
74 : * updates on directories.
75 : */
76 : STATIC int
77 499229 : xfs_dir_fsync(
78 : struct file *file,
79 : loff_t start,
80 : loff_t end,
81 : int datasync)
82 : {
83 499229 : struct xfs_inode *ip = XFS_I(file->f_mapping->host);
84 :
85 499229 : trace_xfs_dir_fsync(ip);
86 499218 : return xfs_log_force_inode(ip);
87 : }
88 :
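/*
 * Return the log commit sequence that an fsync needs to force, or 0 if
 * there is nothing to do: the inode is not pinned, or only timestamp
 * updates are pending and the caller asked for fdatasync semantics.
 */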
89 : static xfs_csn_t
90 3863870 : xfs_fsync_seq(
91 : struct xfs_inode *ip,
92 : bool datasync)
93 : {
94 3863870 : if (!xfs_ipincount(ip))
95 : return 0;
96 3862657 : if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
97 : return 0;
98 3575166 : return ip->i_itemp->ili_commit_seq;
99 : }
100 :
101 : /*
102 : * All metadata updates are logged, which means that we just have to flush the
103 : * log up to the latest LSN that touched the inode.
104 : *
105 : * If we have concurrent fsync/fdatasync() calls, we need them to all block on
106 : * the log force before we clear the ili_fsync_fields field. This ensures that
107 : * we don't get a racing sync operation that does not wait for the metadata to
108 : * hit the journal before returning. If we race with clearing ili_fsync_fields,
109 : * then all that will happen is the log force will do nothing as the lsn will
110 : * already be on disk. We can't race with setting ili_fsync_fields because that
111 : * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
112 : * shared until after ili_fsync_fields is cleared.
113 : */
114 : static int
115 3863967 : xfs_fsync_flush_log(
116 : struct xfs_inode *ip,
117 : bool datasync,
118 : int *log_flushed)
119 : {
120 3863967 : int error = 0;
121 3863967 : xfs_csn_t seq;
122 :
123 3863967 : xfs_ilock(ip, XFS_ILOCK_SHARED);
124 3863871 : seq = xfs_fsync_seq(ip, datasync);
125 3863613 : if (seq) {
126 3575195 : error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
127 : log_flushed);
128 :
129 3574938 : spin_lock(&ip->i_itemp->ili_lock);
130 3575222 : ip->i_itemp->ili_fsync_fields = 0;
131 3575222 : spin_unlock(&ip->i_itemp->ili_lock);
132 : }
133 3863676 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
134 3863110 : return error;
135 : }
136 :
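/*
 * Flush everything fsync()/fdatasync() needs for this inode: write back
 * and wait on dirty page cache, flush the relevant device write caches,
 * and force the log up to the last modification if the inode is pinned.
 */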
137 : STATIC int
138 14441839 : xfs_file_fsync(
139 : struct file *file,
140 : loff_t start,
141 : loff_t end,
142 : int datasync)
143 : {
144 14441839 : struct xfs_inode *ip = XFS_I(file->f_mapping->host);
145 14441839 : struct xfs_mount *mp = ip->i_mount;
146 14441839 : int error, err2;
147 14441839 : int log_flushed = 0;
148 :
149 14441839 : trace_xfs_file_fsync(ip);
150 :
151 14440848 : error = file_write_and_wait_range(file, start, end);
152 14443036 : if (error)
153 : return error;
154 :
155 28879358 : if (xfs_is_shutdown(mp))
156 : return -EIO;
157 :
158 14437469 : xfs_iflags_clear(ip, XFS_ITRUNCATED);
159 :
160 : /*
161 : * If we have an RT and/or log subvolume we need to make sure to flush
162 : * the write cache of the device used for file data first. This is to
163 : * ensure newly written file data makes it to disk before logging the new
164 : * inode size in case of an extending write.
165 : */
166 14439596 : if (XFS_IS_REALTIME_INODE(ip))
167 6250565 : error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
168 8189031 : else if (mp->m_logdev_targp != mp->m_ddev_targp)
169 414584 : error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
170 :
171 : /*
172 : * Any inode that has dirty modifications in the log is pinned. The
173 : * racy check here for a pinned inode will not catch modifications
174 : * that happen concurrently to the fsync call, but fsync semantics
175 : * only require syncing previously completed I/O.
176 : */
177 14432814 : if (xfs_ipincount(ip)) {
178 3863873 : err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
179 3863077 : if (err2 && !error)
180 1108 : error = err2;
181 : }
182 :
183 : /*
184 : * If we only have a single device, and the log force above was
185 : * a no-op, we might have to flush the data device cache here.
186 : * This can only happen for fdatasync/O_DSYNC if we were overwriting
187 : * an already allocated file and thus do not have any metadata to
188 : * commit.
189 : */
190 14432018 : if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
191 6330955 : mp->m_logdev_targp == mp->m_ddev_targp) {
192 6154491 : err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
193 6149372 : if (err2 && !error)
194 317 : error = err2;
195 : }
196 :
197 : return error;
198 : }
199 :
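/*
 * Take the inode lock for an iocb: trylock and return -EAGAIN for
 * IOCB_NOWAIT callers, block until the lock is available otherwise.
 */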
200 : static int
201 1170515961 : xfs_ilock_iocb(
202 : struct kiocb *iocb,
203 : unsigned int lock_mode)
204 : {
205 1170515961 : struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
206 :
207 1170515961 : if (iocb->ki_flags & IOCB_NOWAIT) {
208 0 : if (!xfs_ilock_nowait(ip, lock_mode))
209 0 : return -EAGAIN;
210 : } else {
211 1170515961 : xfs_ilock(ip, lock_mode);
212 : }
213 :
214 : return 0;
215 : }
216 :
217 : STATIC ssize_t
218 524209125 : xfs_file_dio_read(
219 : struct kiocb *iocb,
220 : struct iov_iter *to)
221 : {
222 524209125 : struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
223 524209125 : ssize_t ret;
224 :
225 524209125 : trace_xfs_file_direct_read(iocb, to);
226 :
227 524208867 : if (!iov_iter_count(to))
228 : return 0; /* skip atime */
229 :
230 524185244 : file_accessed(iocb->ki_filp);
231 :
232 524183861 : ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
233 524183781 : if (ret)
234 : return ret;
235 524183791 : ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
236 524186286 : xfs_iunlock(ip, XFS_IOLOCK_SHARED);
237 :
238 524186286 : return ret;
239 : }
240 :
241 : static noinline ssize_t
242 0 : xfs_file_dax_read(
243 : struct kiocb *iocb,
244 : struct iov_iter *to)
245 : {
246 0 : struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
247 0 : ssize_t ret = 0;
248 :
249 0 : trace_xfs_file_dax_read(iocb, to);
250 :
251 0 : if (!iov_iter_count(to))
252 : return 0; /* skip atime */
253 :
254 0 : ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
255 0 : if (ret)
256 : return ret;
257 0 : ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
258 0 : xfs_iunlock(ip, XFS_IOLOCK_SHARED);
259 :
260 0 : file_accessed(iocb->ki_filp);
261 0 : return ret;
262 : }
263 :
264 : STATIC ssize_t
265 379687963 : xfs_file_buffered_read(
266 : struct kiocb *iocb,
267 : struct iov_iter *to)
268 : {
269 379687963 : struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
270 379687963 : ssize_t ret;
271 :
272 379687963 : trace_xfs_file_buffered_read(iocb, to);
273 :
274 379708021 : ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
275 378724458 : if (ret)
276 : return ret;
277 378755985 : ret = generic_file_read_iter(iocb, to);
278 379954919 : xfs_iunlock(ip, XFS_IOLOCK_SHARED);
279 :
280 379954919 : return ret;
281 : }
282 :
283 : STATIC ssize_t
284 905563243 : xfs_file_read_iter(
285 : struct kiocb *iocb,
286 : struct iov_iter *to)
287 : {
288 905563243 : struct inode *inode = file_inode(iocb->ki_filp);
289 905563243 : struct xfs_mount *mp = XFS_I(inode)->i_mount;
290 905563243 : ssize_t ret = 0;
291 :
292 905563243 : XFS_STATS_INC(mp, xs_read_calls);
293 :
294 1807066148 : if (xfs_is_shutdown(mp))
295 : return -EIO;
296 :
297 903530249 : if (IS_DAX(inode))
298 0 : ret = xfs_file_dax_read(iocb, to);
299 903530249 : else if (iocb->ki_flags & IOCB_DIRECT)
300 524209173 : ret = xfs_file_dio_read(iocb, to);
301 : else
302 379321076 : ret = xfs_file_buffered_read(iocb, to);
303 :
304 902911849 : if (ret > 0)
305 373870074 : XFS_STATS_ADD(mp, xs_read_bytes, ret);
306 : return ret;
307 : }
308 :
309 : STATIC ssize_t
310 9923454 : xfs_file_splice_read(
311 : struct file *in,
312 : loff_t *ppos,
313 : struct pipe_inode_info *pipe,
314 : size_t len,
315 : unsigned int flags)
316 : {
317 9923454 : struct inode *inode = file_inode(in);
318 9923454 : struct xfs_inode *ip = XFS_I(inode);
319 9923454 : struct xfs_mount *mp = ip->i_mount;
320 9923454 : ssize_t ret = 0;
321 :
322 9923454 : XFS_STATS_INC(mp, xs_read_calls);
323 :
324 19846882 : if (xfs_is_shutdown(mp))
325 : return -EIO;
326 :
327 9923410 : trace_xfs_file_splice_read(ip, *ppos, len);
328 :
329 9923338 : xfs_ilock(ip, XFS_IOLOCK_SHARED);
330 9923298 : ret = filemap_splice_read(in, ppos, pipe, len, flags);
331 9923378 : xfs_iunlock(ip, XFS_IOLOCK_SHARED);
332 9923387 : if (ret > 0)
333 9923172 : XFS_STATS_ADD(mp, xs_read_bytes, ret);
334 : return ret;
335 : }
336 :
337 : /*
338 : * Common pre-write limit and setup checks.
339 : *
340 : * Called with the iolock held either shared or exclusive according to
341 : * @iolock, and returns with it held. Might upgrade the iolock to exclusive
342 : * if called for a direct write beyond i_size.
343 : */
344 : STATIC ssize_t
345 260642712 : xfs_file_write_checks(
346 : struct kiocb *iocb,
347 : struct iov_iter *from,
348 : unsigned int *iolock)
349 : {
350 260642712 : struct file *file = iocb->ki_filp;
351 260642712 : struct inode *inode = file->f_mapping->host;
352 260642712 : struct xfs_inode *ip = XFS_I(inode);
353 260642712 : ssize_t error = 0;
354 260642712 : size_t count = iov_iter_count(from);
355 260642712 : bool drained_dio = false;
356 327539145 : loff_t isize;
357 :
358 : restart:
359 327539145 : error = generic_write_checks(iocb, from);
360 327702182 : if (error <= 0)
361 30 : return error;
362 :
363 327702152 : if (iocb->ki_flags & IOCB_NOWAIT) {
364 0 : error = break_layout(inode, false);
365 0 : if (error == -EWOULDBLOCK)
366 : error = -EAGAIN;
367 : } else {
368 327702152 : error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
369 : }
370 :
371 328430801 : if (error)
372 0 : return error;
373 :
374 : /*
375 : * For changing security info in file_remove_privs() we need i_rwsem
376 : * exclusively.
377 : */
378 328430801 : if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
379 1461 : xfs_iunlock(ip, *iolock);
380 1461 : *iolock = XFS_IOLOCK_EXCL;
381 1461 : error = xfs_ilock_iocb(iocb, *iolock);
382 1461 : if (error) {
383 0 : *iolock = 0;
384 0 : return error;
385 : }
386 1461 : goto restart;
387 : }
388 :
389 : /*
390 : * If the offset is beyond the size of the file, we need to zero any
391 : * blocks that fall between the existing EOF and the start of this
392 : * write. If zeroing is needed and we are currently holding the iolock
393 : * shared, we need to upgrade it to exclusive, which implies having to
394 : * redo all the checks above.
395 : *
396 : * We need to serialise against EOF updates that occur in IO completions
397 : * here. We want to make sure that nobody is changing the size while we
398 : * do this check until we have placed an IO barrier (i.e. hold the
399 : * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched. The
400 : * spinlock effectively forms a memory barrier once we have the
401 : * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
402 : * hence be able to correctly determine if we need to run zeroing.
403 : *
404 : * We can do an unlocked check here safely as IO completion can only
405 : * extend EOF. Truncate is locked out at this point, so the EOF can
406 : * not move backwards, only forwards. Hence we only need to take the
407 : * slow path and spin locks when we are at or beyond the current EOF.
408 : */
409 328429340 : if (iocb->ki_pos <= i_size_read(inode))
410 193606103 : goto out;
411 :
412 134823237 : spin_lock(&ip->i_flags_lock);
413 135090601 : isize = i_size_read(inode);
414 135090601 : if (iocb->ki_pos > isize) {
415 135090601 : spin_unlock(&ip->i_flags_lock);
416 :
417 134853758 : if (iocb->ki_flags & IOCB_NOWAIT)
418 : return -EAGAIN;
419 :
420 134853758 : if (!drained_dio) {
421 67372943 : if (*iolock == XFS_IOLOCK_SHARED) {
422 5074539 : xfs_iunlock(ip, *iolock);
423 4977010 : *iolock = XFS_IOLOCK_EXCL;
424 4977010 : xfs_ilock(ip, *iolock);
425 5005263 : iov_iter_reexpand(from, count);
426 : }
427 : /*
428 : * We now have an IO submission barrier in place, but
429 : * AIO can do EOF updates during IO completion and hence
430 : * we now need to wait for all of them to drain. Non-AIO
431 : * DIO will have drained before we are given the
432 : * XFS_IOLOCK_EXCL, and so for most cases this wait is a
433 : * no-op.
434 : */
435 67303667 : inode_dio_wait(inode);
436 66894972 : drained_dio = true;
437 66894972 : goto restart;
438 : }
439 :
440 67480815 : trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
441 67264534 : error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
442 66623627 : if (error)
443 : return error;
444 : } else
445 0 : spin_unlock(&ip->i_flags_lock);
446 :
447 260215823 : out:
448 260215823 : return kiocb_modified(iocb);
449 : }
450 :
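/*
 * Direct write completion handler: account the bytes written, finish
 * any COW remapping or unwritten extent conversion, and move the
 * in-core and on-disk inode sizes forward for extending writes.
 */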
451 : static int
452 23229437 : xfs_dio_write_end_io(
453 : struct kiocb *iocb,
454 : ssize_t size,
455 : int error,
456 : unsigned flags)
457 : {
458 23229437 : struct inode *inode = file_inode(iocb->ki_filp);
459 23229437 : struct xfs_inode *ip = XFS_I(inode);
460 23229437 : loff_t offset = iocb->ki_pos;
461 23229437 : unsigned int nofs_flag;
462 :
463 23229437 : trace_xfs_end_io_direct_write(ip, offset, size);
464 :
465 46394446 : if (xfs_is_shutdown(ip->i_mount))
466 : return -EIO;
467 :
468 23196355 : if (error)
469 : return error;
470 16488391 : if (!size)
471 : return 0;
472 :
473 : /*
474 : * Capture amount written on completion as we can't reliably account
475 : * for it on submission.
476 : */
477 16488391 : XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
478 :
479 : /*
480 : * We can allocate memory here while doing writeback on behalf of
481 : * memory reclaim. To avoid memory allocation deadlocks set the
482 : * task-wide nofs context for the following operations.
483 : */
484 16488537 : nofs_flag = memalloc_nofs_save();
485 :
486 16488537 : if (flags & IOMAP_DIO_COW) {
487 3488587 : error = xfs_reflink_end_cow(ip, offset, size);
488 3488582 : if (error)
489 21 : goto out;
490 : }
491 :
492 : /*
493 : * Unwritten conversion updates the in-core isize after extent
494 : * conversion but before updating the on-disk size. Updating isize any
495 : * earlier allows a racing dio read to find unwritten extents before
496 : * they are converted.
497 : */
498 16488511 : if (flags & IOMAP_DIO_UNWRITTEN) {
499 7946931 : error = xfs_iomap_write_unwritten(ip, offset, size, true);
500 7945834 : goto out;
501 : }
502 :
503 : /*
504 : * We need to update the in-core inode size here so that we don't end up
505 : * with the on-disk inode size being outside the in-core inode size. We
506 : * have no other method of updating EOF for AIO, so always do it here
507 : * if necessary.
508 : *
509 : * We need to lock the test/set EOF update as we can be racing with
510 : * other IO completions here to update the EOF. Failing to serialise
511 : * here can result in EOF moving backwards and Bad Things Happen when
512 : * that occurs.
513 : *
514 : * As IO completion only ever extends EOF, we can do an unlocked check
515 : * here to avoid taking the spinlock. If we land within the current EOF,
516 : * then we do not need to do an extending update at all, and we don't
517 : * need to take the lock to check this. If we race with an update moving
518 : * EOF, then we'll either still be beyond EOF and need to take the lock,
519 : * or we'll be within EOF and we don't need to take it at all.
520 : */
521 8541580 : if (offset + size <= i_size_read(inode))
522 7678672 : goto out;
523 :
524 862908 : spin_lock(&ip->i_flags_lock);
525 862908 : if (offset + size > i_size_read(inode)) {
526 862908 : i_size_write(inode, offset + size);
527 862908 : spin_unlock(&ip->i_flags_lock);
528 862908 : error = xfs_setfilesize(ip, offset, size);
529 : } else {
530 0 : spin_unlock(&ip->i_flags_lock);
531 : }
532 :
533 16487435 : out:
534 16487435 : memalloc_nofs_restore(nofs_flag);
535 16487435 : return error;
536 : }
537 :
538 : static const struct iomap_dio_ops xfs_dio_write_ops = {
539 : .end_io = xfs_dio_write_end_io,
540 : };
541 :
542 : /*
543 : * Handle block-aligned direct I/O writes
544 : */
545 : static noinline ssize_t
546 17637732 : xfs_file_dio_write_aligned(
547 : struct xfs_inode *ip,
548 : struct kiocb *iocb,
549 : struct iov_iter *from)
550 : {
551 17637732 : unsigned int iolock = XFS_IOLOCK_SHARED;
552 17637732 : ssize_t ret;
553 :
554 17637732 : ret = xfs_ilock_iocb(iocb, iolock);
555 17603221 : if (ret)
556 : return ret;
557 17605777 : ret = xfs_file_write_checks(iocb, from, &iolock);
558 17595783 : if (ret)
559 700 : goto out_unlock;
560 :
561 : /*
562 : * We don't need to hold the IOLOCK exclusively across the IO, so demote
563 : * the iolock back to shared if we had to take the exclusive lock in
564 : * xfs_file_write_checks() for other reasons.
565 : */
566 17595083 : if (iolock == XFS_IOLOCK_EXCL) {
567 5021264 : xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
568 4962614 : iolock = XFS_IOLOCK_SHARED;
569 : }
570 17536433 : trace_xfs_file_direct_write(iocb, from);
571 17449041 : ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
572 : &xfs_dio_write_ops, 0, NULL, 0);
573 17657725 : out_unlock:
574 17657725 : if (iolock)
575 17655130 : xfs_iunlock(ip, iolock);
576 : return ret;
577 : }
578 :
579 : /*
580 : * Handle block-unaligned direct I/O writes
581 : *
582 : * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
583 : * them to be done in parallel with reads and other direct I/O writes. However,
584 : * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
585 : * to do sub-block zeroing and that requires serialisation against other direct
586 : * I/O to the same block. In this case we need to serialise the submission of
587 : * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
588 : * In the case where sub-block zeroing is not required, we can do concurrent
589 : * sub-block dios to the same block successfully.
590 : *
591 : * Optimistically submit the I/O using the shared lock first, but use the
592 : * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
593 : * if block allocation or partial block zeroing would be required. In that case
594 : * we try again with the exclusive lock.
595 : */
596 : static noinline ssize_t
597 10458095 : xfs_file_dio_write_unaligned(
598 : struct xfs_inode *ip,
599 : struct kiocb *iocb,
600 : struct iov_iter *from)
601 : {
602 10458095 : size_t isize = i_size_read(VFS_I(ip));
603 10458095 : size_t count = iov_iter_count(from);
604 10458095 : unsigned int iolock = XFS_IOLOCK_SHARED;
605 10458095 : unsigned int flags = IOMAP_DIO_OVERWRITE_ONLY;
606 10458095 : ssize_t ret;
607 :
608 : /*
609 : * Extending writes need exclusivity because of the sub-block zeroing
610 : * that the DIO code always does for partial tail blocks beyond EOF, so
611 : * don't even bother trying the fast path in this case.
612 : */
613 10458095 : if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
614 6762999 : if (iocb->ki_flags & IOCB_NOWAIT)
615 : return -EAGAIN;
616 6762999 : retry_exclusive:
617 7886243 : iolock = XFS_IOLOCK_EXCL;
618 7886243 : flags = IOMAP_DIO_FORCE_WAIT;
619 : }
620 :
621 11581339 : ret = xfs_ilock_iocb(iocb, iolock);
622 11581223 : if (ret)
623 : return ret;
624 :
625 : /*
626 : * We can't properly handle unaligned direct I/O to reflink files yet,
627 : * as we can't unshare a partial block.
628 : */
629 11581208 : if (xfs_is_cow_inode(ip)) {
630 5985651 : trace_xfs_reflink_bounce_dio_write(iocb, from);
631 5985652 : ret = -ENOTBLK;
632 5985652 : goto out_unlock;
633 : }
634 :
635 5595548 : ret = xfs_file_write_checks(iocb, from, &iolock);
636 5595252 : if (ret)
637 124 : goto out_unlock;
638 :
639 : /*
640 : * If we are doing exclusive unaligned I/O, this must be the only I/O
641 : * in-flight. Otherwise we risk data corruption due to unwritten extent
642 : * conversions from the AIO end_io handler. Wait for all other I/O to
643 : * drain first.
644 : */
645 5595128 : if (flags & IOMAP_DIO_FORCE_WAIT)
646 4209946 : inode_dio_wait(VFS_I(ip));
647 :
648 5595029 : trace_xfs_file_direct_write(iocb, from);
649 5594906 : ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
650 : &xfs_dio_write_ops, flags, NULL, 0);
651 :
652 : /*
653 : * Retry unaligned I/O with exclusive blocking semantics if the DIO
654 : * layer rejected it for mapping or locking reasons. If we are doing
655 : * nonblocking user I/O, propagate the error.
656 : */
657 5595281 : if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
658 1123267 : ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
659 1123267 : xfs_iunlock(ip, iolock);
660 1123244 : goto retry_exclusive;
661 : }
662 :
663 4472014 : out_unlock:
664 10457790 : if (iolock)
665 10457481 : xfs_iunlock(ip, iolock);
666 : return ret;
667 : }
668 :
669 : static ssize_t
670 28093716 : xfs_file_dio_write(
671 : struct kiocb *iocb,
672 : struct iov_iter *from)
673 : {
674 28093716 : struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
675 28093716 : struct xfs_buftarg *target = xfs_inode_buftarg(ip);
676 28093716 : size_t count = iov_iter_count(from);
677 :
678 : /* direct I/O must be aligned to device logical sector size */
679 28093716 : if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
680 : return -EINVAL;
681 28093716 : if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
682 10458245 : return xfs_file_dio_write_unaligned(ip, iocb, from);
683 17635471 : return xfs_file_dio_write_aligned(ip, iocb, from);
684 : }
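
/*
 * Illustrative routing for the checks above, assuming a 4096 byte block
 * size on a device with 512 byte logical sectors:
 *
 *	pos 4096, count 8192	-> xfs_file_dio_write_aligned()
 *	pos 512,  count 512	-> xfs_file_dio_write_unaligned()
 *	pos 100,  count 512	-> -EINVAL (not sector aligned)
 */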
685 :
686 : static noinline ssize_t
687 0 : xfs_file_dax_write(
688 : struct kiocb *iocb,
689 : struct iov_iter *from)
690 : {
691 0 : struct inode *inode = iocb->ki_filp->f_mapping->host;
692 0 : struct xfs_inode *ip = XFS_I(inode);
693 0 : unsigned int iolock = XFS_IOLOCK_EXCL;
694 0 : ssize_t ret, error = 0;
695 0 : loff_t pos;
696 :
697 0 : ret = xfs_ilock_iocb(iocb, iolock);
698 0 : if (ret)
699 : return ret;
700 0 : ret = xfs_file_write_checks(iocb, from, &iolock);
701 0 : if (ret)
702 0 : goto out;
703 :
704 0 : pos = iocb->ki_pos;
705 :
706 0 : trace_xfs_file_dax_write(iocb, from);
707 0 : ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops);
708 0 : if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
709 0 : i_size_write(inode, iocb->ki_pos);
710 0 : error = xfs_setfilesize(ip, pos, ret);
711 : }
712 0 : out:
713 0 : if (iolock)
714 0 : xfs_iunlock(ip, iolock);
715 0 : if (error)
716 : return error;
717 :
718 0 : if (ret > 0) {
719 0 : XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
720 :
721 : /* Handle various SYNC-type writes */
722 0 : ret = generic_write_sync(iocb, ret);
723 : }
724 : return ret;
725 : }
726 :
727 : STATIC ssize_t
728 236289084 : xfs_file_buffered_write(
729 : struct kiocb *iocb,
730 : struct iov_iter *from)
731 : {
732 236289084 : struct inode *inode = iocb->ki_filp->f_mapping->host;
733 236289084 : struct xfs_inode *ip = XFS_I(inode);
734 236289084 : ssize_t ret;
735 236289084 : bool cleared_space = false;
736 237428582 : unsigned int iolock;
737 :
738 : write_retry:
739 237428582 : iolock = XFS_IOLOCK_EXCL;
740 237428582 : ret = xfs_ilock_iocb(iocb, iolock);
741 237255780 : if (ret)
742 0 : return ret;
743 :
744 237255780 : ret = xfs_file_write_checks(iocb, from, &iolock);
745 237172079 : if (ret)
746 13887 : goto out;
747 :
748 237158192 : trace_xfs_file_buffered_write(iocb, from);
749 236822191 : ret = iomap_file_buffered_write(iocb, from,
750 : &xfs_buffered_write_iomap_ops);
751 :
752 : /*
753 : * If we hit a space limit, try to free up some lingering preallocated
754 : * space before returning an error. In the case of ENOSPC, first try to
755 : * write back all dirty inodes to free up some of the excess reserved
756 : * metadata space. This reduces the chances that the eofblocks scan
757 : * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
758 : * also behaves as a filter to prevent too many eofblocks scans from
759 : * running at the same time. Use a synchronous scan to increase the
760 : * effectiveness of the scan.
761 : */
762 237295558 : if (ret == -EDQUOT && !cleared_space) {
763 2826 : xfs_iunlock(ip, iolock);
764 2826 : xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
765 2825 : cleared_space = true;
766 2825 : goto write_retry;
767 237292732 : } else if (ret == -ENOSPC && !cleared_space) {
768 1136138 : struct xfs_icwalk icw = {0};
769 :
770 1136138 : cleared_space = true;
771 1136138 : xfs_flush_inodes(ip->i_mount);
772 :
773 1136355 : xfs_iunlock(ip, iolock);
774 1136073 : icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
775 1136073 : xfs_blockgc_free_space(ip->i_mount, &icw);
776 1136673 : goto write_retry;
777 : }
778 :
779 236156594 : out:
780 236170481 : if (iolock)
781 236017395 : xfs_iunlock(ip, iolock);
782 :
783 236405752 : if (ret > 0) {
784 235218549 : XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
785 : /* Handle various SYNC-type writes */
786 235280718 : ret = generic_write_sync(iocb, ret);
787 : }
788 : return ret;
789 : }
790 :
791 : STATIC ssize_t
792 258521131 : xfs_file_write_iter(
793 : struct kiocb *iocb,
794 : struct iov_iter *from)
795 : {
796 258521131 : struct inode *inode = iocb->ki_filp->f_mapping->host;
797 258521131 : struct xfs_inode *ip = XFS_I(inode);
798 258521131 : ssize_t ret;
799 258521131 : size_t ocount = iov_iter_count(from);
800 :
801 258521131 : XFS_STATS_INC(ip->i_mount, xs_write_calls);
802 :
803 258550739 : if (ocount == 0)
804 : return 0;
805 :
806 517101036 : if (xfs_is_shutdown(ip->i_mount))
807 : return -EIO;
808 :
809 258537443 : if (IS_DAX(inode))
810 0 : return xfs_file_dax_write(iocb, from);
811 :
812 258537443 : if (iocb->ki_flags & IOCB_DIRECT) {
813 : /*
814 : * Allow a directio write to fall back to a buffered
815 : * write *only* in the case that we're doing a reflink
816 : * CoW. In all other directio scenarios we do not
817 : * allow an operation to fall back to buffered mode.
818 : */
819 28095888 : ret = xfs_file_dio_write(iocb, from);
820 28037223 : if (ret != -ENOTBLK)
821 : return ret;
822 : }
823 :
824 236427575 : return xfs_file_buffered_write(iocb, from);
825 : }
826 :
827 : static void
828 0 : xfs_wait_dax_page(
829 : struct inode *inode)
830 : {
831 0 : struct xfs_inode *ip = XFS_I(inode);
832 :
833 0 : xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
834 0 : schedule();
835 0 : xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
836 0 : }
837 :
838 : int
839 61393942 : xfs_break_dax_layouts(
840 : struct inode *inode,
841 : bool *retry)
842 : {
843 61393942 : struct page *page;
844 :
845 61393942 : ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
846 :
847 61393433 : page = dax_layout_busy_page(inode->i_mapping);
848 61392528 : if (!page)
849 : return 0;
850 :
851 0 : *retry = true;
852 0 : return ___wait_var_event(&page->_refcount,
853 : atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
854 : 0, 0, xfs_wait_dax_page(inode));
855 : }
856 :
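/*
 * Break any layouts that would conflict with the operation described by
 * @reason: busy DAX pages and pNFS layout leases for BREAK_UNMAP, just
 * the leases for BREAK_WRITE. Loops until no more retries are needed.
 */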
857 : int
858 389083378 : xfs_break_layouts(
859 : struct inode *inode,
860 : uint *iolock,
861 : enum layout_break_reason reason)
862 : {
863 389083378 : bool retry;
864 389083378 : int error;
865 :
866 389083378 : ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
867 :
868 388943912 : do {
869 388943912 : retry = false;
870 388943912 : switch (reason) {
871 61393903 : case BREAK_UNMAP:
872 61393903 : error = xfs_break_dax_layouts(inode, &retry);
873 61392687 : if (error || retry)
874 : break;
875 388942752 : fallthrough;
876 : case BREAK_WRITE:
877 388942752 : error = xfs_break_leased_layouts(inode, iolock, &retry);
878 388942752 : break;
879 : default:
880 0 : WARN_ON_ONCE(1);
881 0 : error = -EINVAL;
882 : }
883 389873778 : } while (error == 0 && retry);
884 :
885 389861460 : return error;
886 : }
887 :
888 : /* Does this file, inode, or mount want synchronous writes? */
889 222656640 : static inline bool xfs_file_sync_writes(struct file *filp)
890 : {
891 222656640 : struct xfs_inode *ip = XFS_I(file_inode(filp));
892 :
893 222656640 : if (xfs_has_wsync(ip->i_mount))
894 : return true;
895 222656616 : if (filp->f_flags & (__O_SYNC | O_DSYNC))
896 : return true;
897 222626715 : if (IS_SYNC(file_inode(filp)))
898 13 : return true;
899 :
900 : return false;
901 : }
902 :
903 : #define XFS_FALLOC_FL_SUPPORTED \
904 : (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
905 : FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
906 : FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
907 :
908 : STATIC long
909 54201349 : xfs_file_fallocate(
910 : struct file *file,
911 : int mode,
912 : loff_t offset,
913 : loff_t len)
914 : {
915 54201349 : struct inode *inode = file_inode(file);
916 54201349 : struct xfs_inode *ip = XFS_I(inode);
917 54201349 : long error;
918 54201349 : uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
919 54201349 : loff_t new_size = 0;
920 54201349 : bool do_file_insert = false;
921 :
922 54201349 : if (!S_ISREG(inode->i_mode))
923 : return -EINVAL;
924 54201349 : if (mode & ~XFS_FALLOC_FL_SUPPORTED)
925 : return -EOPNOTSUPP;
926 :
927 54201349 : xfs_ilock(ip, iolock);
928 54201731 : error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
929 54201831 : if (error)
930 0 : goto out_unlock;
931 :
932 : /*
933 : * Must wait for all AIO to complete before we continue as AIO can
934 : * change the file size on completion without holding any locks we
935 : * currently hold. We must do this first because AIO can update both
936 : * the on disk and in memory inode sizes, and the operations that follow
937 : * require the in-memory size to be fully up-to-date.
938 : */
939 54201831 : inode_dio_wait(inode);
940 :
941 : /*
942 : * Now that AIO and DIO have drained, we flush and (if necessary) invalidate
943 : * the cached range over the first operation we are about to run.
944 : *
945 : * We care about zero and collapse here because they both run a hole
946 : * punch over the range first. Because that can zero data, and the range
947 : * of invalidation for the shift operations is much larger, we still do
948 : * the required flush for collapse in xfs_prepare_shift().
949 : *
950 : * Insert has the same range requirements as collapse, and we extend the
951 : * file first which can zero data. Hence insert has the same
952 : * flush/invalidate requirements as collapse and so they are both
953 : * handled at the right time by xfs_prepare_shift().
954 : */
955 54201063 : if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
956 : FALLOC_FL_COLLAPSE_RANGE)) {
957 43544651 : error = xfs_flush_unmap_range(ip, offset, len);
958 43544523 : if (error)
959 340 : goto out_unlock;
960 : }
961 :
962 54200595 : error = file_modified(file);
963 54201535 : if (error)
964 12 : goto out_unlock;
965 :
966 54201523 : if (mode & FALLOC_FL_PUNCH_HOLE) {
967 37245654 : error = xfs_free_file_space(ip, offset, len);
968 37245808 : if (error)
969 48691 : goto out_unlock;
970 16955869 : } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
971 2796012 : if (!xfs_is_falloc_aligned(ip, offset, len)) {
972 326171 : error = -EINVAL;
973 326171 : goto out_unlock;
974 : }
975 :
976 : /*
977 : * The collapse range must not overlap EOF; a collapse reaching
978 : * or crossing EOF would effectively be a truncate operation
979 : */
980 2469839 : if (offset + len >= i_size_read(inode)) {
981 178795 : error = -EINVAL;
982 178795 : goto out_unlock;
983 : }
984 :
985 2291044 : new_size = i_size_read(inode) - len;
986 :
987 2291044 : error = xfs_collapse_file_space(ip, offset, len);
988 2291051 : if (error)
989 2058 : goto out_unlock;
990 14159857 : } else if (mode & FALLOC_FL_INSERT_RANGE) {
991 2143097 : loff_t isize = i_size_read(inode);
992 :
993 2143097 : if (!xfs_is_falloc_aligned(ip, offset, len)) {
994 310490 : error = -EINVAL;
995 310490 : goto out_unlock;
996 : }
997 :
998 : /*
999 : * New inode size must not exceed ->s_maxbytes, accounting for
1000 : * possible signed overflow.
1001 : */
1002 1832605 : if (inode->i_sb->s_maxbytes - isize < len) {
1003 10 : error = -EFBIG;
1004 10 : goto out_unlock;
1005 : }
1006 1832595 : new_size = isize + len;
1007 :
1008 : /* Offset should be less than i_size */
1009 1832595 : if (offset >= isize) {
1010 140032 : error = -EINVAL;
1011 140032 : goto out_unlock;
1012 : }
1013 : do_file_insert = true;
1014 : } else {
1015 12016760 : if (!(mode & FALLOC_FL_KEEP_SIZE) &&
1016 6871564 : offset + len > i_size_read(inode)) {
1017 4719109 : new_size = offset + len;
1018 4719109 : error = inode_newsize_ok(inode, new_size);
1019 4719111 : if (error)
1020 10 : goto out_unlock;
1021 : }
1022 :
1023 12016752 : if (mode & FALLOC_FL_ZERO_RANGE) {
1024 : /*
1025 : * Punch a hole and prealloc the range. We use a hole
1026 : * punch rather than unwritten extent conversion for two
1027 : * reasons:
1028 : *
1029 : * 1.) Hole punch handles partial block zeroing for us.
1030 : * 2.) If prealloc returns ENOSPC, the file range is
1031 : * still zero-valued by virtue of the hole punch.
1032 : */
1033 3502925 : unsigned int blksize = i_blocksize(inode);
1034 :
1035 3502926 : trace_xfs_zero_file_space(ip);
1036 :
1037 3502912 : error = xfs_free_file_space(ip, offset, len);
1038 3502924 : if (error)
1039 14945 : goto out_unlock;
1040 :
1041 3487979 : len = round_up(offset + len, blksize) -
1042 3487979 : round_down(offset, blksize);
1043 3487979 : offset = round_down(offset, blksize);
1044 8513827 : } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
1045 205 : error = xfs_reflink_unshare(ip, offset, len);
1046 205 : if (error)
1047 6 : goto out_unlock;
1048 : } else {
1049 : /*
1050 : * In always_cow mode we can't use preallocations and
1051 : * thus should not create them.
1052 : */
1053 8513622 : if (xfs_is_always_cow_inode(ip)) {
1054 407637 : error = -EOPNOTSUPP;
1055 407637 : goto out_unlock;
1056 : }
1057 : }
1058 :
1059 11594143 : if (!xfs_is_always_cow_inode(ip)) {
1060 11357263 : error = xfs_alloc_file_space(ip, offset, len);
1061 11357381 : if (error)
1062 291989 : goto out_unlock;
1063 : }
1064 : }
1065 :
1066 : /* Change file size if needed */
1067 52480917 : if (new_size) {
1068 8365441 : struct iattr iattr;
1069 :
1070 8365441 : iattr.ia_valid = ATTR_SIZE;
1071 8365441 : iattr.ia_size = new_size;
1072 16730882 : error = xfs_vn_setattr_size(file_mnt_idmap(file),
1073 : file_dentry(file), &iattr);
1074 8365428 : if (error)
1075 1231 : goto out_unlock;
1076 : }
1077 :
1078 : /*
1079 : * Perform hole insertion now that the file size has been
1080 : * updated so that if we crash during the operation we don't
1081 : * leave shifted extents past EOF and hence lose access to
1082 : * the data that is contained within them.
1083 : */
1084 52479673 : if (do_file_insert) {
1085 1692133 : error = xfs_insert_file_space(ip, offset, len);
1086 1692137 : if (error)
1087 2367 : goto out_unlock;
1088 : }
1089 :
1090 52477310 : if (xfs_file_sync_writes(file))
1091 29890 : error = xfs_log_force_inode(ip);
1092 :
1093 52447420 : out_unlock:
1094 54202094 : xfs_iunlock(ip, iolock);
1095 54202094 : return error;
1096 : }
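
/*
 * Userspace view of the collapse-range checks above (sketch, assuming a
 * filesystem with 4096 byte allocation units and a file large enough
 * that the range ends before EOF):
 *
 *	fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 8192);	// OK
 *	fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 512, 4096);	// -EINVAL
 *
 * A range that reaches or crosses EOF also fails with -EINVAL.
 */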
1097 :
1098 : STATIC int
1099 11728153 : xfs_file_fadvise(
1100 : struct file *file,
1101 : loff_t start,
1102 : loff_t end,
1103 : int advice)
1104 : {
1105 11728153 : struct xfs_inode *ip = XFS_I(file_inode(file));
1106 11728153 : int ret;
1107 11728153 : int lockflags = 0;
1108 :
1109 : /*
1110 : * Operations creating pages in page cache need protection from hole
1111 : * punching and similar ops
1112 : */
1113 11728153 : if (advice == POSIX_FADV_WILLNEED) {
1114 0 : lockflags = XFS_IOLOCK_SHARED;
1115 0 : xfs_ilock(ip, lockflags);
1116 : }
1117 11728153 : ret = generic_fadvise(file, start, end, advice);
1118 11708156 : if (lockflags)
1119 0 : xfs_iunlock(ip, lockflags);
1120 11708156 : return ret;
1121 : }
1122 :
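/*
 * Remap (reflink) a range of blocks from one file to another, carrying
 * the CoW extent size hint across when the entire source file is shared
 * into the entire destination file.
 */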
1123 : STATIC loff_t
1124 228198733 : xfs_file_remap_range(
1125 : struct file *file_in,
1126 : loff_t pos_in,
1127 : struct file *file_out,
1128 : loff_t pos_out,
1129 : loff_t len,
1130 : unsigned int remap_flags)
1131 : {
1132 228198733 : struct inode *inode_in = file_inode(file_in);
1133 228198733 : struct xfs_inode *src = XFS_I(inode_in);
1134 228198733 : struct inode *inode_out = file_inode(file_out);
1135 228198733 : struct xfs_inode *dest = XFS_I(inode_out);
1136 228198733 : struct xfs_mount *mp = src->i_mount;
1137 228198733 : loff_t remapped = 0;
1138 228198733 : xfs_extlen_t cowextsize;
1139 228198733 : int ret;
1140 :
1141 228198733 : if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
1142 : return -EINVAL;
1143 :
1144 228198733 : if (!xfs_has_reflink(mp))
1145 : return -EOPNOTSUPP;
1146 :
1147 280292276 : if (xfs_is_shutdown(mp))
1148 : return -EIO;
1149 :
1150 : /* Prepare and then clone file data. */
1151 140137567 : ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
1152 : &len, remap_flags);
1153 140147126 : if (ret || len == 0)
1154 54082124 : return ret;
1155 :
1156 86065002 : trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1157 :
1158 86062817 : ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
1159 : &remapped);
1160 86053976 : if (ret)
1161 945383 : goto out_unlock;
1162 :
1163 : /*
1164 : * Carry the cowextsize hint from src to dest if we're sharing the
1165 : * entire source file to the entire destination file, the source file
1166 : * has a cowextsize hint, and the destination file does not.
1167 : */
1168 85108593 : cowextsize = 0;
1169 85108593 : if (pos_in == 0 && len == i_size_read(inode_in) &&
1170 116460 : (src->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
1171 140 : pos_out == 0 && len >= i_size_read(inode_out) &&
1172 134 : !(dest->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE))
1173 18 : cowextsize = src->i_cowextsize;
1174 :
1175 85108593 : ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
1176 : remap_flags);
1177 85091269 : if (ret)
1178 1 : goto out_unlock;
1179 :
1180 85091268 : if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
1181 725 : xfs_log_force_inode(dest);
1182 85090706 : out_unlock:
1183 86036138 : xfs_iunlock2_io_mmap(src, dest);
1184 86057827 : if (ret)
1185 945362 : trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
1186 86057675 : return remapped > 0 ? remapped : ret;
1187 : }
1188 :
1189 : STATIC int
1190 597213522 : xfs_file_open(
1191 : struct inode *inode,
1192 : struct file *file)
1193 : {
1194 1194427044 : if (xfs_is_shutdown(XFS_M(inode->i_sb)))
1195 : return -EIO;
1196 597199407 : file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
1197 : FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
1198 597199407 : return generic_file_open(inode, file);
1199 : }
1200 :
1201 : STATIC int
1202 86962988 : xfs_dir_open(
1203 : struct inode *inode,
1204 : struct file *file)
1205 : {
1206 86962988 : struct xfs_inode *ip = XFS_I(inode);
1207 86962988 : unsigned int mode;
1208 86962988 : int error;
1209 :
1210 86962988 : error = xfs_file_open(inode, file);
1211 86483712 : if (error)
1212 : return error;
1213 :
1214 : /*
1215 : * If there are any blocks, read ahead block 0 as we're almost
1216 : * certain to have the next operation be a read there.
1217 : */
1218 86510973 : mode = xfs_ilock_data_map_shared(ip);
1219 86154952 : if (ip->i_df.if_nextents > 0)
1220 7569450 : error = xfs_dir3_data_readahead(ip, 0, 0);
1221 86155579 : xfs_iunlock(ip, mode);
1222 86155579 : return error;
1223 : }
1224 :
1225 : STATIC int
1226 510043990 : xfs_file_release(
1227 : struct inode *inode,
1228 : struct file *filp)
1229 : {
1230 510043990 : return xfs_release(XFS_I(inode));
1231 : }
1232 :
1233 : STATIC int
1234 167399125 : xfs_file_readdir(
1235 : struct file *file,
1236 : struct dir_context *ctx)
1237 : {
1238 167399125 : struct inode *inode = file_inode(file);
1239 167399125 : xfs_inode_t *ip = XFS_I(inode);
1240 167399125 : size_t bufsize;
1241 :
1242 : /*
1243 : * The Linux API doesn't pass the total size of the buffer
1244 : * we read into down to the filesystem. With the filldir concept
1245 : * it's not needed for correct information, but the XFS dir2 leaf
1246 : * code wants an estimate of the buffer size to calculate its
1247 : * readahead window and size the buffers used for mapping to
1248 : * physical blocks.
1249 : *
1250 : * Try to give it an estimate that's good enough, maybe at some
1251 : * point we can change the ->readdir prototype to include the
1252 : * buffer size. For now we use the current glibc buffer size.
1253 : */
1254 167399125 : bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_disk_size);
1255 :
1256 167399125 : return xfs_readdir(NULL, ip, ctx, bufsize);
1257 : }
1258 :
1259 : STATIC loff_t
1260 83141950 : xfs_file_llseek(
1261 : struct file *file,
1262 : loff_t offset,
1263 : int whence)
1264 : {
1265 83141950 : struct inode *inode = file->f_mapping->host;
1266 :
1267 166283900 : if (xfs_is_shutdown(XFS_I(inode)->i_mount))
1268 : return -EIO;
1269 :
1270 83141942 : switch (whence) {
1271 82944505 : default:
1272 82944505 : return generic_file_llseek(file, offset, whence);
1273 1482 : case SEEK_HOLE:
1274 1482 : offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
1275 1482 : break;
1276 195955 : case SEEK_DATA:
1277 195955 : offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
1278 195955 : break;
1279 : }
1280 :
1281 197437 : if (offset < 0)
1282 : return offset;
1283 154312 : return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
1284 : }
1285 :
1286 : #ifdef CONFIG_FS_DAX
1287 : static inline vm_fault_t
1288 0 : xfs_dax_fault(
1289 : struct vm_fault *vmf,
1290 : enum page_entry_size pe_size,
1291 : bool write_fault,
1292 : pfn_t *pfn)
1293 : {
1294 0 : return dax_iomap_fault(vmf, pe_size, pfn, NULL,
1295 0 : (write_fault && !vmf->cow_page) ?
1296 : &xfs_dax_write_iomap_ops :
1297 : &xfs_read_iomap_ops);
1298 : }
1299 : #else
1300 : static inline vm_fault_t
1301 : xfs_dax_fault(
1302 : struct vm_fault *vmf,
1303 : enum page_entry_size pe_size,
1304 : bool write_fault,
1305 : pfn_t *pfn)
1306 : {
1307 : ASSERT(0);
1308 : return VM_FAULT_SIGBUS;
1309 : }
1310 : #endif
1311 :
1312 : /*
1313 : * Locking for serialisation of IO during page faults. This results in a lock
1314 : * ordering of:
1315 : *
1316 : * mmap_lock (MM)
1317 : * sb_start_pagefault(vfs, freeze)
1318 : * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
1319 : * page_lock (MM)
1320 : * i_lock (XFS - extent map serialisation)
1321 : */
1322 : static vm_fault_t
1323 181896922 : __xfs_filemap_fault(
1324 : struct vm_fault *vmf,
1325 : enum page_entry_size pe_size,
1326 : bool write_fault)
1327 : {
1328 181896922 : struct inode *inode = file_inode(vmf->vma->vm_file);
1329 181896922 : struct xfs_inode *ip = XFS_I(inode);
1330 181896922 : vm_fault_t ret;
1331 :
1332 181896922 : trace_xfs_filemap_fault(ip, pe_size, write_fault);
1333 :
1334 181890837 : if (write_fault) {
1335 78443786 : sb_start_pagefault(inode->i_sb);
1336 78349763 : file_update_time(vmf->vma->vm_file);
1337 : }
1338 :
1339 181933539 : if (IS_DAX(inode)) {
1340 0 : pfn_t pfn;
1341 :
1342 0 : xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1343 0 : ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn);
1344 0 : if (ret & VM_FAULT_NEEDDSYNC)
1345 0 : ret = dax_finish_sync_fault(vmf, pe_size, pfn);
1346 0 : xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1347 : } else {
1348 181933539 : if (write_fault) {
1349 78468031 : xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1350 78367835 : ret = iomap_page_mkwrite(vmf,
1351 : &xfs_page_mkwrite_iomap_ops);
1352 78396468 : xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1353 : } else {
1354 103465508 : ret = filemap_fault(vmf);
1355 : }
1356 : }
1357 :
1358 181915311 : if (write_fault)
1359 78509544 : sb_end_pagefault(inode->i_sb);
1360 181864266 : return ret;
1361 : }
1362 :
1363 : static inline bool
1364 : xfs_is_write_fault(
1365 : struct vm_fault *vmf)
1366 : {
1367 0 : return (vmf->flags & FAULT_FLAG_WRITE) &&
1368 0 : (vmf->vma->vm_flags & VM_SHARED);
1369 : }
1370 :
1371 : static vm_fault_t
1372 103522390 : xfs_filemap_fault(
1373 : struct vm_fault *vmf)
1374 : {
1375 : /* DAX can shortcut the normal fault path on write faults! */
1376 103522390 : return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
1377 103522390 : IS_DAX(file_inode(vmf->vma->vm_file)) &&
1378 : xfs_is_write_fault(vmf));
1379 : }
1380 :
1381 : static vm_fault_t
1382 16423 : xfs_filemap_huge_fault(
1383 : struct vm_fault *vmf,
1384 : enum page_entry_size pe_size)
1385 : {
1386 16423 : if (!IS_DAX(file_inode(vmf->vma->vm_file)))
1387 : return VM_FAULT_FALLBACK;
1388 :
1389 : /* DAX can shortcut the normal fault path on write faults! */
1390 0 : return __xfs_filemap_fault(vmf, pe_size,
1391 : xfs_is_write_fault(vmf));
1392 : }
1393 :
1394 : static vm_fault_t
1395 78494868 : xfs_filemap_page_mkwrite(
1396 : struct vm_fault *vmf)
1397 : {
1398 78494868 : return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
1399 : }
1400 :
1401 : /*
1402 : * pfn_mkwrite was originally intended to ensure we capture time stamp updates
1403 : * on write faults. In reality, it needs to serialise against truncate and
1404 : * prepare memory for writing so handle is as standard write fault.
1405 : */
1406 : static vm_fault_t
1407 0 : xfs_filemap_pfn_mkwrite(
1408 : struct vm_fault *vmf)
1409 : {
1410 :
1411 0 : return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
1412 : }
1413 :
1414 : static const struct vm_operations_struct xfs_file_vm_ops = {
1415 : .fault = xfs_filemap_fault,
1416 : .huge_fault = xfs_filemap_huge_fault,
1417 : .map_pages = filemap_map_pages,
1418 : .page_mkwrite = xfs_filemap_page_mkwrite,
1419 : .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
1420 : };
1421 :
1422 : STATIC int
1423 11760057 : xfs_file_mmap(
1424 : struct file *file,
1425 : struct vm_area_struct *vma)
1426 : {
1427 11760057 : struct inode *inode = file_inode(file);
1428 11760057 : struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
1429 :
1430 : /*
1431 : * We don't support synchronous mappings for non-DAX files and
1432 : * for DAX files if the underlying dax_device is not synchronous.
1433 : */
1434 11760057 : if (!daxdev_mapping_supported(vma, target->bt_daxdev))
1435 : return -EOPNOTSUPP;
1436 :
1437 11759956 : file_accessed(file);
1438 11760077 : vma->vm_ops = &xfs_file_vm_ops;
1439 11760077 : if (IS_DAX(inode))
1440 0 : vm_flags_set(vma, VM_HUGEPAGE);
1441 : return 0;
1442 : }
1443 :
1444 : const struct file_operations xfs_file_operations = {
1445 : .llseek = xfs_file_llseek,
1446 : .read_iter = xfs_file_read_iter,
1447 : .write_iter = xfs_file_write_iter,
1448 : .splice_read = xfs_file_splice_read,
1449 : .splice_write = iter_file_splice_write,
1450 : .iopoll = iocb_bio_iopoll,
1451 : .unlocked_ioctl = xfs_file_ioctl,
1452 : #ifdef CONFIG_COMPAT
1453 : .compat_ioctl = xfs_file_compat_ioctl,
1454 : #endif
1455 : .mmap = xfs_file_mmap,
1456 : .mmap_supported_flags = MAP_SYNC,
1457 : .open = xfs_file_open,
1458 : .release = xfs_file_release,
1459 : .fsync = xfs_file_fsync,
1460 : .get_unmapped_area = thp_get_unmapped_area,
1461 : .fallocate = xfs_file_fallocate,
1462 : .fadvise = xfs_file_fadvise,
1463 : .remap_file_range = xfs_file_remap_range,
1464 : };
1465 :
1466 : const struct file_operations xfs_dir_file_operations = {
1467 : .open = xfs_dir_open,
1468 : .read = generic_read_dir,
1469 : .iterate_shared = xfs_file_readdir,
1470 : .llseek = generic_file_llseek,
1471 : .unlocked_ioctl = xfs_file_ioctl,
1472 : #ifdef CONFIG_COMPAT
1473 : .compat_ioctl = xfs_file_compat_ioctl,
1474 : #endif
1475 : .fsync = xfs_dir_fsync,
1476 : };