LCOV - code coverage report
Current view: top level - fs/xfs - xfs_file.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 506 596 84.9 %
Date: 2023-07-31 20:08:27 Functions: 33 37 89.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_inode.h"
      14             : #include "xfs_trans.h"
      15             : #include "xfs_inode_item.h"
      16             : #include "xfs_bmap.h"
      17             : #include "xfs_bmap_util.h"
      18             : #include "xfs_dir2.h"
      19             : #include "xfs_dir2_priv.h"
      20             : #include "xfs_ioctl.h"
      21             : #include "xfs_trace.h"
      22             : #include "xfs_log.h"
      23             : #include "xfs_icache.h"
      24             : #include "xfs_pnfs.h"
      25             : #include "xfs_iomap.h"
      26             : #include "xfs_reflink.h"
      27             : #include "xfs_file.h"
      28             : 
      29             : #include <linux/dax.h>
      30             : #include <linux/falloc.h>
      31             : #include <linux/backing-dev.h>
      32             : #include <linux/mman.h>
      33             : #include <linux/fadvise.h>
      34             : #include <linux/mount.h>
      35             : #include <linux/buffer_head.h> /* for block_page_mkwrite_return */
      36             : 
      37             : static const struct vm_operations_struct xfs_file_vm_ops;
      38             : 
      39             : /*
      40             :  * Decide if the given file range is aligned to the size of the fundamental
      41             :  * allocation unit for the file.
      42             :  */
      43             : bool
      44     1813513 : xfs_is_falloc_aligned(
      45             :         struct xfs_inode        *ip,
      46             :         loff_t                  pos,
      47             :         long long int           len)
      48             : {
      49     1813513 :         unsigned int            alloc_unit = xfs_inode_alloc_unitsize(ip);
      50             : 
      51     2432538 :         if (XFS_IS_REALTIME_INODE(ip) && !is_power_of_2(alloc_unit))
      52           8 :                 return isaligned_64(pos, alloc_unit) &&
      53           4 :                        isaligned_64(len, alloc_unit);
      54             : 
      55     1813507 :         return !((pos | len) & (alloc_unit - 1));
      56             : }
      57             : 
      58             : /*
      59             :  * Fsync operations on directories are much simpler than on regular files,
      60             :  * as there is no file data to flush, and thus also no need for explicit
      61             :  * cache flush operations, and there are no non-transaction metadata updates
      62             :  * on directories either.
      63             :  */
      64             : STATIC int
      65      615427 : xfs_dir_fsync(
      66             :         struct file             *file,
      67             :         loff_t                  start,
      68             :         loff_t                  end,
      69             :         int                     datasync)
      70             : {
      71      615427 :         struct xfs_inode        *ip = XFS_I(file->f_mapping->host);
      72             : 
      73      615427 :         trace_xfs_dir_fsync(ip);
      74      615426 :         return xfs_log_force_inode(ip);
      75             : }
      76             : 
      77             : static xfs_csn_t
      78     2538341 : xfs_fsync_seq(
      79             :         struct xfs_inode        *ip,
      80             :         bool                    datasync)
      81             : {
      82     2538341 :         if (!xfs_ipincount(ip))
      83             :                 return 0;
      84     2538241 :         if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
      85             :                 return 0;
      86     2470226 :         return ip->i_itemp->ili_commit_seq;
      87             : }
      88             : 
      89             : /*
      90             :  * All metadata updates are logged, which means that we just have to flush the
      91             :  * log up to the latest LSN that touched the inode.
      92             :  *
      93             :  * If we have concurrent fsync/fdatasync() calls, we need them to all block on
      94             :  * the log force before we clear the ili_fsync_fields field. This ensures that
      95             :  * we don't get a racing sync operation that does not wait for the metadata to
      96             :  * hit the journal before returning.  If we race with clearing ili_fsync_fields,
      97             :  * then all that will happen is the log force will do nothing as the lsn will
      98             :  * already be on disk.  We can't race with setting ili_fsync_fields because that
      99             :  * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
     100             :  * shared until after the ili_fsync_fields is cleared.
     101             :  */
     102             : static  int
     103     2538371 : xfs_fsync_flush_log(
     104             :         struct xfs_inode        *ip,
     105             :         bool                    datasync,
     106             :         int                     *log_flushed)
     107             : {
     108     2538371 :         int                     error = 0;
     109     2538371 :         xfs_csn_t               seq;
     110             : 
     111     2538371 :         xfs_ilock(ip, XFS_ILOCK_SHARED);
     112     2538366 :         seq = xfs_fsync_seq(ip, datasync);
     113     2538362 :         if (seq) {
     114     2470246 :                 error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
     115             :                                           log_flushed);
     116             : 
     117     2469049 :                 spin_lock(&ip->i_itemp->ili_lock);
     118     2470208 :                 ip->i_itemp->ili_fsync_fields = 0;
     119     2470208 :                 spin_unlock(&ip->i_itemp->ili_lock);
     120             :         }
     121     2538214 :         xfs_iunlock(ip, XFS_ILOCK_SHARED);
     122     2538172 :         return error;
     123             : }
     124             : 
     125             : STATIC int
     126    32138353 : xfs_file_fsync(
     127             :         struct file             *file,
     128             :         loff_t                  start,
     129             :         loff_t                  end,
     130             :         int                     datasync)
     131             : {
     132    32138353 :         struct xfs_inode        *ip = XFS_I(file->f_mapping->host);
     133    32138353 :         struct xfs_mount        *mp = ip->i_mount;
     134    32138353 :         int                     error, err2;
     135    32138353 :         int                     log_flushed = 0;
     136             : 
     137    32138353 :         trace_xfs_file_fsync(ip);
     138             : 
     139    32138630 :         error = file_write_and_wait_range(file, start, end);
     140    32139489 :         if (error)
     141             :                 return error;
     142             : 
     143    64275728 :         if (xfs_is_shutdown(mp))
     144             :                 return -EIO;
     145             : 
     146    32135771 :         xfs_iflags_clear(ip, XFS_ITRUNCATED);
     147             : 
     148             :         /*
     149             :          * If we have an RT and/or log subvolume we need to make sure to flush
     150             :          * the write cache of the device used for file data first.  This is to
     151             :          * ensure newly written file data make it to disk before logging the new
     152             :          * inode size in case of an extending write.
     153             :          */
     154    32135254 :         if (XFS_IS_REALTIME_INODE(ip))
     155    13030179 :                 error = xfs_buftarg_flush(mp->m_rtdev_targp);
     156    19105075 :         else if (mp->m_logdev_targp != mp->m_ddev_targp)
     157           0 :                 error = xfs_buftarg_flush(mp->m_ddev_targp);
     158             : 
     159             :         /*
     160             :          * Any inode that has dirty modifications in the log is pinned.  The
     161             :          * racy check here for a pinned inode will not catch modifications
     162             :          * that happen concurrently to the fsync call, but fsync semantics
     163             :          * only require syncing previously completed I/O.
     164             :          */
     165    32134053 :         if (xfs_ipincount(ip)) {
     166     2538373 :                 err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
     167     2537867 :                 if (err2 && !error)
     168         975 :                         error = err2;
     169             :         }
     170             : 
     171             :         /*
     172             :          * If we only have a single device, and the log force above was
     173             :          * a no-op we might have to flush the data device cache here.
     174             :          * This can only happen for fdatasync/O_DSYNC if we were overwriting
     175             :          * an already allocated file and thus do not have any metadata to
     176             :          * commit.
     177             :          */
     178    32133547 :         if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
     179    17753166 :             mp->m_logdev_targp == mp->m_ddev_targp) {
     180    17753000 :                 err2 = xfs_buftarg_flush(mp->m_ddev_targp);
     181    17752934 :                 if (err2 && !error)
     182         283 :                         error = err2;
     183             :         }
     184             : 
     185             :         return error;
     186             : }
     187             : 
     188             : static int
     189   219221358 : xfs_ilock_iocb(
     190             :         struct kiocb            *iocb,
     191             :         unsigned int            lock_mode)
     192             : {
     193   219221358 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     194             : 
     195   219221358 :         if (iocb->ki_flags & IOCB_NOWAIT) {
     196           0 :                 if (!xfs_ilock_nowait(ip, lock_mode))
     197           0 :                         return -EAGAIN;
     198             :         } else {
     199   219221358 :                 xfs_ilock(ip, lock_mode);
     200             :         }
     201             : 
     202             :         return 0;
     203             : }
     204             : 
     205             : STATIC ssize_t
     206    82967708 : xfs_file_dio_read(
     207             :         struct kiocb            *iocb,
     208             :         struct iov_iter         *to)
     209             : {
     210    82967708 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     211    82967708 :         ssize_t                 ret;
     212             : 
     213    82967708 :         trace_xfs_file_direct_read(iocb, to);
     214             : 
     215    82967710 :         if (!iov_iter_count(to))
     216             :                 return 0; /* skip atime */
     217             : 
     218    82967703 :         file_accessed(iocb->ki_filp);
     219             : 
     220    82967745 :         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
     221    82967744 :         if (ret)
     222             :                 return ret;
     223    82967735 :         ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
     224    82967737 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     225             : 
     226    82967737 :         return ret;
     227             : }
     228             : 
     229             : static noinline ssize_t
     230             : xfs_file_dax_read(
     231             :         struct kiocb            *iocb,
     232             :         struct iov_iter         *to)
     233             : {
     234             :         struct xfs_inode        *ip = XFS_I(iocb->ki_filp->f_mapping->host);
     235             :         ssize_t                 ret = 0;
     236             : 
     237             :         trace_xfs_file_dax_read(iocb, to);
     238             : 
     239             :         if (!iov_iter_count(to))
     240             :                 return 0; /* skip atime */
     241             : 
     242             :         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
     243             :         if (ret)
     244             :                 return ret;
     245             :         ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
     246             :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     247             : 
     248             :         file_accessed(iocb->ki_filp);
     249             :         return ret;
     250             : }
     251             : 
     252             : STATIC ssize_t
     253    65519523 : xfs_file_buffered_read(
     254             :         struct kiocb            *iocb,
     255             :         struct iov_iter         *to)
     256             : {
     257    65519523 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     258    65519523 :         ssize_t                 ret;
     259             : 
     260    65519523 :         trace_xfs_file_buffered_read(iocb, to);
     261             : 
     262    65531540 :         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
     263    65519416 :         if (ret)
     264             :                 return ret;
     265    65533149 :         ret = generic_file_read_iter(iocb, to);
     266    65530536 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     267             : 
     268    65530536 :         return ret;
     269             : }
     270             : 
     271             : STATIC ssize_t
     272   148493266 : xfs_file_read_iter(
     273             :         struct kiocb            *iocb,
     274             :         struct iov_iter         *to)
     275             : {
     276   148493266 :         struct inode            *inode = file_inode(iocb->ki_filp);
     277   148493266 :         struct xfs_mount        *mp = XFS_I(inode)->i_mount;
     278   148493266 :         ssize_t                 ret = 0;
     279             : 
     280   148493266 :         XFS_STATS_INC(mp, xs_read_calls);
     281             : 
     282   296986532 :         if (xfs_is_shutdown(mp))
     283             :                 return -EIO;
     284             : 
     285   148490463 :         if (IS_DAX(inode))
     286             :                 ret = xfs_file_dax_read(iocb, to);
     287   148490463 :         else if (iocb->ki_flags & IOCB_DIRECT)
     288    82967680 :                 ret = xfs_file_dio_read(iocb, to);
     289             :         else
     290    65522783 :                 ret = xfs_file_buffered_read(iocb, to);
     291             : 
     292   148500250 :         if (ret > 0)
     293    68985305 :                 XFS_STATS_ADD(mp, xs_read_bytes, ret);
     294             :         return ret;
     295             : }
     296             : 
     297             : STATIC ssize_t
     298     6345641 : xfs_file_splice_read(
     299             :         struct file             *in,
     300             :         loff_t                  *ppos,
     301             :         struct pipe_inode_info  *pipe,
     302             :         size_t                  len,
     303             :         unsigned int            flags)
     304             : {
     305     6345641 :         struct inode            *inode = file_inode(in);
     306     6345641 :         struct xfs_inode        *ip = XFS_I(inode);
     307     6345641 :         struct xfs_mount        *mp = ip->i_mount;
     308     6345641 :         ssize_t                 ret = 0;
     309             : 
     310     6345641 :         XFS_STATS_INC(mp, xs_read_calls);
     311             : 
     312    12691282 :         if (xfs_is_shutdown(mp))
     313             :                 return -EIO;
     314             : 
     315     6345636 :         trace_xfs_file_splice_read(ip, *ppos, len);
     316             : 
     317     6345639 :         xfs_ilock(ip, XFS_IOLOCK_SHARED);
     318     6345637 :         ret = filemap_splice_read(in, ppos, pipe, len, flags);
     319     6345661 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     320     6345661 :         if (ret > 0)
     321     6345528 :                 XFS_STATS_ADD(mp, xs_read_bytes, ret);
     322             :         return ret;
     323             : }
     324             : 
     325             : /*
     326             :  * Decide if this file write requires COWing-around at either end of the write
     327             :  * range.  This is only required if the file allocation unit is larger than
     328             :  * 1FSB and the write range is not aligned with the allocation unit.
     329             :  */
     330             : static bool
     331   128286184 : xfs_file_write_needs_cow_around(
     332             :         struct xfs_inode        *ip,
     333             :         loff_t                  pos,
     334             :         long long int           count)
     335             : {
     336             :         /*
     337             :          * No COWing required if this inode doesn't do COW.
     338             :          *
     339             :          * If the allocation unit is 1FSB, we do not need to COW around the
     340             :          * edges of the operation range.  This applies to all files on the data
     341             :          * device and rt files that have an extent size of 1FSB.
     342             :          */
     343   128286184 :         if (!xfs_inode_needs_cow_around(ip))
     344             :                 return false;
     345             : 
     346             :         /*
     347             :          * Otherwise, check that the operation is aligned to the rt extent
     348             :          * size.  Any unaligned operation /must/ be COWed around since the
     349             :          * regular reflink code only handles extending writes up to fsblock
     350             :          * boundaries.
     351             :          */
     352           0 :         return !xfs_is_falloc_aligned(ip, pos, count);
     353             : }
     354             : 
     355             : /* Do we need to COW-around at this offset to handle a truncate up or down? */
     356             : bool
     357     6209813 : xfs_truncate_needs_cow_around(
     358             :         struct xfs_inode        *ip,
     359             :         loff_t                  pos)
     360             : {
     361     6209813 :         return xfs_file_write_needs_cow_around(ip, pos, 0);
     362             : }
     363             : 
     364             : /* Does this file write require COWing around? */
     365             : static inline bool
     366             : xfs_iocb_needs_cow_around(
     367             :         struct xfs_inode        *ip,
     368             :         const struct kiocb      *iocb,
     369             :         const struct iov_iter   *from)
     370             : {
     371     4174674 :         return xfs_file_write_needs_cow_around(ip, iocb->ki_pos,
     372             :                         iov_iter_count(from));
     373             : }
     374             : 
     375             : /* Unshare the allocation unit mapped to the given file position.  */
     376             : inline int
     377           0 : xfs_file_unshare_at(
     378             :         struct xfs_inode        *ip,
     379             :         loff_t                  pos)
     380             : {
     381           0 :         loff_t                  isize = i_size_read(VFS_I(ip));
     382           0 :         unsigned int            extsize, len;
     383           0 :         uint32_t                mod;
     384             : 
     385           0 :         len = extsize = xfs_inode_alloc_unitsize(ip);
     386             : 
     387             :         /* Open-coded rounddown_64 so that we can skip out if aligned */
     388           0 :         div_u64_rem(pos, extsize, &mod);
     389           0 :         if (mod == 0)
     390             :                 return 0;
     391           0 :         pos -= mod;
     392             : 
     393             :         /* Do not extend the file. */
     394           0 :         if (pos >= isize)
     395             :                 return 0;
     396           0 :         if (pos + len > isize)
     397           0 :                 len = isize - pos;
     398             : 
     399           0 :         trace_xfs_file_cow_around(ip, pos, len);
     400             : 
     401           0 :         if (IS_DAX(VFS_I(ip)))
     402             :                 return dax_file_unshare(VFS_I(ip), pos, len,
     403             :                                 &xfs_dax_write_iomap_ops);
     404           0 :         return iomap_file_unshare(VFS_I(ip), pos, len,
     405             :                         &xfs_buffered_write_iomap_ops);
     406             : }
     407             : 
     408             : /*
     409             :  * Dirty the pages on either side of a write request as needed to satisfy
     410             :  * alignment requirements if we're going to perform a copy-on-write.
     411             :  *
     412             :  * This is only needed for realtime files when the rt extent size is larger
     413             :  * than 1 fs block, because we don't allow a logical rt extent in a file to map
     414             :  * to multiple physical rt extents.  In other words, we can only map and unmap
     415             :  * full rt extents.  Note that page cache doesn't exist above EOF, so be
     416             :  * careful to stay below EOF.
     417             :  */
     418             : static int
     419           0 : xfs_file_cow_around(
     420             :         struct xfs_inode        *ip,
     421             :         loff_t                  pos,
     422             :         long long int           count)
     423             : {
     424           0 :         int                     error;
     425             : 
     426             :         /* Unshare at the start of the extent. */
     427           0 :         error = xfs_file_unshare_at(ip,  pos);
     428           0 :         if (error)
     429             :                 return error;
     430             : 
     431             :         /* Unshare at the end. */
     432           0 :         return xfs_file_unshare_at(ip, pos + count);
     433             : }
     434             : 
     435             : /*
     436             :  * Common pre-write limit and setup checks.
     437             :  *
     438             :  * Called with the iolock held either shared or exclusive according to
     439             :  * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
     440             :  * if called for a direct write beyond i_size.
     441             :  */
     442             : STATIC ssize_t
     443    66052703 : xfs_file_write_checks(
     444             :         struct kiocb            *iocb,
     445             :         struct iov_iter         *from,
     446             :         unsigned int            *iolock)
     447             : {
     448    66052703 :         struct file             *file = iocb->ki_filp;
     449    66052703 :         struct inode            *inode = file->f_mapping->host;
     450    66052703 :         struct xfs_inode        *ip = XFS_I(inode);
     451    66052703 :         ssize_t                 error = 0;
     452    66052703 :         size_t                  count = iov_iter_count(from);
     453    66052703 :         bool                    drained_dio = false;
     454    85162436 :         loff_t                  isize;
     455             : 
     456             : restart:
     457    85162436 :         error = generic_write_checks(iocb, from);
     458    85149389 :         if (error <= 0)
     459           6 :                 return error;
     460             : 
     461    85149383 :         if (iocb->ki_flags & IOCB_NOWAIT) {
     462           0 :                 error = break_layout(inode, false);
     463           0 :                 if (error == -EWOULDBLOCK)
     464             :                         error = -EAGAIN;
     465             :         } else {
     466    85149383 :                 error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
     467             :         }
     468             : 
     469    85156521 :         if (error)
     470           0 :                 return error;
     471             : 
     472             :         /*
     473             :          * For changing security info in file_remove_privs() we need i_rwsem
     474             :          * exclusively.  We also need it to COW around the range being written.
     475             :          */
     476    85156521 :         if (*iolock == XFS_IOLOCK_SHARED &&
     477     8536747 :             (!IS_NOSEC(inode) || xfs_iocb_needs_cow_around(ip, iocb, from))) {
     478      187432 :                 xfs_iunlock(ip, *iolock);
     479      187431 :                 *iolock = XFS_IOLOCK_EXCL;
     480      187431 :                 error = xfs_ilock_iocb(iocb, *iolock);
     481      187430 :                 if (error) {
     482           0 :                         *iolock = 0;
     483           0 :                         return error;
     484             :                 }
     485      187430 :                 goto restart;
     486             :         }
     487             : 
     488             :         /*
     489             :          * The write is not aligned to the file's allocation unit.  If either
     490             :          * of the allocation units at the start or end of the write range are
     491             :          * shared, unshare them through the page cache.
     492             :          */
     493    84969056 :         if (xfs_iocb_needs_cow_around(ip, iocb, from)) {
     494           0 :                 ASSERT(*iolock == XFS_IOLOCK_EXCL);
     495             : 
     496           0 :                 inode_dio_wait(VFS_I(ip));
     497           0 :                 drained_dio = true;
     498             : 
     499           0 :                 error = xfs_file_cow_around(ip, iocb->ki_pos, count);
     500           0 :                 if (error)
     501           0 :                         return error;
     502             :         }
     503             : 
     504             :         /*
     505             :          * If the offset is beyond the size of the file, we need to zero any
     506             :          * blocks that fall between the existing EOF and the start of this
     507             :          * write.  If zeroing is needed and we are currently holding the iolock
     508             :          * shared, we need to update it to exclusive which implies having to
     509             :          * redo all checks before.
     510             :          *
     511             :          * We need to serialise against EOF updates that occur in IO completions
     512             :          * here. We want to make sure that nobody is changing the size while we
     513             :          * do this check until we have placed an IO barrier (i.e.  hold the
     514             :          * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.  The
     515             :          * spinlock effectively forms a memory barrier once we have the
     516             :          * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
     517             :          * hence be able to correctly determine if we need to run zeroing.
     518             :          *
     519             :          * We can do an unlocked check here safely as IO completion can only
     520             :          * extend EOF. Truncate is locked out at this point, so the EOF can
     521             :          * not move backwards, only forwards. Hence we only need to take the
     522             :          * slow path and spin locks when we are at or beyond the current EOF.
     523             :          */
     524    84963809 :         if (iocb->ki_pos <= i_size_read(inode))
     525    47140600 :                 goto out;
     526             : 
     527    37823209 :         spin_lock(&ip->i_flags_lock);
     528    37833517 :         isize = i_size_read(inode);
     529    37833517 :         if (iocb->ki_pos > isize) {
     530    37833517 :                 spin_unlock(&ip->i_flags_lock);
     531             : 
     532    37839267 :                 if (iocb->ki_flags & IOCB_NOWAIT)
     533             :                         return -EAGAIN;
     534             : 
     535    37839267 :                 if (!drained_dio) {
     536    18928783 :                         if (*iolock == XFS_IOLOCK_SHARED) {
     537      100403 :                                 xfs_iunlock(ip, *iolock);
     538      100395 :                                 *iolock = XFS_IOLOCK_EXCL;
     539      100395 :                                 xfs_ilock(ip, *iolock);
     540      100400 :                                 iov_iter_reexpand(from, count);
     541             :                         }
     542             :                         /*
     543             :                          * We now have an IO submission barrier in place, but
     544             :                          * AIO can do EOF updates during IO completion and hence
     545             :                          * we now need to wait for all of them to drain. Non-AIO
     546             :                          * DIO will have drained before we are given the
     547             :                          * XFS_IOLOCK_EXCL, and so for most cases this wait is a
     548             :                          * no-op.
     549             :                          */
     550    18928780 :                         inode_dio_wait(inode);
     551    18922303 :                         drained_dio = true;
     552    18922303 :                         goto restart;
     553             :                 }
     554             : 
     555             :                 /*
     556             :                  * If we're starting the write past EOF, COW the allocation
     557             :                  * unit containing the current EOF before we start zeroing the
     558             :                  * range between EOF and the start of the write.
     559             :                  */
     560    18910484 :                 if (xfs_truncate_needs_cow_around(ip, isize)) {
     561           0 :                         error = xfs_file_unshare_at(ip, isize);
     562           0 :                         if (error)
     563             :                                 return error;
     564             :                 }
     565             : 
     566    18912862 :                 trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
     567    18912727 :                 error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
     568    18909880 :                 if (error)
     569             :                         return error;
     570             :         } else
     571           0 :                 spin_unlock(&ip->i_flags_lock);
     572             : 
     573    66049829 : out:
     574    66049829 :         return kiocb_modified(iocb);
     575             : }
     576             : /*
                     :  * Direct I/O write completion handler; installed as the ->end_io callback
                     :  * of xfs_dio_write_ops.
                     :  *
                     :  * @iocb:  kiocb of the write; iocb->ki_pos is used as the starting offset.
                     :  * @size:  number of bytes this completion accounts for.
                     :  * @error: error reported for the I/O; returned unchanged when non-zero.
                     :  * @flags: IOMAP_DIO_* flags; IOMAP_DIO_COW and IOMAP_DIO_UNWRITTEN select
                     :  *         the extra completion steps performed below.
                     :  *
                     :  * Returns -EIO if the filesystem is shut down, @error if it is non-zero and
                     :  * 0 if @size is zero.  Otherwise returns 0 or the error from whichever of
                     :  * the CoW-end, unwritten-conversion or file-size-update steps was run last.
                     :  */
     577             : static int
     578     6769038 : xfs_dio_write_end_io(
     579             :         struct kiocb            *iocb,
     580             :         ssize_t                 size,
     581             :         int                     error,
     582             :         unsigned                flags)
     583             : {
     584     6769038 :         struct inode            *inode = file_inode(iocb->ki_filp);
     585     6769038 :         struct xfs_inode        *ip = XFS_I(inode);
     586     6769038 :         loff_t                  offset = iocb->ki_pos;
     587     6769038 :         unsigned int            nofs_flag;
     588             : 
     589     6769038 :         trace_xfs_end_io_direct_write(ip, offset, size);
     590             : 
     591    13537904 :         if (xfs_is_shutdown(ip->i_mount))
     592             :                 return -EIO;
     593             : 
     594     6768263 :         if (error)
     595             :                 return error;
     596     6219394 :         if (!size)
     597             :                 return 0; /* zero-length completion: skip stats and all updates */
     598             : 
     599             :         /*
     600             :          * Capture amount written on completion as we can't reliably account
     601             :          * for it on submission.
     602             :          */
     603     6219394 :         XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
     604             : 
     605             :         /*
     606             :          * We can allocate memory here while doing writeback on behalf of
     607             :          * memory reclaim.  To avoid memory allocation deadlocks set the
     608             :          * task-wide nofs context for the following operations.
     609             :          */
     610     6219394 :         nofs_flag = memalloc_nofs_save();
     611             : 
     612     6219394 :         if (flags & IOMAP_DIO_COW) {
     613     1132002 :                 error = xfs_reflink_end_cow(ip, offset, size);
     614     1132003 :                 if (error)
     615          15 :                         goto out;
     616             :         }
     617             : 
     618             :         /*
     619             :          * Unwritten conversion updates the in-core isize after extent
     620             :          * conversion but before updating the on-disk size. Updating isize any
     621             :          * earlier allows a racing dio read to find unwritten extents before
     622             :          * they are converted.
     623             :          */
     624     6219380 :         if (flags & IOMAP_DIO_UNWRITTEN) {
     625     3301173 :                 error = xfs_iomap_write_unwritten(ip, offset, size, true);
     626     3301222 :                 goto out; /* the EOF update below is skipped on this path */
     627             :         }
     628             : 
     629             :         /*
     630             :          * We need to update the in-core inode size here so that we don't end up
     631             :          * with the on-disk inode size being outside the in-core inode size. We
     632             :          * have no other method of updating EOF for AIO, so always do it here
     633             :          * if necessary.
     634             :          *
     635             :          * We need to lock the test/set EOF update as we can be racing with
     636             :          * other IO completions here to update the EOF. Failing to serialise
     637             :          * here can result in EOF moving backwards and Bad Things Happen when
     638             :          * that occurs.
     639             :          *
     640             :          * As IO completion only ever extends EOF, we can do an unlocked check
     641             :          * here to avoid taking the spinlock. If we land within the current EOF,
     642             :          * then we do not need to do an extending update at all, and we don't
     643             :          * need to take the lock to check this. If we race with an update moving
     644             :          * EOF, then we'll either still be beyond EOF and need to take the lock,
     645             :          * or we'll be within EOF and we don't need to take it at all.
     646             :          */
     647     2918207 :         if (offset + size <= i_size_read(inode))
     648     2752012 :                 goto out;
     649             : 
     650      166195 :         spin_lock(&ip->i_flags_lock);
     651      166195 :         if (offset + size > i_size_read(inode)) { /* re-check under the lock */
     652      166195 :                 i_size_write(inode, offset + size);
     653      166195 :                 spin_unlock(&ip->i_flags_lock);
     654      166195 :                 error = xfs_setfilesize(ip, offset, size);
     655             :         } else {
     656           0 :                 spin_unlock(&ip->i_flags_lock);
     657             :         }
     658             : 
     659     6219444 : out:
     660     6219444 :         memalloc_nofs_restore(nofs_flag);
     661     6219444 :         return error;
     662             : }
     663             : /* Direct I/O write callbacks passed to iomap_dio_rw(); only ->end_io is set. */
     664             : static const struct iomap_dio_ops xfs_dio_write_ops = {
     665             :         .end_io         = xfs_dio_write_end_io,
     666             : };
     667             : 
     668             : /*
     669             :  * Handle block aligned direct I/O writes
                     :  *
                     :  * The IOLOCK is taken shared unless xfs_iocb_needs_cow_around() reports that
                     :  * the write needs CoW around its ends, in which case it is taken exclusive
                     :  * up front.  xfs_file_write_checks() may change the lock mode through
                     :  * @iolock; an exclusive lock is demoted to shared before the I/O is handed
                     :  * to iomap_dio_rw().
                     :  *
                     :  * Returns the result of iomap_dio_rw(), or the error from xfs_ilock_iocb()
                     :  * or xfs_file_write_checks() if either of them fails.
     670             :  */
     671             : static noinline ssize_t
     672     3739988 : xfs_file_dio_write_aligned(
     673             :         struct xfs_inode        *ip,
     674             :         struct kiocb            *iocb,
     675             :         struct iov_iter         *from)
     676             : {
     677     3739988 :         unsigned int            iolock = XFS_IOLOCK_SHARED;
     678     3739988 :         ssize_t                 ret;
     679             : 
     680             :         /*
     681             :          * If the range to write is not aligned to an allocation unit, we will
     682             :          * have to COW the allocation units on both ends of the write.  Because
     683             :          * this runs through the page cache, it requires IOLOCK_EXCL.  This
     684             :          * predicate performs an unlocked access of the rt and reflink inode
     685             :          * state.
     686             :          */
     687     3739988 :         if (xfs_iocb_needs_cow_around(ip, iocb, from))
     688           0 :                 iolock = XFS_IOLOCK_EXCL;
     689             : 
     690     3739980 :         ret = xfs_ilock_iocb(iocb, iolock);
     691     3740089 :         if (ret)
     692             :                 return ret; /* lock not taken, so nothing to unlock */
     693     3740087 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     694     3740036 :         if (ret)
     695         631 :                 goto out_unlock;
     696             : 
     697             :         /*
     698             :          * We don't need to hold the IOLOCK exclusively across the IO, so demote
     699             :          * the iolock back to shared if we had to take the exclusive lock in
     700             :          * xfs_file_write_checks() for other reasons.
                     :          *
                     :          * NOTE(review): this also demotes a lock that was taken exclusive above
                     :          * for the CoW-around case; presumably that work is complete once
                     :          * xfs_file_write_checks() returns -- confirm.
     701             :          */
     702     3739405 :         if (iolock == XFS_IOLOCK_EXCL) {
     703      140954 :                 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
     704      140953 :                 iolock = XFS_IOLOCK_SHARED;
     705             :         }
     706     3739404 :         trace_xfs_file_direct_write(iocb, from);
     707     3739395 :         ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
     708             :                            &xfs_dio_write_ops, 0, NULL, 0);
     709     3740108 : out_unlock:
     710     3740108 :         if (iolock)
     711     3740112 :                 xfs_iunlock(ip, iolock);
     712             :         return ret;
     713             : }
     714             : 
     715             : /*
     716             :  * Handle block unaligned direct I/O writes
     717             :  *
     718             :  * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
     719             :  * them to be done in parallel with reads and other direct I/O writes.  However,
     720             :  * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
     721             :  * to do sub-block zeroing and that requires serialisation against other direct
     722             :  * I/O to the same block.  In this case we need to serialise the submission of
     723             :  * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
     724             :  * In the case where sub-block zeroing is not required, we can do concurrent
     725             :  * sub-block dios to the same block successfully.
     726             :  *
     727             :  * Optimistically submit the I/O using the shared lock first, but use the
     728             :  * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
     729             :  * if block allocation or partial block zeroing would be required.  In that case
     730             :  * we try again with the exclusive lock.
                     :  *
                     :  * Returns the result of iomap_dio_rw(), or: -EAGAIN for an IOCB_NOWAIT write
                     :  * that would need the exclusive lock up front, -ENOTBLK when the inode is a
                     :  * CoW inode (xfs_file_write_iter() then falls back to a buffered write), or
                     :  * the error from xfs_ilock_iocb() or xfs_file_write_checks().
                     :  *
                     :  * NOTE(review): isize is held in a size_t here, while xfs_file_fallocate()
                     :  * stores i_size_read() in a loff_t; confirm this cannot truncate the size on
                     :  * builds where size_t is narrower than loff_t.
     731             :  */
     732             : static noinline ssize_t
     733     7024080 : xfs_file_dio_write_unaligned(
     734             :         struct xfs_inode        *ip,
     735             :         struct kiocb            *iocb,
     736             :         struct iov_iter         *from)
     737             : {
     738     7024080 :         size_t                  isize = i_size_read(VFS_I(ip));
     739     7024080 :         size_t                  count = iov_iter_count(from);
     740     7024080 :         unsigned int            iolock = XFS_IOLOCK_SHARED;
     741     7024080 :         unsigned int            flags = IOMAP_DIO_OVERWRITE_ONLY;
     742     7024080 :         ssize_t                 ret;
     743             : 
     744             :         /*
     745             :          * Extending writes need exclusivity because of the sub-block zeroing
     746             :          * that the DIO code always does for partial tail blocks beyond EOF, so
     747             :          * don't even bother trying the fast path in this case.
     748             :          */
     749     7024080 :         if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
     750     2650425 :                 if (iocb->ki_flags & IOCB_NOWAIT)
     751             :                         return -EAGAIN;
     752     2650425 : retry_exclusive: /* also reached by goto from the -EAGAIN retry below */
     753     3171972 :                 iolock = XFS_IOLOCK_EXCL;
     754     3171972 :                 flags = IOMAP_DIO_FORCE_WAIT;
     755             :         }
     756             : 
     757     7545627 :         ret = xfs_ilock_iocb(iocb, iolock);
     758     7545624 :         if (ret)
     759             :                 return ret;
     760             : 
     761             :         /*
     762             :          * We can't properly handle unaligned direct I/O to reflink files yet,
     763             :          * as we can't unshare a partial block.
     764             :          */
     765     7545626 :         if (xfs_is_cow_inode(ip)) {
     766     4515916 :                 trace_xfs_reflink_bounce_dio_write(iocb, from);
     767     4515916 :                 ret = -ENOTBLK;
     768     4515916 :                 goto out_unlock;
     769             :         }
     770             : 
     771     3029700 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     772     3029716 :         if (ret)
     773          77 :                 goto out_unlock;
     774             : 
     775             :         /*
     776             :          * If we are doing exclusive unaligned I/O, this must be the only I/O
     777             :          * in-flight.  Otherwise we risk data corruption due to unwritten extent
     778             :          * conversions from the AIO end_io handler.  Wait for all other I/O to
     779             :          * drain first.
     780             :          */
     781     3029639 :         if (flags & IOMAP_DIO_FORCE_WAIT)
     782     2407613 :                 inode_dio_wait(VFS_I(ip));
     783             : 
     784     3029631 :         trace_xfs_file_direct_write(iocb, from);
     785     3029624 :         ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
     786             :                            &xfs_dio_write_ops, flags, NULL, 0);
     787             : 
     788             :         /*
     789             :          * Retry unaligned I/O with exclusive blocking semantics if the DIO
     790             :          * layer rejected it for mapping or locking reasons. If we are doing
     791             :          * nonblocking user I/O, propagate the error.
     792             :          */
     793     3029690 :         if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
     794      521549 :                 ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
     795      521549 :                 xfs_iunlock(ip, iolock); /* drop the lock; the retry re-takes it exclusive */
     796      521547 :                 goto retry_exclusive;
     797             :         }
     798             : 
     799     2508141 : out_unlock:
     800     7024134 :         if (iolock)
     801     7024117 :                 xfs_iunlock(ip, iolock);
     802             :         return ret;
     803             : }
     804             : /*
                     :  * Dispatch a direct I/O write to the aligned or unaligned handler.
                     :  *
                     :  * Returns -EINVAL when the file position or byte count has bits set in the
                     :  * buffer target's bt_logical_sectormask.  A write with bits set in the
                     :  * mount's m_blockmask goes to xfs_file_dio_write_unaligned(); everything
                     :  * else goes to xfs_file_dio_write_aligned().  The handler's return value is
                     :  * passed back unchanged.
                     :  */
     805             : static ssize_t
     806    10763985 : xfs_file_dio_write(
     807             :         struct kiocb            *iocb,
     808             :         struct iov_iter         *from)
     809             : {
     810    10763985 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     811    10763985 :         struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
     812    10763985 :         size_t                  count = iov_iter_count(from);
     813             : 
     814             :         /* direct I/O must be aligned to device logical sector size */
     815    10763985 :         if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
     816             :                 return -EINVAL;
     817    10763985 :         if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
     818     7024061 :                 return xfs_file_dio_write_unaligned(ip, iocb, from);
     819     3739924 :         return xfs_file_dio_write_aligned(ip, iocb, from);
     820             : }
     821             : /*
                     :  * Write path used by xfs_file_write_iter() when IS_DAX(inode) is true.
                     :  *
                     :  * Takes the IOLOCK exclusive, runs xfs_file_write_checks() and hands the
                     :  * write to dax_iomap_rw().  If the write moved iocb->ki_pos past the current
                     :  * inode size, the in-core size is updated and xfs_setfilesize() is called.
                     :  *
                     :  * Returns the byte count (after generic_write_sync()) on success.  An error
                     :  * from xfs_setfilesize() is returned in place of the byte count; errors from
                     :  * xfs_ilock_iocb(), xfs_file_write_checks() and dax_iomap_rw() are returned
                     :  * as they are.
                     :  */
     822             : static noinline ssize_t
     823             : xfs_file_dax_write(
     824             :         struct kiocb            *iocb,
     825             :         struct iov_iter         *from)
     826             : {
     827             :         struct inode            *inode = iocb->ki_filp->f_mapping->host;
     828             :         struct xfs_inode        *ip = XFS_I(inode);
     829             :         unsigned int            iolock = XFS_IOLOCK_EXCL;
     830             :         ssize_t                 ret, error = 0;
     831             :         loff_t                  pos;
     832             : 
     833             :         ret = xfs_ilock_iocb(iocb, iolock);
     834             :         if (ret)
     835             :                 return ret;
     836             :         ret = xfs_file_write_checks(iocb, from, &iolock);
     837             :         if (ret)
     838             :                 goto out;
     839             : 
     840             :         pos = iocb->ki_pos; /* start offset, saved before dax_iomap_rw() runs */
     841             : 
     842             :         trace_xfs_file_dax_write(iocb, from);
     843             :         ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops);
     844             :         if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
     845             :                 i_size_write(inode, iocb->ki_pos);
     846             :                 error = xfs_setfilesize(ip, pos, ret);
     847             :         }
     848             : out:
     849             :         if (iolock)
     850             :                 xfs_iunlock(ip, iolock);
     851             :         if (error)
     852             :                 return error;
     853             : 
     854             :         if (ret > 0) {
     855             :                 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
     856             : 
     857             :                 /* Handle various SYNC-type writes */
     858             :                 ret = generic_write_sync(iocb, ret);
     859             :         }
     860             :         return ret;
     861             : }
     862             : /*
                     :  * Buffered write path.
                     :  *
                     :  * Takes the IOLOCK exclusive, runs xfs_file_write_checks() and hands the
                     :  * write to iomap_file_buffered_write().  If that fails with -EDQUOT or
                     :  * -ENOSPC, space is reclaimed and the whole sequence is retried from
                     :  * write_retry; the cleared_space flag limits this to one retry per call.
                     :  *
                     :  * Returns the byte count (after generic_write_sync()) on success, or the
                     :  * error from locking, the write checks or the write itself.
                     :  */
     863             : STATIC ssize_t
     864    58854271 : xfs_file_buffered_write(
     865             :         struct kiocb            *iocb,
     866             :         struct iov_iter         *from)
     867             : {
     868    58854271 :         struct inode            *inode = iocb->ki_filp->f_mapping->host;
     869    58854271 :         struct xfs_inode        *ip = XFS_I(inode);
     870    58854271 :         ssize_t                 ret;
     871    58854271 :         bool                    cleared_space = false;
     872    59284213 :         unsigned int            iolock;
     873             : 
     874             : write_retry:
     875    59284213 :         iolock = XFS_IOLOCK_EXCL; /* reset on every pass; the retry paths drop the lock */
     876    59284213 :         ret = xfs_ilock_iocb(iocb, iolock);
     877    59284482 :         if (ret)
     878           0 :                 return ret;
     879             : 
     880    59284482 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     881    59270324 :         if (ret)
     882         669 :                 goto out;
     883             : 
     884    59269655 :         trace_xfs_file_buffered_write(iocb, from);
     885    59282568 :         ret = iomap_file_buffered_write(iocb, from,
     886             :                         &xfs_buffered_write_iomap_ops);
     887             : 
     888             :         /*
     889             :          * If we hit a space limit, try to free up some lingering preallocated
     890             :          * space before returning an error. In the case of ENOSPC, first try to
     891             :          * write back all dirty inodes to free up some of the excess reserved
     892             :          * metadata space. This reduces the chances that the eofblocks scan
     893             :          * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
     894             :          * also behaves as a filter to prevent too many eofblocks scans from
     895             :          * running at the same time.  Use a synchronous scan to increase the
     896             :          * effectiveness of the scan.
     897             :          */
     898    59279652 :         if (ret == -EDQUOT && !cleared_space) {
     899         232 :                 xfs_iunlock(ip, iolock);
     900         232 :                 xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
     901         232 :                 cleared_space = true;
     902         232 :                 goto write_retry;
     903    59279420 :         } else if (ret == -ENOSPC && !cleared_space) {
     904      429539 :                 struct xfs_icwalk       icw = {0};
     905             : 
     906      429539 :                 cleared_space = true;
     907      429539 :                 xfs_flush_inodes(ip->i_mount); /* called with the iolock still held */
     908             : 
     909      429580 :                 xfs_iunlock(ip, iolock);
     910      429505 :                 icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
     911      429505 :                 xfs_blockgc_free_space(ip->i_mount, &icw);
     912      429710 :                 goto write_retry;
     913             :         }
     914             : 
     915    58849881 : out:
     916    58850550 :         if (iolock)
     917    58856607 :                 xfs_iunlock(ip, iolock);
     918             : 
     919    58848367 :         if (ret > 0) {
     920    58477592 :                 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
     921             :                 /* Handle various SYNC-type writes */
     922    58477592 :                 ret = generic_write_sync(iocb, ret);
     923             :         }
     924             :         return ret;
     925             : }
     926             : /*
                     :  * Top-level write dispatcher: picks the DAX, direct or buffered write path.
                     :  *
                     :  * Every call bumps the xs_write_calls statistic.  Returns 0 for an empty
                     :  * iov_iter and -EIO when the filesystem is shut down.  Otherwise a DAX inode
                     :  * goes to xfs_file_dax_write(), an IOCB_DIRECT write goes to
                     :  * xfs_file_dio_write(), and anything else goes to xfs_file_buffered_write().
                     :  * A direct write that returns -ENOTBLK is retried as a buffered write.
                     :  */
     927             : STATIC ssize_t
     928    65104148 : xfs_file_write_iter(
     929             :         struct kiocb            *iocb,
     930             :         struct iov_iter         *from)
     931             : {
     932    65104148 :         struct inode            *inode = iocb->ki_filp->f_mapping->host;
     933    65104148 :         struct xfs_inode        *ip = XFS_I(inode);
     934    65104148 :         ssize_t                 ret;
     935    65104148 :         size_t                  ocount = iov_iter_count(from);
     936             : 
     937    65104148 :         XFS_STATS_INC(ip->i_mount, xs_write_calls);
     938             : 
     939    65104148 :         if (ocount == 0)
     940             :                 return 0;
     941             : 
     942   130208152 :         if (xfs_is_shutdown(ip->i_mount))
     943             :                 return -EIO;
     944             : 
     945    65094347 :         if (IS_DAX(inode))
     946             :                 return xfs_file_dax_write(iocb, from);
     947             : 
     948    65094347 :         if (iocb->ki_flags & IOCB_DIRECT) {
     949             :                 /*
     950             :                  * Allow a directio write to fall back to a buffered
     951             :                  * write *only* in the case that we're doing a reflink
     952             :                  * CoW.  In all other directio scenarios we do not
     953             :                  * allow an operation to fall back to buffered mode.
     954             :                  */
     955    10764045 :                 ret = xfs_file_dio_write(iocb, from);
     956    10764133 :                 if (ret != -ENOTBLK)
     957             :                         return ret;
     958             :         }
     959             : 
     960    58846315 :         return xfs_file_buffered_write(iocb, from);
     961             : }
     962             : 
     963             : /* Does this file, inode, or mount want synchronous writes? */
     964   408708277 : static inline bool xfs_file_sync_writes(struct file *filp)
     965             : {
     966   408708277 :         struct xfs_inode        *ip = XFS_I(file_inode(filp));
     967             : 
     968   408708277 :         if (xfs_has_wsync(ip->i_mount)) /* mount-wide setting */
     969             :                 return true;
     970   408708269 :         if (filp->f_flags & (__O_SYNC | O_DSYNC)) /* flags on this open file */
     971             :                 return true;
     972   408701135 :         if (IS_SYNC(file_inode(filp))) /* state of the inode itself */
     973           4 :                 return true;
     974             : 
     975             :         return false;
     976             : }
     977             : /*
                     :  * fallocate() mode bits accepted by xfs_file_fallocate(); a mode with any
                     :  * other bit set is rejected there with -EOPNOTSUPP.
                     :  */
     978             : #define XFS_FALLOC_FL_SUPPORTED                                         \
     979             :                 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
     980             :                  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
     981             :                  FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE |     \
     982             :                  FALLOC_FL_MAP_FREE_SPACE)
     983             : 
     984             : STATIC long
     985    16125042 : xfs_file_fallocate(
     986             :         struct file             *file,
     987             :         int                     mode,
     988             :         loff_t                  offset,
     989             :         loff_t                  len)
     990             : {
     991    16125042 :         struct inode            *inode = file_inode(file);
     992    16125042 :         struct xfs_inode        *ip = XFS_I(inode);
     993    16125042 :         long                    error;
     994    16125042 :         uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
     995    16125042 :         loff_t                  new_size = 0;
     996    16125042 :         bool                    do_file_insert = false;
     997             : 
     998    16125042 :         if (!S_ISREG(inode->i_mode))
     999             :                 return -EINVAL;
    1000    16125042 :         if (mode & ~XFS_FALLOC_FL_SUPPORTED)
    1001             :                 return -EOPNOTSUPP;
    1002             : 
    1003    16125042 :         xfs_ilock(ip, iolock);
    1004    16125108 :         error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
    1005    16125120 :         if (error)
    1006           0 :                 goto out_unlock;
    1007             : 
    1008             :         /*
    1009             :          * Must wait for all AIO to complete before we continue as AIO can
    1010             :          * change the file size on completion without holding any locks we
    1011             :          * currently hold. We must do this first because AIO can update both
    1012             :          * the on disk and in memory inode sizes, and the operations that follow
    1013             :          * require the in-memory size to be fully up-to-date.
    1014             :          */
    1015    16125120 :         inode_dio_wait(inode);
    1016             : 
    1017             :         /*
    1018             :          * Now that AIO and DIO have drained, flush and (if necessary) invalidate
    1019             :          * the cached range over the first operation we are about to run.
    1020             :          *
    1021             :          * We care about zero and collapse here because they both run a hole
    1022             :          * punch over the range first. Because that can zero data, and the range
    1023             :          * of invalidation for the shift operations is much larger, we still do
    1024             :          * the required flush for collapse in xfs_prepare_shift().
    1025             :          *
    1026             :          * Insert has the same range requirements as collapse, and we extend the
    1027             :          * file first which can zero data. Hence insert has the same
    1028             :          * flush/invalidate requirements as collapse and so they are both
    1029             :          * handled at the right time by xfs_prepare_shift().
    1030             :          */
    1031    16125059 :         if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
    1032             :                     FALLOC_FL_COLLAPSE_RANGE)) {
    1033    11250602 :                 error = xfs_flush_unmap_range(ip, offset, len);
    1034    11250674 :                 if (error)
    1035         178 :                         goto out_unlock;
    1036             :         }
    1037             : 
    1038    16124953 :         error = file_modified(file);
    1039    16124997 :         if (error)
    1040           4 :                 goto out_unlock;
    1041             : 
    1042    16124993 :         if (mode & FALLOC_FL_PUNCH_HOLE) {
    1043             :                 /* Unshare around the region to punch, if needed. */
    1044     8321726 :                 if (xfs_file_write_needs_cow_around(ip, offset, len)) {
    1045           0 :                         error = xfs_file_cow_around(ip, offset, len);
    1046           0 :                         if (error)
    1047           0 :                                 goto out_unlock;
    1048             :                 }
    1049             : 
    1050     8321721 :                 error = xfs_free_file_space(ip, offset, len);
    1051     8321720 :                 if (error)
    1052       14190 :                         goto out_unlock;
    1053     7803267 :         } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
    1054      955014 :                 if (!xfs_is_falloc_aligned(ip, offset, len)) {
    1055      334242 :                         error = -EINVAL;
    1056      334242 :                         goto out_unlock;
    1057             :                 }
    1058             : 
    1059             :                 /*
    1060             :                  * There is no need to overlap collapse range with EOF,
    1061             :                  * in which case it is effectively a truncate operation
    1062             :                  */
    1063      620771 :                 if (offset + len >= i_size_read(inode)) {
    1064      146235 :                         error = -EINVAL;
    1065      146235 :                         goto out_unlock;
    1066             :                 }
    1067             : 
    1068      474536 :                 new_size = i_size_read(inode) - len;
    1069             : 
    1070      474536 :                 error = xfs_collapse_file_space(ip, offset, len);
    1071      474536 :                 if (error)
    1072        1809 :                         goto out_unlock;
    1073     6848253 :         } else if (mode & FALLOC_FL_INSERT_RANGE) {
    1074      858502 :                 loff_t          isize = i_size_read(inode);
    1075             : 
    1076      858502 :                 if (!xfs_is_falloc_aligned(ip, offset, len)) {
    1077      334578 :                         error = -EINVAL;
    1078      334578 :                         goto out_unlock;
    1079             :                 }
    1080             : 
    1081             :                 /*
    1082             :                  * New inode size must not exceed ->s_maxbytes, accounting for
    1083             :                  * possible signed overflow.
    1084             :                  */
    1085      523924 :                 if (inode->i_sb->s_maxbytes - isize < len) {
    1086           2 :                         error = -EFBIG;
    1087           2 :                         goto out_unlock;
    1088             :                 }
    1089      523922 :                 new_size = isize + len;
    1090             : 
    1091             :                 /* Offset should be less than i_size */
    1092      523922 :                 if (offset >= isize) {
    1093      119480 :                         error = -EINVAL;
    1094      119480 :                         goto out_unlock;
    1095             :                 }
    1096             :                 do_file_insert = true;
    1097     5989751 :         } else if (mode & FALLOC_FL_MAP_FREE_SPACE) {
    1098         204 :                 struct xfs_mount        *mp = ip->i_mount;
    1099         204 :                 xfs_off_t               device_size;
    1100             : 
    1101         204 :                 if (!capable(CAP_SYS_ADMIN)) {
    1102           0 :                         error = -EPERM;
    1103           0 :                         goto out_unlock;
    1104             :                 }
    1105             : 
    1106         204 :                 if (XFS_IS_REALTIME_INODE(ip))
    1107           0 :                         device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_rblocks);
    1108             :                 else
    1109         204 :                         device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks);
    1110             : 
    1111             :                 /*
    1112             :                  * Bail out now if we aren't allowed to make the file size the
    1113             :                  * same length as the device.
    1114             :                  */
    1115         204 :                 if (device_size > i_size_read(inode)) {
    1116           4 :                         new_size = device_size;
    1117           4 :                         error = inode_newsize_ok(inode, new_size);
    1118           4 :                         if (error)
    1119           0 :                                 goto out_unlock;
    1120             :                 }
    1121             : 
    1122         204 :                 if (XFS_IS_REALTIME_INODE(ip))
    1123           0 :                         error = xfs_map_free_rt_space(ip, offset, len);
    1124             :                 else
    1125         204 :                         error = xfs_map_free_space(ip, offset, len);
    1126         204 :                 if (error) {
    1127           0 :                         if (error == -ECANCELED)
    1128           0 :                                 error = 0;
    1129           0 :                         goto out_unlock;
    1130             :                 }
    1131             :         } else {
    1132     5989547 :                 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
    1133     2574464 :                     offset + len > i_size_read(inode)) {
    1134     1370327 :                         new_size = offset + len;
    1135     1370327 :                         error = inode_newsize_ok(inode, new_size);
    1136     1370327 :                         if (error)
    1137           2 :                                 goto out_unlock;
    1138             :                 }
    1139             : 
    1140     5989545 :                 if (mode & FALLOC_FL_ZERO_RANGE) {
    1141             :                         /*
    1142             :                          * Punch a hole and prealloc the range.  We use a hole
    1143             :                          * punch rather than unwritten extent conversion for two
    1144             :                          * reasons:
    1145             :                          *
    1146             :                          *   1.) Hole punch handles partial block zeroing for us.
    1147             :                          *   2.) If prealloc returns ENOSPC, the file range is
    1148             :                          *       still zero-valued by virtue of the hole punch.
    1149             :                          */
    1150     1973777 :                         unsigned int blksize = i_blocksize(inode);
    1151             : 
    1152     1973777 :                         trace_xfs_zero_file_space(ip, offset, len);
    1153             : 
    1154             :                         /* Unshare around the region to zero, if needed. */
    1155     1973777 :                         if (xfs_file_write_needs_cow_around(ip, offset, len)) {
    1156           0 :                                 error = xfs_file_cow_around(ip, offset, len);
    1157           0 :                                 if (error)
    1158           0 :                                         goto out_unlock;
    1159             :                         }
    1160             : 
    1161     1973776 :                         error = xfs_free_file_space(ip, offset, len);
    1162     1973778 :                         if (error)
    1163        8367 :                                 goto out_unlock;
    1164             : 
    1165     1965411 :                         len = round_up(offset + len, blksize) -
    1166     1965411 :                               round_down(offset, blksize);
    1167     1965411 :                         offset = round_down(offset, blksize);
    1168     4015768 :                 } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
    1169             :                         /*
    1170             :                          * Enlarge the unshare region to align to a full
    1171             :                          * allocation unit.
    1172             :                          */
    1173          66 :                         if (xfs_inode_needs_cow_around(ip)) {
    1174           0 :                                 loff_t          isize = i_size_read(VFS_I(ip));
    1175           0 :                                 unsigned int    rextsize;
    1176           0 :                                 uint32_t        mod;
    1177             : 
    1178           0 :                                 rextsize = xfs_inode_alloc_unitsize(ip);
    1179           0 :                                 div_u64_rem(offset, rextsize, &mod);
    1180           0 :                                 offset -= mod;
    1181           0 :                                 len += mod;
    1182             : 
    1183           0 :                                 div_u64_rem(offset + len, rextsize, &mod);
    1184           0 :                                 if (mod)
    1185           0 :                                         len += rextsize - mod;
    1186           0 :                                 if (offset + len > isize)
    1187           0 :                                         len = isize - offset;
    1188             :                         }
    1189          66 :                         error = xfs_reflink_unshare(ip, offset, len);
    1190          66 :                         if (error)
    1191           2 :                                 goto out_unlock;
    1192             :                 } else {
    1193             :                         /*
    1194             :                          * If always_cow mode we can't use preallocations and
    1195             :                          * thus should not create them.
    1196             :                          */
    1197     4015702 :                         if (xfs_is_always_cow_inode(ip)) {
    1198           0 :                                 error = -EOPNOTSUPP;
    1199           0 :                                 goto out_unlock;
    1200             :                         }
    1201             :                 }
    1202             : 
    1203     5981179 :                 if (!xfs_is_always_cow_inode(ip)) {
    1204     5981167 :                         error = xfs_alloc_file_space(ip, offset, len);
    1205     5981180 :                         if (error)
    1206       99939 :                                 goto out_unlock;
    1207             :                 }
    1208             :         }
    1209             : 
    1210             :         /* Change file size if needed */
    1211    15066144 :         if (new_size) {
    1212     2239869 :                 struct iattr iattr;
    1213             : 
    1214     2239869 :                 iattr.ia_valid = ATTR_SIZE;
    1215     2239869 :                 iattr.ia_size = new_size;
    1216     4479739 :                 error = xfs_vn_setattr_size(file_mnt_idmap(file),
    1217             :                                             file_dentry(file), &iattr);
    1218     2239869 :                 if (error)
    1219         328 :                         goto out_unlock;
    1220             :         }
    1221             : 
    1222             :         /*
    1223             :          * Perform hole insertion now that the file size has been
    1224             :          * updated so that if we crash during the operation we don't
    1225             :          * leave shifted extents past EOF and hence losing access to
    1226             :          * the data that is contained within them.
    1227             :          */
    1228    15065816 :         if (do_file_insert) {
    1229      404342 :                 error = xfs_insert_file_space(ip, offset, len);
    1230      404342 :                 if (error)
    1231        1779 :                         goto out_unlock;
    1232             :         }
    1233             : 
    1234    15064037 :         if (xfs_file_sync_writes(file))
    1235        7130 :                 error = xfs_log_force_inode(ip);
    1236             : 
    1237    15056907 : out_unlock:
    1238    16125172 :         xfs_iunlock(ip, iolock);
    1239    16125172 :         return error;
    1240             : }
    1241             : 
    1242             : STATIC int
    1243     2120886 : xfs_file_fadvise(
    1244             :         struct file     *file,
    1245             :         loff_t          start,
    1246             :         loff_t          end,
    1247             :         int             advice)
    1248             : {
    1249     2120886 :         struct xfs_inode *ip = XFS_I(file_inode(file));
    1250     2120886 :         int ret;
    1251     2120886 :         int lockflags = 0;
    1252             : 
    1253             :         /*
    1254             :          * Operations creating pages in page cache need protection from hole
    1255             :          * punching and similar ops
    1256             :          */
    1257     2120886 :         if (advice == POSIX_FADV_WILLNEED) {
    1258           0 :                 lockflags = XFS_IOLOCK_SHARED;
    1259           0 :                 xfs_ilock(ip, lockflags);
    1260             :         }
    1261     2120886 :         ret = generic_fadvise(file, start, end, advice);
    1262     2120945 :         if (lockflags)
    1263           0 :                 xfs_iunlock(ip, lockflags);
    1264     2120945 :         return ret;
    1265             : }
    1266             : 
    1267             : STATIC loff_t
    1268   272727396 : xfs_file_remap_range(
    1269             :         struct file             *file_in,
    1270             :         loff_t                  pos_in,
    1271             :         struct file             *file_out,
    1272             :         loff_t                  pos_out,
    1273             :         loff_t                  len,
    1274             :         unsigned int            remap_flags)
    1275             : {
    1276   272727396 :         struct inode            *inode_in = file_inode(file_in);
    1277   272727396 :         struct xfs_inode        *src = XFS_I(inode_in);
    1278   272727396 :         struct inode            *inode_out = file_inode(file_out);
    1279   272727396 :         struct xfs_inode        *dest = XFS_I(inode_out);
    1280   272727396 :         struct xfs_mount        *mp = src->i_mount;
    1281   272727396 :         loff_t                  remapped = 0;
    1282   272727396 :         xfs_extlen_t            cowextsize;
    1283   272727396 :         int                     ret;
    1284             : 
    1285   272727396 :         if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
    1286             :                 return -EINVAL;
    1287             : 
    1288   272727396 :         if (!xfs_has_reflink(mp))
    1289             :                 return -EOPNOTSUPP;
    1290             : 
    1291   545419526 :         if (xfs_is_shutdown(mp))
    1292             :                 return -EIO;
    1293             : 
    1294             :         /* Prepare and then clone file data. */
    1295   272707663 :         ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
    1296             :                         &len, remap_flags);
    1297   272712711 :         if (ret || len == 0)
    1298    75293247 :                 return ret;
    1299             : 
    1300   197419464 :         trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
    1301             : 
    1302   197417069 :         ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
    1303             :                         &remapped);
    1304   197422847 :         if (ret)
    1305      597900 :                 goto out_unlock;
    1306             : 
    1307             :         /*
    1308             :          * Carry the cowextsize hint from src to dest if we're sharing the
    1309             :          * entire source file to the entire destination file, the source file
    1310             :          * has a cowextsize hint, and the destination file does not.
    1311             :          */
    1312   196824947 :         cowextsize = 0;
    1313   196824947 :         if (pos_in == 0 && len == i_size_read(inode_in) &&
    1314       45816 :             (src->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
    1315          48 :             pos_out == 0 && len >= i_size_read(inode_out) &&
    1316          46 :             !(dest->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE))
    1317           6 :                 cowextsize = src->i_cowextsize;
    1318             : 
    1319   196824947 :         ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
    1320             :                         remap_flags);
    1321   196824584 :         if (ret)
    1322           0 :                 goto out_unlock;
    1323             : 
    1324   196824584 :         if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
    1325         942 :                 xfs_log_force_inode(dest);
    1326   196823724 : out_unlock:
    1327   197421640 :         xfs_iunlock2_io_mmap(src, dest);
    1328   197419236 :         if (ret)
    1329      597904 :                 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
    1330   197419235 :         return remapped > 0 ? remapped : ret;
    1331             : }
    1332             : 
    1333             : STATIC int
    1334   484048933 : xfs_file_open(
    1335             :         struct inode    *inode,
    1336             :         struct file     *file)
    1337             : {
    1338   968097866 :         if (xfs_is_shutdown(XFS_M(inode->i_sb)))
    1339             :                 return -EIO;
    1340   484034572 :         file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
    1341             :                         FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
    1342   484034572 :         return generic_file_open(inode, file);
    1343             : }
    1344             : 
    1345             : STATIC int
    1346    26767218 : xfs_dir_open(
    1347             :         struct inode    *inode,
    1348             :         struct file     *file)
    1349             : {
    1350    26767218 :         struct xfs_inode *ip = XFS_I(inode);
    1351    26767218 :         unsigned int    mode;
    1352    26767218 :         int             error;
    1353             : 
    1354    26767218 :         error = xfs_file_open(inode, file);
    1355    26767471 :         if (error)
    1356             :                 return error;
    1357             : 
    1358             :         /*
    1359             :          * If there are any blocks, read-ahead block 0 as we're almost
    1360             :          * certain to have the next operation be a read there.
    1361             :          */
    1362    26766430 :         mode = xfs_ilock_data_map_shared(ip);
    1363    26766549 :         if (ip->i_df.if_nextents > 0)
    1364     6481797 :                 error = xfs_dir3_data_readahead(ip, 0, 0);
    1365    26766502 :         xfs_iunlock(ip, mode);
    1366    26766502 :         return error;
    1367             : }
    1368             : 
    1369             : /*
    1370             :  * When we release the file, we don't want it to trim EOF blocks if it is a
    1371             :  * readonly context.  This avoids open/read/close workloads from removing
    1372             :  * EOF blocks that other writers depend upon to reduce fragmentation.
    1373             :  */
    1374             : STATIC int
    1375   457253893 : xfs_file_release(
    1376             :         struct inode    *inode,
    1377             :         struct file     *file)
    1378             : {
    1379   457253893 :         bool            free_eof_blocks = true;
    1380             : 
    1381   457253893 :         if ((file->f_mode & (FMODE_WRITE | FMODE_READ)) == FMODE_READ)
    1382    57658639 :                 free_eof_blocks = false;
    1383             : 
    1384   457253893 :         return xfs_release(XFS_I(inode), free_eof_blocks);
    1385             : }
    1386             : 
    1387             : STATIC int
    1388    49460947 : xfs_file_readdir(
    1389             :         struct file     *file,
    1390             :         struct dir_context *ctx)
    1391             : {
    1392    49460947 :         struct inode    *inode = file_inode(file);
    1393    49460947 :         xfs_inode_t     *ip = XFS_I(inode);
    1394    49460947 :         size_t          bufsize;
    1395             : 
    1396             :         /*
    1397             :          * The Linux API doesn't pass down the total size of the buffer
    1398             :          * we read into down to the filesystem.  With the filldir concept
    1399             :          * it's not needed for correct information, but the XFS dir2 leaf
    1400             :          * code wants an estimate of the buffer size to calculate it's
    1401             :          * readahead window and size the buffers used for mapping to
    1402             :          * physical blocks.
    1403             :          *
    1404             :          * Try to give it an estimate that's good enough, maybe at some
    1405             :          * point we can change the ->readdir prototype to include the
    1406             :          * buffer size.  For now we use the current glibc buffer size.
    1407             :          */
    1408    49460947 :         bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_disk_size);
    1409             : 
    1410    49460947 :         return xfs_readdir(NULL, ip, ctx, bufsize);
    1411             : }
    1412             : 
    1413             : STATIC loff_t
    1414    44634637 : xfs_file_llseek(
    1415             :         struct file     *file,
    1416             :         loff_t          offset,
    1417             :         int             whence)
    1418             : {
    1419    44634637 :         struct inode            *inode = file->f_mapping->host;
    1420             : 
    1421    89269274 :         if (xfs_is_shutdown(XFS_I(inode)->i_mount))
    1422             :                 return -EIO;
    1423             : 
    1424    44634626 :         switch (whence) {
    1425    44402137 :         default:
    1426    44402137 :                 return generic_file_llseek(file, offset, whence);
    1427         326 :         case SEEK_HOLE:
    1428         326 :                 offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
    1429         326 :                 break;
    1430      232163 :         case SEEK_DATA:
    1431      232163 :                 offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
    1432      232163 :                 break;
    1433             :         }
    1434             : 
    1435      232489 :         if (offset < 0)
    1436             :                 return offset;
    1437      178526 :         return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
    1438             : }
    1439             : 
    1440             : #ifdef CONFIG_FS_DAX
    1441             : static inline vm_fault_t
    1442             : xfs_dax_fault(
    1443             :         struct vm_fault         *vmf,
    1444             :         enum page_entry_size    pe_size,
    1445             :         bool                    write_fault,
    1446             :         pfn_t                   *pfn)
    1447             : {
    1448             :         return dax_iomap_fault(vmf, pe_size, pfn, NULL,
    1449             :                         (write_fault && !vmf->cow_page) ?
    1450             :                                 &xfs_dax_write_iomap_ops :
    1451             :                                 &xfs_read_iomap_ops);
    1452             : }
    1453             : #else
    1454             : static inline vm_fault_t
    1455             : xfs_dax_fault(
    1456             :         struct vm_fault         *vmf,
    1457             :         enum page_entry_size    pe_size,
    1458             :         bool                    write_fault,
    1459             :         pfn_t                   *pfn)
    1460             : {
    1461             :         ASSERT(0);
    1462             :         return VM_FAULT_SIGBUS;
    1463             : }
    1464             : #endif
    1465             : 
    1466             : static int
    1467     4031509 : xfs_filemap_fault_around(
    1468             :         struct vm_fault         *vmf,
    1469             :         struct inode            *inode)
    1470             : {
    1471     4031509 :         struct xfs_inode        *ip = XFS_I(inode);
    1472     4031509 :         struct folio            *folio = page_folio(vmf->page);
    1473     4031509 :         loff_t                  pos;
    1474     4031509 :         ssize_t                 len;
    1475             : 
    1476     4031509 :         if (!xfs_inode_needs_cow_around(ip))
    1477             :                 return 0;
    1478             : 
    1479           0 :         folio_lock(folio);
    1480           0 :         len = folio_mkwrite_check_truncate(folio, inode);
    1481           0 :         if (len < 0) {
    1482           0 :                 folio_unlock(folio);
    1483           0 :                 return len;
    1484             :         }
    1485           0 :         pos = folio_pos(folio);
    1486           0 :         folio_unlock(folio);
    1487             : 
    1488           0 :         if (!xfs_file_write_needs_cow_around(ip, pos, len))
    1489             :                 return 0;
    1490             : 
    1491           0 :         return xfs_file_cow_around(XFS_I(inode), pos, len);
    1492             : }
    1493             : 
    1494             : /*
    1495             :  * Locking for serialisation of IO during page faults. This results in a lock
    1496             :  * ordering of:
    1497             :  *
    1498             :  * mmap_lock (MM)
    1499             :  *   sb_start_pagefault(vfs, freeze)
    1500             :  *     invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
    1501             :  *       page_lock (MM)
    1502             :  *         i_lock (XFS - extent map serialisation)
    1503             :  */
    1504             : static vm_fault_t
    1505    16328634 : __xfs_filemap_fault(
    1506             :         struct vm_fault         *vmf,
    1507             :         enum page_entry_size    pe_size,
    1508             :         bool                    write_fault)
    1509             : {
    1510    16328634 :         struct inode            *inode = file_inode(vmf->vma->vm_file);
    1511    16328634 :         struct xfs_inode        *ip = XFS_I(inode);
    1512    16328634 :         vm_fault_t              ret;
    1513             : 
    1514    16328634 :         trace_xfs_filemap_fault(ip, pe_size, write_fault);
    1515             : 
    1516    16330132 :         if (write_fault) {
    1517     4031446 :                 sb_start_pagefault(inode->i_sb);
    1518     4031496 :                 file_update_time(vmf->vma->vm_file);
    1519             :         }
    1520             : 
    1521    16330183 :         if (IS_DAX(inode)) {
    1522             :                 pfn_t pfn;
    1523             : 
    1524             :                 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1525             :                 ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn);
    1526             :                 if (ret & VM_FAULT_NEEDDSYNC)
    1527             :                         ret = dax_finish_sync_fault(vmf, pe_size, pfn);
    1528             :                 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1529             :         } else {
    1530    16330183 :                 if (write_fault) {
    1531     4031497 :                         int     error;
    1532             : 
    1533     4031497 :                         xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1534             : 
    1535             :                         /*
    1536             :                          * Unshare all the blocks in this rt extent surrounding
    1537             :                          * this page.
    1538             :                          */
    1539     4031500 :                         error = xfs_filemap_fault_around(vmf, inode);
    1540     4031502 :                         if (error) {
    1541           0 :                                 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1542           0 :                                 ret = block_page_mkwrite_return(error);
    1543           0 :                                 goto out;
    1544             :                         }
    1545             : 
    1546     4031502 :                         ret = iomap_page_mkwrite(vmf,
    1547             :                                         &xfs_page_mkwrite_iomap_ops);
    1548     4031483 :                         xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1549             :                 } else {
    1550    12298686 :                         ret = filemap_fault(vmf);
    1551             :                 }
    1552             :         }
    1553             : 
    1554    16330762 : out:
    1555    16330762 :         if (write_fault)
    1556     4031489 :                 sb_end_pagefault(inode->i_sb);
    1557    16330752 :         return ret;
    1558             : }
    1559             : 
    1560             : static inline bool
    1561             : xfs_is_write_fault(
    1562             :         struct vm_fault         *vmf)
    1563             : {
    1564             :         return (vmf->flags & FAULT_FLAG_WRITE) &&
    1565             :                (vmf->vma->vm_flags & VM_SHARED);
    1566             : }
    1567             : 
    1568             : static vm_fault_t
    1569    12298758 : xfs_filemap_fault(
    1570             :         struct vm_fault         *vmf)
    1571             : {
    1572             :         /* DAX can shortcut the normal fault path on write faults! */
    1573    12298758 :         return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
    1574             :                         IS_DAX(file_inode(vmf->vma->vm_file)) &&
    1575             :                         xfs_is_write_fault(vmf));
    1576             : }
    1577             : 
    1578             : static vm_fault_t
    1579           0 : xfs_filemap_huge_fault(
    1580             :         struct vm_fault         *vmf,
    1581             :         enum page_entry_size    pe_size)
    1582             : {
    1583           0 :         if (!IS_DAX(file_inode(vmf->vma->vm_file)))
    1584           0 :                 return VM_FAULT_FALLBACK;
    1585             : 
    1586             :         /* DAX can shortcut the normal fault path on write faults! */
    1587             :         return __xfs_filemap_fault(vmf, pe_size,
    1588             :                         xfs_is_write_fault(vmf));
    1589             : }
    1590             : 
    1591             : static vm_fault_t
    1592     4031412 : xfs_filemap_page_mkwrite(
    1593             :         struct vm_fault         *vmf)
    1594             : {
    1595     4031412 :         return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
    1596             : }
    1597             : 
    1598             : /*
    1599             :  * pfn_mkwrite was originally intended to ensure we capture time stamp updates
    1600             :  * on write faults. In reality, it needs to serialise against truncate and
    1601             :  * prepare memory for writing so handle is as standard write fault.
    1602             :  */
    1603             : static vm_fault_t
    1604           0 : xfs_filemap_pfn_mkwrite(
    1605             :         struct vm_fault         *vmf)
    1606             : {
    1607             : 
    1608           0 :         return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
    1609             : }
    1610             : 
    1611             : static const struct vm_operations_struct xfs_file_vm_ops = {
    1612             :         .fault          = xfs_filemap_fault,
    1613             :         .huge_fault     = xfs_filemap_huge_fault,
    1614             :         .map_pages      = filemap_map_pages,
    1615             :         .page_mkwrite   = xfs_filemap_page_mkwrite,
    1616             :         .pfn_mkwrite    = xfs_filemap_pfn_mkwrite,
    1617             : };
    1618             : 
    1619             : STATIC int
    1620     8862963 : xfs_file_mmap(
    1621             :         struct file             *file,
    1622             :         struct vm_area_struct   *vma)
    1623             : {
    1624     8862963 :         struct inode            *inode = file_inode(file);
    1625     8862963 :         struct xfs_buftarg      *target = xfs_inode_buftarg(XFS_I(inode));
    1626             : 
    1627             :         /*
    1628             :          * We don't support synchronous mappings for non-DAX files and
    1629             :          * for DAX files if underneath dax_device is not synchronous.
    1630             :          */
    1631     8862963 :         if (!daxdev_mapping_supported(vma, target->bt_daxdev))
    1632             :                 return -EOPNOTSUPP;
    1633             : 
    1634     8862972 :         file_accessed(file);
    1635     8862983 :         vma->vm_ops = &xfs_file_vm_ops;
    1636     8862983 :         if (IS_DAX(inode))
    1637             :                 vm_flags_set(vma, VM_HUGEPAGE);
    1638     8862983 :         return 0;
    1639             : }
    1640             : 
    1641             : const struct file_operations xfs_file_operations = {
    1642             :         .llseek         = xfs_file_llseek,
    1643             :         .read_iter      = xfs_file_read_iter,
    1644             :         .write_iter     = xfs_file_write_iter,
    1645             :         .splice_read    = xfs_file_splice_read,
    1646             :         .splice_write   = iter_file_splice_write,
    1647             :         .iopoll         = iocb_bio_iopoll,
    1648             :         .unlocked_ioctl = xfs_file_ioctl,
    1649             : #ifdef CONFIG_COMPAT
    1650             :         .compat_ioctl   = xfs_file_compat_ioctl,
    1651             : #endif
    1652             :         .mmap           = xfs_file_mmap,
    1653             :         .mmap_supported_flags = MAP_SYNC,
    1654             :         .open           = xfs_file_open,
    1655             :         .release        = xfs_file_release,
    1656             :         .fsync          = xfs_file_fsync,
    1657             :         .get_unmapped_area = thp_get_unmapped_area,
    1658             :         .fallocate      = xfs_file_fallocate,
    1659             :         .fadvise        = xfs_file_fadvise,
    1660             :         .remap_file_range = xfs_file_remap_range,
    1661             : };
    1662             : 
    1663             : const struct file_operations xfs_dir_file_operations = {
    1664             :         .open           = xfs_dir_open,
    1665             :         .read           = generic_read_dir,
    1666             :         .iterate_shared = xfs_file_readdir,
    1667             :         .llseek         = generic_file_llseek,
    1668             :         .unlocked_ioctl = xfs_file_ioctl,
    1669             : #ifdef CONFIG_COMPAT
    1670             :         .compat_ioctl   = xfs_file_compat_ioctl,
    1671             : #endif
    1672             :         .fsync          = xfs_dir_fsync,
    1673             : };

Generated by: LCOV version 1.14