LCOV - code coverage report
Current view: top level - fs/xfs - xfs_file.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 572 646 88.5 %
Date: 2023-07-31 20:08:34 Functions: 36 40 90.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_inode.h"
      14             : #include "xfs_trans.h"
      15             : #include "xfs_inode_item.h"
      16             : #include "xfs_bmap.h"
      17             : #include "xfs_bmap_util.h"
      18             : #include "xfs_dir2.h"
      19             : #include "xfs_dir2_priv.h"
      20             : #include "xfs_ioctl.h"
      21             : #include "xfs_trace.h"
      22             : #include "xfs_log.h"
      23             : #include "xfs_icache.h"
      24             : #include "xfs_pnfs.h"
      25             : #include "xfs_iomap.h"
      26             : #include "xfs_reflink.h"
      27             : #include "xfs_file.h"
      28             : 
      29             : #include <linux/dax.h>
      30             : #include <linux/falloc.h>
      31             : #include <linux/backing-dev.h>
      32             : #include <linux/mman.h>
      33             : #include <linux/fadvise.h>
      34             : #include <linux/mount.h>
      35             : #include <linux/buffer_head.h> /* for block_page_mkwrite_return */
      36             : 
      37             : static const struct vm_operations_struct xfs_file_vm_ops;
      38             : 
      39             : /*
      40             :  * Decide if the given file range is aligned to the size of the fundamental
      41             :  * allocation unit for the file.
                     :  *
                     :  * @ip:  inode whose allocation unit size is obtained from
                     :  *       xfs_inode_alloc_unitsize()
                     :  * @pos: starting byte offset of the range
                     :  * @len: length of the range in bytes
                     :  *
                     :  * Returns true only if both @pos and @len are multiples of the allocation
                     :  * unit size.
      42             :  */
      43             : bool
      44    21208366 : xfs_is_falloc_aligned(
      45             :         struct xfs_inode        *ip,
      46             :         loff_t                  pos,
      47             :         long long int           len)
      48             : {
      49    21208366 :         unsigned int            alloc_unit = xfs_inode_alloc_unitsize(ip);
      50             :         /*
                     :          * Realtime inodes with a unit size that is not a power of two
                     :          * cannot use the mask test below, so check each value with
                     :          * isaligned_64() instead.
                     :          */
      51    39017984 :         if (XFS_IS_REALTIME_INODE(ip) && !is_power_of_2(alloc_unit))
      52     8652853 :                 return isaligned_64(pos, alloc_unit) &&
      53     1112342 :                        isaligned_64(len, alloc_unit);
      54             :         /*
                     :          * Mask test: any low bit set in either value means misaligned.
                     :          * NOTE(review): this relies on alloc_unit being a power of two
                     :          * for every non-realtime inode -- confirm against
                     :          * xfs_inode_alloc_unitsize().
                     :          */
      55    14235620 :         return !((pos | len) & (alloc_unit - 1));
      56             : }
      57             : 
      58             : /*
      59             :  * Fsync operations on directories are much simpler than on regular files,
      60             :  * as there is no file data to flush, and thus also no need for explicit
      61             :  * cache flush operations, and there are no non-transaction metadata updates
      62             :  * on directories either.
                     :  *
                     :  * The @start, @end and @datasync arguments are not used here.  The inode is
                     :  * looked up through file->f_mapping->host and the return value is whatever
                     :  * xfs_log_force_inode() returns for it.
      63             :  */
      64             : STATIC int
      65     1033495 : xfs_dir_fsync(
      66             :         struct file             *file,
      67             :         loff_t                  start,
      68             :         loff_t                  end,
      69             :         int                     datasync)
      70             : {
      71     1033495 :         struct xfs_inode        *ip = XFS_I(file->f_mapping->host);
      72             : 
      73     1033495 :         trace_xfs_dir_fsync(ip);
      74     1033483 :         return xfs_log_force_inode(ip);
      75             : }
      76             : /*
                     :  * Work out which log commit sequence an fsync of @ip has to wait for.
                     :  *
                     :  * Returns 0 when there is nothing to force: either xfs_ipincount() reports
                     :  * that the inode is not pinned, or @datasync is set and ili_fsync_fields
                     :  * has no bits set other than XFS_ILOG_TIMESTAMP.  Otherwise returns the
                     :  * ili_commit_seq recorded in the inode log item.
                     :  */
      77             : static xfs_csn_t
      78     5036636 : xfs_fsync_seq(
      79             :         struct xfs_inode        *ip,
      80             :         bool                    datasync)
      81             : {
      82     5036636 :         if (!xfs_ipincount(ip))
      83             :                 return 0;
      84     5035579 :         if (datasync && !(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
      85             :                 return 0;
      86     4750040 :         return ip->i_itemp->ili_commit_seq;
      87             : }
      88             : 
      89             : /*
      90             :  * All metadata updates are logged, which means that we just have to flush the
      91             :  * log up to the latest LSN that touched the inode.
      92             :  *
      93             :  * If we have concurrent fsync/fdatasync() calls, we need them to all block on
      94             :  * the log force before we clear the ili_fsync_fields field. This ensures that
      95             :  * we don't get a racing sync operation that does not wait for the metadata to
      96             :  * hit the journal before returning.  If we race with clearing ili_fsync_fields,
      97             :  * then all that will happen is the log force will do nothing as the lsn will
      98             :  * already be on disk.  We can't race with setting ili_fsync_fields because that
      99             :  * is done under XFS_ILOCK_EXCL, and that can't happen because we hold the lock
     100             :  * shared until after the ili_fsync_fields is cleared.
                     :  *
                     :  * @ip:          inode to sync
                     :  * @datasync:    passed to xfs_fsync_seq() to pick the sequence to force
                     :  * @log_flushed: handed straight through to xfs_log_force_seq()
                     :  *
                     :  * Returns 0 if xfs_fsync_seq() says no log force is needed, otherwise the
                     :  * return value of xfs_log_force_seq().
     101             :  */
     102             : static  int
     103     5036622 : xfs_fsync_flush_log(
     104             :         struct xfs_inode        *ip,
     105             :         bool                    datasync,
     106             :         int                     *log_flushed)
     107             : {
     108     5036622 :         int                     error = 0;
     109     5036622 :         xfs_csn_t               seq;
     110             : 
     111     5036622 :         xfs_ilock(ip, XFS_ILOCK_SHARED);
     112     5036654 :         seq = xfs_fsync_seq(ip, datasync);
     113     5036302 :         if (seq) {
     114     4750053 :                 error = xfs_log_force_seq(ip->i_mount, seq, XFS_LOG_SYNC,
     115             :                                           log_flushed);
     116             :                 /*
                     :                  * ili_fsync_fields is reset under ili_lock whether or
                     :                  * not the log force reported an error.
                     :                  */
     117     4749372 :                 spin_lock(&ip->i_itemp->ili_lock);
     118     4749861 :                 ip->i_itemp->ili_fsync_fields = 0;
     119     4749861 :                 spin_unlock(&ip->i_itemp->ili_lock);
     120             :         }
     121     5036225 :         xfs_iunlock(ip, XFS_ILOCK_SHARED);
     122     5034579 :         return error;
     123             : }
     124             : /*
                     :  * Sync the byte range [@start, @end] of @file, then its logged metadata.
                     :  *
                     :  * Steps, in order:
                     :  *  1. file_write_and_wait_range() on the range; an error here is returned
                     :  *     immediately.
                     :  *  2. Return -EIO if the filesystem is shut down.
                     :  *  3. Clear XFS_ITRUNCATED on the inode.
                     :  *  4. Flush the device cache that holds file data when it is a different
                     :  *     device from the log (see the comment in the body).
                     :  *  5. If the inode is pinned, force the log via xfs_fsync_flush_log().
                     :  *  6. If no log flush was reported and there is a single device, flush the
                     :  *     data device cache.
                     :  *
                     :  * A failure in steps 4-6 does not stop the later steps; the first error
                     :  * seen is the one returned.  Returns 0 on success.
                     :  */
     125             : STATIC int
     126    33192607 : xfs_file_fsync(
     127             :         struct file             *file,
     128             :         loff_t                  start,
     129             :         loff_t                  end,
     130             :         int                     datasync)
     131             : {
     132    33192607 :         struct xfs_inode        *ip = XFS_I(file->f_mapping->host);
     133    33192607 :         struct xfs_mount        *mp = ip->i_mount;
     134    33192607 :         int                     error, err2;
     135    33192607 :         int                     log_flushed = 0;
     136             : 
     137    33192607 :         trace_xfs_file_fsync(ip);
     138             : 
     139    33191315 :         error = file_write_and_wait_range(file, start, end);
     140    33200476 :         if (error)
     141             :                 return error;
     142             : 
     143    66394772 :         if (xfs_is_shutdown(mp))
     144             :                 return -EIO;
     145             : 
     146    33195504 :         xfs_iflags_clear(ip, XFS_ITRUNCATED);
     147             : 
     148             :         /*
     149             :          * If we have an RT and/or log subvolume we need to make sure to flush
     150             :          * the write cache the device used for file data first.  This is to
     151             :          * ensure newly written file data make it to disk before logging the new
     152             :          * inode size in case of an extending write.
     153             :          */
     154    33193156 :         if (XFS_IS_REALTIME_INODE(ip))
     155    14903303 :                 error = xfs_buftarg_flush(mp->m_rtdev_targp);
     156    18289853 :         else if (mp->m_logdev_targp != mp->m_ddev_targp)
     157      550987 :                 error = xfs_buftarg_flush(mp->m_ddev_targp);
     158             : 
     159             :         /*
     160             :          * Any inode that has dirty modifications in the log is pinned.  The
     161             :          * racy check here for a pinned inode will not catch modifications
     162             :          * that happen concurrently to the fsync call, but fsync semantics
     163             :          * only require to sync previously completed I/O.
     164             :          */
     165    33184470 :         if (xfs_ipincount(ip)) {
     166     5036618 :                 err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
     167     5034505 :                 if (err2 && !error)
     168        1052 :                         error = err2;
     169             :         }
     170             : 
     171             :         /*
     172             :          * If we only have a single device, and the log force above was
     173             :          * a no-op we might have to flush the data device cache here.
     174             :          * This can only happen for fdatasync/O_DSYNC if we were overwriting
     175             :          * an already allocated file and thus do not have any metadata to
     176             :          * commit.
     177             :          */
     178    33182357 :         if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
     179    16111580 :             mp->m_logdev_targp == mp->m_ddev_targp) {
     180    15808925 :                 err2 = xfs_buftarg_flush(mp->m_ddev_targp);
     181    15794266 :                 if (err2 && !error)
     182         262 :                         error = err2;
     183             :         }
     184             : 
     185             :         return error;
     186             : }
     187             : /*
                     :  * Take @lock_mode on the inode behind @iocb, honouring IOCB_NOWAIT.
                     :  *
                     :  * With IOCB_NOWAIT set in iocb->ki_flags the lock is only tried via
                     :  * xfs_ilock_nowait() and -EAGAIN is returned if that fails; otherwise
                     :  * xfs_ilock() is called.  Returns 0 once the lock has been taken.
                     :  */
     188             : static int
     189  1057054673 : xfs_ilock_iocb(
     190             :         struct kiocb            *iocb,
     191             :         unsigned int            lock_mode)
     192             : {
     193  1057054673 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     194             : 
     195  1057054673 :         if (iocb->ki_flags & IOCB_NOWAIT) {
     196           0 :                 if (!xfs_ilock_nowait(ip, lock_mode))
     197           0 :                         return -EAGAIN;
     198             :         } else {
     199  1057054673 :                 xfs_ilock(ip, lock_mode);
     200             :         }
     201             : 
     202             :         return 0;
     203             : }
     204             : /*
                     :  * Direct I/O read path.
                     :  *
                     :  * A request with nothing left in @to returns 0 before file_accessed() is
                     :  * called.  Otherwise file_accessed() runs first, then the read is issued
                     :  * through iomap_dio_rw() with xfs_read_iomap_ops while XFS_IOLOCK_SHARED is
                     :  * held.  Returns the error from xfs_ilock_iocb() if the lock could not be
                     :  * taken, else the result of iomap_dio_rw().
                     :  */
     205             : STATIC ssize_t
     206   346694982 : xfs_file_dio_read(
     207             :         struct kiocb            *iocb,
     208             :         struct iov_iter         *to)
     209             : {
     210   346694982 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     211   346694982 :         ssize_t                 ret;
     212             : 
     213   346694982 :         trace_xfs_file_direct_read(iocb, to);
     214             : 
     215   346693940 :         if (!iov_iter_count(to))
     216             :                 return 0; /* skip atime */
     217             : 
     218   346669051 :         file_accessed(iocb->ki_filp);
     219             : 
     220   346667830 :         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
     221   346667932 :         if (ret)
     222             :                 return ret;
     223   346667934 :         ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, NULL, 0);
     224   346671315 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     225             : 
     226   346671315 :         return ret;
     227             : }
     228             : /*
                     :  * DAX read path.
                     :  *
                     :  * A request with nothing left in @to returns 0 without calling
                     :  * file_accessed().  Otherwise the read goes through dax_iomap_rw() with
                     :  * xfs_read_iomap_ops while XFS_IOLOCK_SHARED is held, and file_accessed()
                     :  * is called after the lock is dropped, regardless of the I/O result.
                     :  * Returns the error from xfs_ilock_iocb() if the lock could not be taken,
                     :  * else the result of dax_iomap_rw().
                     :  */
     229             : static noinline ssize_t
     230           0 : xfs_file_dax_read(
     231             :         struct kiocb            *iocb,
     232             :         struct iov_iter         *to)
     233             : {
     234           0 :         struct xfs_inode        *ip = XFS_I(iocb->ki_filp->f_mapping->host);
     235           0 :         ssize_t                 ret = 0;
     236             : 
     237           0 :         trace_xfs_file_dax_read(iocb, to);
     238             : 
     239           0 :         if (!iov_iter_count(to))
     240             :                 return 0; /* skip atime */
     241             : 
     242           0 :         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
     243           0 :         if (ret)
     244             :                 return ret;
     245           0 :         ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
     246           0 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     247             : 
     248           0 :         file_accessed(iocb->ki_filp);
     249           0 :         return ret;
     250             : }
     251             : /*
                     :  * Buffered read path: call generic_file_read_iter() with XFS_IOLOCK_SHARED
                     :  * held.  Returns the error from xfs_ilock_iocb() if the lock could not be
                     :  * taken, else the result of generic_file_read_iter().
                     :  */
     252             : STATIC ssize_t
     253   405572902 : xfs_file_buffered_read(
     254             :         struct kiocb            *iocb,
     255             :         struct iov_iter         *to)
     256             : {
     257   405572902 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     258   405572902 :         ssize_t                 ret;
     259             : 
     260   405572902 :         trace_xfs_file_buffered_read(iocb, to);
     261             : 
     262   405594300 :         ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
     263   404517890 :         if (ret)
     264             :                 return ret;
     265   404552981 :         ret = generic_file_read_iter(iocb, to);
     266   405192589 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     267             : 
     268   405192589 :         return ret;
     269             : }
     270             : /*
                     :  * Read entry point that picks one of the three read paths.
                     :  *
                     :  * xs_read_calls is bumped before anything else, including the shutdown
                     :  * check that returns -EIO.  The request is then routed to the DAX path if
                     :  * IS_DAX(inode), to the direct I/O path if IOCB_DIRECT is set, and to the
                     :  * buffered path otherwise.  A positive result is added to xs_read_bytes.
                     :  * Returns whatever the selected path returned.
                     :  */
     271             : STATIC ssize_t
     272   753164330 : xfs_file_read_iter(
     273             :         struct kiocb            *iocb,
     274             :         struct iov_iter         *to)
     275             : {
     276   753164330 :         struct inode            *inode = file_inode(iocb->ki_filp);
     277   753164330 :         struct xfs_mount        *mp = XFS_I(inode)->i_mount;
     278   753164330 :         ssize_t                 ret = 0;
     279             : 
     280   753164330 :         XFS_STATS_INC(mp, xs_read_calls);
     281             : 
     282  1503314558 :         if (xfs_is_shutdown(mp))
     283             :                 return -EIO;
     284             : 
     285   751654682 :         if (IS_DAX(inode))
     286           0 :                 ret = xfs_file_dax_read(iocb, to);
     287   751654682 :         else if (iocb->ki_flags & IOCB_DIRECT)
     288   346695000 :                 ret = xfs_file_dio_read(iocb, to);
     289             :         else
     290   404959682 :                 ret = xfs_file_buffered_read(iocb, to);
     291             : 
     292   750930624 :         if (ret > 0)
     293   402116093 :                 XFS_STATS_ADD(mp, xs_read_bytes, ret);
     294             :         return ret;
     295             : }
     296             : /*
                     :  * Splice-read entry point.
                     :  *
                     :  * Bumps xs_read_calls, returns -EIO if the filesystem is shut down, and
                     :  * otherwise calls filemap_splice_read() with XFS_IOLOCK_SHARED held.  The
                     :  * lock is taken with a plain xfs_ilock() here, not xfs_ilock_iocb().  A
                     :  * positive result is added to xs_read_bytes.  Returns the result of
                     :  * filemap_splice_read().
                     :  */
     297             : STATIC ssize_t
     298    15220907 : xfs_file_splice_read(
     299             :         struct file             *in,
     300             :         loff_t                  *ppos,
     301             :         struct pipe_inode_info  *pipe,
     302             :         size_t                  len,
     303             :         unsigned int            flags)
     304             : {
     305    15220907 :         struct inode            *inode = file_inode(in);
     306    15220907 :         struct xfs_inode        *ip = XFS_I(inode);
     307    15220907 :         struct xfs_mount        *mp = ip->i_mount;
     308    15220907 :         ssize_t                 ret = 0;
     309             : 
     310    15220907 :         XFS_STATS_INC(mp, xs_read_calls);
     311             : 
     312    30441724 :         if (xfs_is_shutdown(mp))
     313             :                 return -EIO;
     314             : 
     315    15220836 :         trace_xfs_file_splice_read(ip, *ppos, len);
     316             : 
     317    15220742 :         xfs_ilock(ip, XFS_IOLOCK_SHARED);
     318    15220723 :         ret = filemap_splice_read(in, ppos, pipe, len, flags);
     319    15220828 :         xfs_iunlock(ip, XFS_IOLOCK_SHARED);
     320    15220818 :         if (ret > 0)
     321    15220642 :                 XFS_STATS_ADD(mp, xs_read_bytes, ret);
     322             :         return ret;
     323             : }
     324             : 
     325             : /*
     326             :  * Decide if this file write requires COWing-around at either end of the write
     327             :  * range.  This is only required if the file allocation unit is larger than
     328             :  * 1FSB and the write range is not aligned with the allocation unit.
                     :  *
                     :  * @ip:    inode being written
                     :  * @pos:   starting byte offset of the write
                     :  * @count: length of the write in bytes; xfs_truncate_needs_cow_around()
                     :  *         passes 0 so that only @pos is tested
                     :  *
                     :  * Returns false if xfs_inode_needs_cow_around() says the inode never needs
                     :  * it, otherwise true exactly when xfs_is_falloc_aligned() rejects the range.
     329             :  */
     330             : static bool
     331   530935221 : xfs_file_write_needs_cow_around(
     332             :         struct xfs_inode        *ip,
     333             :         loff_t                  pos,
     334             :         long long int           count)
     335             : {
     336             :         /*
     337             :          * No COWing required if this inode doesn't do COW.
     338             :          *
     339             :          * If the allocation unit is 1FSB, we do not need to COW around the
     340             :          * edges of the operation range.  This applies to all files on the data
     341             :          * device and rt files that have an extent size of 1FSB.
     342             :          */
     343   530935221 :         if (!xfs_inode_needs_cow_around(ip))
     344             :                 return false;
     345             : 
     346             :         /*
     347             :          * Otherwise, check that the operation is aligned to the rt extent
     348             :          * size.  Any unaligned operation /must/ be COWed around since the
     349             :          * regular reflink code only handles extending writes up to fsblock
     350             :          * boundaries.
     351             :          */
     352    15355928 :         return !xfs_is_falloc_aligned(ip, pos, count);
     353             : }
     354             : 
     355             : /*
                     :  * Do we need to COW-around at this offset to handle a truncate up or down?
                     :  *
                     :  * Answered by treating @pos as a zero-length write and asking
                     :  * xfs_file_write_needs_cow_around().
                     :  */
     356             : bool
     357    17598875 : xfs_truncate_needs_cow_around(
     358             :         struct xfs_inode        *ip,
     359             :         loff_t                  pos)
     360             : {
     361    17598875 :         return xfs_file_write_needs_cow_around(ip, pos, 0);
     362             : }
     363             : 
     364             : /*
                     :  * Does this file write require COWing around?
                     :  *
                     :  * The range tested starts at iocb->ki_pos and is iov_iter_count(from)
                     :  * bytes long, both sampled at the time of the call.
                     :  */
     365             : static inline bool
     366             : xfs_iocb_needs_cow_around(
     367             :         struct xfs_inode        *ip,
     368             :         const struct kiocb      *iocb,
     369             :         const struct iov_iter   *from)
     370             : {
     371    16300030 :         return xfs_file_write_needs_cow_around(ip, iocb->ki_pos,
     372             :                         iov_iter_count(from));
     373             : }
     374             : 
     375             : /*
                     :  * Unshare the allocation unit mapped to the given file position.
                     :  *
                     :  * @pos is rounded down to a multiple of the inode's allocation unit size.
                     :  * Nothing is done, and 0 is returned, when @pos is already aligned or when
                     :  * the rounded-down offset is at or beyond the current i_size.  Otherwise
                     :  * one allocation unit starting at the rounded-down offset, clipped so that
                     :  * it does not run past i_size, is unshared with dax_file_unshare() for DAX
                     :  * inodes or iomap_file_unshare() for everything else.  Returns the result
                     :  * of that call.
                     :  */
     376             : inline int
     377    25335153 : xfs_file_unshare_at(
     378             :         struct xfs_inode        *ip,
     379             :         loff_t                  pos)
     380             : {
     381    25335153 :         loff_t                  isize = i_size_read(VFS_I(ip));
     382    25335153 :         unsigned int            extsize, len;
     383    25335153 :         uint32_t                mod;
     384             : 
     385    25335153 :         len = extsize = xfs_inode_alloc_unitsize(ip);
     386             : 
     387             :         /* Open-coded rounddown_64 so that we can skip out if aligned */
     388    25334953 :         div_u64_rem(pos, extsize, &mod);
     389    25335244 :         if (mod == 0)
     390             :                 return 0;
     391    23363300 :         pos -= mod;
     392             : 
     393             :         /* Do not extend the file. */
     394    23363300 :         if (pos >= isize)
     395             :                 return 0;
     396    21892941 :         if (pos + len > isize)
     397     2311086 :                 len = isize - pos;
     398             : 
     399    21892941 :         trace_xfs_file_cow_around(ip, pos, len);
     400             : 
     401    21892982 :         if (IS_DAX(VFS_I(ip)))
     402           0 :                 return dax_file_unshare(VFS_I(ip), pos, len,
     403             :                                 &xfs_dax_write_iomap_ops);
     404    21892982 :         return iomap_file_unshare(VFS_I(ip), pos, len,
     405             :                         &xfs_buffered_write_iomap_ops);
     406             : }
     407             : 
     408             : /*
     409             :  * Dirty the pages on either side of a write request as needed to satisfy
     410             :  * alignment requirements if we're going to perform a copy-write.
     411             :  *
     412             :  * This is only needed for realtime files when the rt extent size is larger
     413             :  * than 1 fs block, because we don't allow a logical rt extent in a file to map
     414             :  * to multiple physical rt extents.  In other words, we can only map and unmap
     415             :  * full rt extents.  Note that page cache doesn't exist above EOF, so be
     416             :  * careful to stay below EOF.
                     :  *
                     :  * @pos and @count describe the write in bytes.  xfs_file_unshare_at() is
                     :  * called for @pos and then for @pos + @count; a failure of the first call
                     :  * is returned without attempting the second.  Returns 0 or the error from
                     :  * xfs_file_unshare_at().
     417             :  */
     418             : static int
     419    12054326 : xfs_file_cow_around(
     420             :         struct xfs_inode        *ip,
     421             :         loff_t                  pos,
     422             :         long long int           count)
     423             : {
     424    12054326 :         int                     error;
     425             : 
     426             :         /* Unshare at the start of the extent. */
     427    12054326 :         error = xfs_file_unshare_at(ip,  pos);
     428    12054667 :         if (error)
     429             :                 return error;
     430             : 
     431             :         /* Unshare at the end. */
     432    11909447 :         return xfs_file_unshare_at(ip, pos + count);
     433             : }
     434             : 
     435             : /*
     436             :  * Common pre-write limit and setup checks.
     437             :  *
     438             :  * Called with the iolock held either shared or exclusive according to
     439             :  * @iolock, and returns with it held.  Might upgrade the iolock to exclusive
     440             :  * if called for a direct write beyond i_size.
                     :  *
                     :  * The one exception to "returns with it held" is a failed re-lock after
                     :  * dropping the shared lock: *@iolock is set to 0 and the error is returned.
                     :  *
                     :  * Returns the value of generic_write_checks() when that is zero or
                     :  * negative, a negative errno from any later step, or otherwise the result
                     :  * of kiocb_modified().
     441             :  */
     442             : STATIC ssize_t
     443   295520402 : xfs_file_write_checks(
     444             :         struct kiocb            *iocb,
     445             :         struct iov_iter         *from,
     446             :         unsigned int            *iolock)
     447             : {
     448   295520402 :         struct file             *file = iocb->ki_filp;
     449   295520402 :         struct inode            *inode = file->f_mapping->host;
     450   295520402 :         struct xfs_inode        *ip = XFS_I(inode);
     451   295520402 :         ssize_t                 error = 0;
     452   295520402 :         size_t                  count = iov_iter_count(from);
     453   295520402 :         bool                    drained_dio = false;
     454   363223509 :         loff_t                  isize;
     455             : 
     456             : restart:
     457   363223509 :         error = generic_write_checks(iocb, from);
     458   363735164 :         if (error <= 0)
     459          30 :                 return error;
     460             :         /*
                     :          * IOCB_NOWAIT callers use break_layout() directly and have
                     :          * -EWOULDBLOCK translated to -EAGAIN; everyone else goes
                     :          * through xfs_break_layouts(), which is handed @iolock.
                     :          */
     461   363735134 :         if (iocb->ki_flags & IOCB_NOWAIT) {
     462           0 :                 error = break_layout(inode, false);
     463           0 :                 if (error == -EWOULDBLOCK)
     464             :                         error = -EAGAIN;
     465             :         } else {
     466   363735134 :                 error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
     467             :         }
     468             : 
     469   364695266 :         if (error)
     470           0 :                 return error;
     471             : 
     472             :         /*
     473             :          * For changing security info in file_remove_privs() we need i_rwsem
     474             :          * exclusively.  We also need it to COW around the range being written.
     475             :          */
     476   364695266 :         if (*iolock == XFS_IOLOCK_SHARED &&
     477    32684435 :             (!IS_NOSEC(inode) || xfs_iocb_needs_cow_around(ip, iocb, from))) {
     478       86120 :                 xfs_iunlock(ip, *iolock);
     479       86090 :                 *iolock = XFS_IOLOCK_EXCL;
     480       86090 :                 error = xfs_ilock_iocb(iocb, *iolock);
     481       86087 :                 if (error) {
     482           0 :                         *iolock = 0; /* shared lock was dropped and nothing was retaken */
     483           0 :                         return error;
     484             :                 }
     485       86087 :                 goto restart;
     486             :         }
     487             : 
     488             :         /*
     489             :          * The write is not aligned to the file's allocation unit.  If either
     490             :          * of the allocation units at the start or end of the write range are
     491             :          * shared, unshare them through the page cache.
     492             :          */
     493   364607402 :         if (xfs_iocb_needs_cow_around(ip, iocb, from)) {
     494     4968033 :                 ASSERT(*iolock == XFS_IOLOCK_EXCL);
     495             :                 /*
                     :                  * Wait for direct I/O via inode_dio_wait() before
                     :                  * unsharing, and record it so that the EOF handling
                     :                  * below does not wait and restart again.
                     :                  */
     496     4968033 :                 inode_dio_wait(VFS_I(ip));
     497     4967981 :                 drained_dio = true;
     498             : 
     499     4967981 :                 error = xfs_file_cow_around(ip, iocb->ki_pos, count);
     500     4968371 :                 if (error)
     501      129724 :                         return error;
     502             :         }
     503             : 
     504             :         /*
     505             :          * If the offset is beyond the size of the file, we need to zero any
     506             :          * blocks that fall between the existing EOF and the start of this
     507             :          * write.  If zeroing is needed and we are currently holding the iolock
     508             :          * shared, we need to update it to exclusive which implies having to
     509             :          * redo all checks before.
     510             :          *
     511             :          * We need to serialise against EOF updates that occur in IO completions
     512             :          * here. We want to make sure that nobody is changing the size while we
     513             :          * do this check until we have placed an IO barrier (i.e.  hold the
     514             :          * XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.  The
     515             :          * spinlock effectively forms a memory barrier once we have the
     516             :          * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value and
     517             :          * hence be able to correctly determine if we need to run zeroing.
     518             :          *
     519             :          * We can do an unlocked check here safely as IO completion can only
     520             :          * extend EOF. Truncate is locked out at this point, so the EOF can
     521             :          * not move backwards, only forwards. Hence we only need to take the
     522             :          * slow path and spin locks when we are at or beyond the current EOF.
     523             :          */
     524   363935179 :         if (iocb->ki_pos <= i_size_read(inode))
     525   227164750 :                 goto out;
     526             :         /* Slow path: sample i_size again, this time under i_flags_lock. */
     527   136770429 :         spin_lock(&ip->i_flags_lock);
     528   137237500 :         isize = i_size_read(inode);
     529   137237500 :         if (iocb->ki_pos > isize) {
     530   137237500 :                 spin_unlock(&ip->i_flags_lock);
     531             :                 /*
                     :                  * IOCB_NOWAIT writes that start beyond EOF get -EAGAIN
                     :                  * rather than going through the wait/zeroing path below.
                     :                  */
     532   137177119 :                 if (iocb->ki_flags & IOCB_NOWAIT)
     533             :                         return -EAGAIN;
     534             : 
     535   137177119 :                 if (!drained_dio) {
     536    68270193 :                         if (*iolock == XFS_IOLOCK_SHARED) {
     537      772451 :                                 xfs_iunlock(ip, *iolock);
     538      772226 :                                 *iolock = XFS_IOLOCK_EXCL;
     539      772226 :                                 xfs_ilock(ip, *iolock);
     540      771994 :                                 iov_iter_reexpand(from, count); /* back to the byte count sampled on entry */
     541             :                         }
     542             :                         /*
     543             :                          * We now have an IO submission barrier in place, but
     544             :                          * AIO can do EOF updates during IO completion and hence
     545             :                          * we now need to wait for all of them to drain. Non-AIO
     546             :                          * DIO will have drained before we are given the
     547             :                          * XFS_IOLOCK_EXCL, and so for most cases this wait is a
     548             :                          * no-op.
     549             :                          */
     550    68269736 :                         inode_dio_wait(inode);
     551    67617020 :                         drained_dio = true;
     552    67617020 :                         goto restart;
     553             :                 }
     554             : 
     555             :                 /*
     556             :                  * If we're starting the write past EOF, COW the allocation
     557             :                  * unit containing the current EOF before we start zeroing the
     558             :                  * range between EOF and the start of the write.
     559             :                  */
     560    68906926 :                 if (xfs_truncate_needs_cow_around(ip, isize)) {
     561      435930 :                         error = xfs_file_unshare_at(ip, isize);
     562      435935 :                         if (error)
     563             :                                 return error;
     564             :                 }
     565             :                 /* Zero from the old EOF up to the start of this write. */
     566    68616504 :                 trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
     567    68113703 :                 error = xfs_zero_range(ip, isize, iocb->ki_pos - isize, NULL);
     568    68141147 :                 if (error)
     569             :                         return error;
     570             :         } else
     571           0 :                 spin_unlock(&ip->i_flags_lock);
     572             : 
     573   295290145 : out:
     574   295290145 :         return kiocb_modified(iocb);
     575             : }
     576             : /*
                     :  * Completion handler for direct I/O writes; installed below as the ->end_io
                     :  * method of xfs_dio_write_ops.
                     :  *
                     :  * @iocb:  kiocb of the write; iocb->ki_pos is taken as the start offset
                     :  * @size:  byte count reported for the completed I/O
                     :  * @error: incoming completion status; returned unchanged if nonzero
                     :  * @flags: IOMAP_DIO_* flags; IOMAP_DIO_COW and IOMAP_DIO_UNWRITTEN are the
                     :  *         two bits tested here
                     :  *
                     :  * Returns -EIO if the mount is shut down (this is checked before @error),
                     :  * @error if it is nonzero, and 0 for a zero-length completion.  Otherwise
                     :  * returns the status of the last of xfs_reflink_end_cow(),
                     :  * xfs_iomap_write_unwritten() or xfs_setfilesize() that was called, or 0
                     :  * when the write ended within the current in-core size.
                     :  */
     577             : static int
     578    22543114 : xfs_dio_write_end_io(
     579             :         struct kiocb            *iocb,
     580             :         ssize_t                 size,
     581             :         int                     error,
     582             :         unsigned                flags)
     583             : {
     584    22543114 :         struct inode            *inode = file_inode(iocb->ki_filp);
     585    22543114 :         struct xfs_inode        *ip = XFS_I(inode);
     586    22543114 :         loff_t                  offset = iocb->ki_pos;
     587    22543114 :         unsigned int            nofs_flag;
     588             : 
     589    22543114 :         trace_xfs_end_io_direct_write(ip, offset, size);
     590             : 
     591    45085698 :         if (xfs_is_shutdown(ip->i_mount)) /* tested first, so shutdown overrides @error */
     592             :                 return -EIO;
     593             : 
     594    22542137 :         if (error)
     595             :                 return error;
     596    20503197 :         if (!size)
     597             :                 return 0;
     598             : 
     599             :         /*
     600             :          * Capture amount written on completion as we can't reliably account
     601             :          * for it on submission.
     602             :          */
     603    20503197 :         XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
     604             : 
     605             :         /*
     606             :          * We can allocate memory here while doing writeback on behalf of
     607             :          * memory reclaim.  To avoid memory allocation deadlocks set the
     608             :          * task-wide nofs context for the following operations.
     609             :          */
     610    20503314 :         nofs_flag = memalloc_nofs_save();
     611             : 
     612    20503314 :         if (flags & IOMAP_DIO_COW) {
     613     4952403 :                 error = xfs_reflink_end_cow(ip, offset, size);
     614     4952402 :                 if (error)
     615           4 :                         goto out;
     616             :         }
     617             : 
     618             :         /*
     619             :          * Unwritten conversion updates the in-core isize after extent
     620             :          * conversion but before updating the on-disk size. Updating isize any
     621             :          * earlier allows a racing dio read to find unwritten extents before
     622             :          * they are converted.
     623             :          */
     624    20503309 :         if (flags & IOMAP_DIO_UNWRITTEN) {
     625     8945512 :                 error = xfs_iomap_write_unwritten(ip, offset, size, true);
     626     8945565 :                 goto out;
     627             :         }
     628             : 
     629             :         /*
     630             :          * We need to update the in-core inode size here so that we don't end up
     631             :          * with the on-disk inode size being outside the in-core inode size. We
     632             :          * have no other method of updating EOF for AIO, so always do it here
     633             :          * if necessary.
     634             :          *
     635             :          * We need to lock the test/set EOF update as we can be racing with
     636             :          * other IO completions here to update the EOF. Failing to serialise
     637             :          * here can result in EOF moving backwards and Bad Things Happen when
     638             :          * that occurs.
     639             :          *
     640             :          * As IO completion only ever extends EOF, we can do an unlocked check
     641             :          * here to avoid taking the spinlock. If we land within the current EOF,
     642             :          * then we do not need to do an extending update at all, and we don't
     643             :          * need to take the lock to check this. If we race with an update moving
     644             :          * EOF, then we'll either still be beyond EOF and need to take the lock,
     645             :          * or we'll be within EOF and we don't need to take it at all.
     646             :          */
     647    11557797 :         if (offset + size <= i_size_read(inode))
     648    10941422 :                 goto out;
     649             : 
     650      616375 :         spin_lock(&ip->i_flags_lock);
     651      616375 :         if (offset + size > i_size_read(inode)) { /* same test again, now under i_flags_lock */
     652      616375 :                 i_size_write(inode, offset + size);
     653      616375 :                 spin_unlock(&ip->i_flags_lock);
     654      616375 :                 error = xfs_setfilesize(ip, offset, size); /* called after the spinlock is dropped */
     655             :         } else {
     656           0 :                 spin_unlock(&ip->i_flags_lock);
     657             :         }
     658             : 
     659    20503366 : out:
     660    20503366 :         memalloc_nofs_restore(nofs_flag);
     661    20503366 :         return error;
     662             : }
     663             : /*
                     :  * Direct I/O completion operations passed to iomap_dio_rw() by the direct
                     :  * write paths in this file.  Only ->end_io is populated.
                     :  */
     664             : static const struct iomap_dio_ops xfs_dio_write_ops = {
     665             :         .end_io         = xfs_dio_write_end_io,
     666             : };
     667             : 
     668             : /*
     669             :  * Handle block aligned direct I/O writes
                     :  *
                     :  * Takes the IOLOCK shared, or exclusive when xfs_iocb_needs_cow_around()
                     :  * is true, then runs xfs_file_write_checks(), which is given a pointer to
                     :  * the lock mode and may change it.  An exclusive lock is demoted to shared
                     :  * before the I/O is submitted through iomap_dio_rw() with
                     :  * xfs_dio_write_ops as the completion operations.
                     :  *
                     :  * Returns the result of iomap_dio_rw(), or the error from
                     :  * xfs_ilock_iocb() or xfs_file_write_checks() if either of those fails.
     670             :  */
     671             : static noinline ssize_t
     672    16644072 : xfs_file_dio_write_aligned(
     673             :         struct xfs_inode        *ip,
     674             :         struct kiocb            *iocb,
     675             :         struct iov_iter         *from)
     676             : {
     677    16644072 :         unsigned int            iolock = XFS_IOLOCK_SHARED;
     678    16644072 :         ssize_t                 ret;
     679             : 
     680             :         /*
     681             :          * If the range to write is not aligned to an allocation unit, we will
     682             :          * have to COW the allocation units on both ends of the write.  Because
     683             :          * this runs through the page cache, it requires IOLOCK_EXCL.  This
     684             :          * predicate performs an unlocked access of the rt and reflink inode
     685             :          * state.
     686             :          */
     687    16644072 :         if (xfs_iocb_needs_cow_around(ip, iocb, from))
     688     1454474 :                 iolock = XFS_IOLOCK_EXCL;
     689             : 
     690    16642072 :         ret = xfs_ilock_iocb(iocb, iolock);
     691    16639585 :         if (ret)
     692             :                 return ret;
     693    16642170 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     694    16640330 :         if (ret)
     695        1072 :                 goto out_unlock;
     696             : 
     697             :         /*
     698             :          * We don't need to hold the IOLOCK exclusively across the IO, so demote
     699             :          * the iolock back to shared if we had to take the exclusive lock in
     700             :          * xfs_file_write_checks() for other reasons.
     701             :          */
     702    16639258 :         if (iolock == XFS_IOLOCK_EXCL) {
     703     2289955 :                 xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
     704     2289921 :                 iolock = XFS_IOLOCK_SHARED;
     705             :         }
     706    16639224 :         trace_xfs_file_direct_write(iocb, from);
     707    16638375 :         ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
     708             :                            &xfs_dio_write_ops, 0, NULL, 0);
     709    16646898 : out_unlock:
     710    16646898 :         if (iolock) /* NOTE(review): presumably xfs_file_write_checks() can return with the lock mode cleared - confirm */
     711    16646694 :                 xfs_iunlock(ip, iolock);
     712             :         return ret;
     713             : }
     714             : 
     715             : /*
     716             :  * Handle block unaligned direct I/O writes
     717             :  *
     718             :  * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
     719             :  * them to be done in parallel with reads and other direct I/O writes.  However,
     720             :  * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
     721             :  * to do sub-block zeroing and that requires serialisation against other direct
     722             :  * I/O to the same block.  In this case we need to serialise the submission of
     723             :  * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
     724             :  * In the case where sub-block zeroing is not required, we can do concurrent
     725             :  * sub-block dios to the same block successfully.
     726             :  *
     727             :  * Optimistically submit the I/O using the shared lock first, but use the
     728             :  * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
     729             :  * if block allocation or partial block zeroing would be required.  In that case
     730             :  * we try again with the exclusive lock.
                     :  *
                     :  * Returns the result of iomap_dio_rw() from the final attempt.  Error
                     :  * returns produced here are -EAGAIN, up front, for an IOCB_NOWAIT write that
                     :  * starts beyond or reaches the sampled EOF; -ENOTBLK when
                     :  * xfs_is_cow_inode() is true (xfs_file_write_iter() then falls back to a
                     :  * buffered write); and any error from xfs_ilock_iocb() or
                     :  * xfs_file_write_checks().
     731             :  */
     732             : static noinline ssize_t
     733    13543975 : xfs_file_dio_write_unaligned(
     734             :         struct xfs_inode        *ip,
     735             :         struct kiocb            *iocb,
     736             :         struct iov_iter         *from)
     737             : {
     738    13543975 :         size_t                  isize = i_size_read(VFS_I(ip)); /* NOTE(review): sampled before any lock is taken; held in a size_t although xfs_file_fallocate() keeps i_size_read() in a loff_t - confirm the width is intended */
     739    13543975 :         size_t                  count = iov_iter_count(from);
     740    13543975 :         unsigned int            iolock = XFS_IOLOCK_SHARED;
     741    13543975 :         unsigned int            flags = IOMAP_DIO_OVERWRITE_ONLY;
     742    13543975 :         ssize_t                 ret;
     743             : 
     744             :         /*
     745             :          * Extending writes need exclusivity because of the sub-block zeroing
     746             :          * that the DIO code always does for partial tail blocks beyond EOF, so
     747             :          * don't even bother trying the fast path in this case.
     748             :          */
     749    13543975 :         if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
     750     7812642 :                 if (iocb->ki_flags & IOCB_NOWAIT)
     751             :                         return -EAGAIN;
     752     7812642 : retry_exclusive: /* also reached by goto after the shared attempt returns -EAGAIN */
     753     8751582 :                 iolock = XFS_IOLOCK_EXCL;
     754     8751582 :                 flags = IOMAP_DIO_FORCE_WAIT;
     755             :         }
     756             : 
     757    14482915 :         ret = xfs_ilock_iocb(iocb, iolock);
     758    14482792 :         if (ret)
     759             :                 return ret;
     760             : 
     761             :         /*
     762             :          * We can't properly handle unaligned direct I/O to reflink files yet,
     763             :          * as we can't unshare a partial block.
     764             :          */
     765    14482760 :         if (xfs_is_cow_inode(ip)) {
     766     8583954 :                 trace_xfs_reflink_bounce_dio_write(iocb, from);
     767     8583918 :                 ret = -ENOTBLK; /* the one value xfs_file_write_iter() turns into a buffered write */
     768     8583918 :                 goto out_unlock;
     769             :         }
     770             : 
     771     5898795 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     772     5898589 :         if (ret)
     773          87 :                 goto out_unlock;
     774             : 
     775             :         /*
     776             :          * If we are doing exclusive unaligned I/O, this must be the only I/O
     777             :          * in-flight.  Otherwise we risk data corruption due to unwritten extent
     778             :          * conversions from the AIO end_io handler.  Wait for all other I/O to
     779             :          * drain first.
     780             :          */
     781     5898502 :         if (flags & IOMAP_DIO_FORCE_WAIT)
     782     4703212 :                 inode_dio_wait(VFS_I(ip));
     783             : 
     784     5898463 :         trace_xfs_file_direct_write(iocb, from);
     785     5898120 :         ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
     786             :                            &xfs_dio_write_ops, flags, NULL, 0);
     787             : 
     788             :         /*
     789             :          * Retry unaligned I/O with exclusive blocking semantics if the DIO
     790             :          * layer rejected it for mapping or locking reasons. If we are doing
     791             :          * nonblocking user I/O, propagate the error.
     792             :          */
     793     5899006 :         if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
     794      938958 :                 ASSERT(flags & IOMAP_DIO_OVERWRITE_ONLY);
     795      938958 :                 xfs_iunlock(ip, iolock); /* drop the lock before retaking it exclusively */
     796      938940 :                 goto retry_exclusive;
     797             :         }
     798             : 
     799     4960048 : out_unlock:
     800    13544053 :         if (iolock)
     801    13543976 :                 xfs_iunlock(ip, iolock);
     802             :         return ret;
     803             : }
     804             : /*
                     :  * Entry point for direct I/O writes: check alignment and pick a helper.
                     :  *
                     :  * Returns -EINVAL if the file position or the byte count has bits set in
                     :  * the buffer target's bt_logical_sectormask.  If either has bits set in the
                     :  * mount's m_blockmask the write goes to xfs_file_dio_write_unaligned(),
                     :  * otherwise to xfs_file_dio_write_aligned(); the helper's return value is
                     :  * passed back unchanged.
                     :  */
     805             : static ssize_t
     806    30189213 : xfs_file_dio_write(
     807             :         struct kiocb            *iocb,
     808             :         struct iov_iter         *from)
     809             : {
     810    30189213 :         struct xfs_inode        *ip = XFS_I(file_inode(iocb->ki_filp));
     811    30189213 :         struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
     812    30189213 :         size_t                  count = iov_iter_count(from);
     813             : 
     814             :         /* direct I/O must be aligned to device logical sector size */
     815    30189213 :         if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
     816             :                 return -EINVAL;
     817    30189213 :         if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask) /* ORing lets one mask test cover both position and length */
     818    13544030 :                 return xfs_file_dio_write_unaligned(ip, iocb, from);
     819    16645183 :         return xfs_file_dio_write_aligned(ip, iocb, from);
     820             : }
     821             : /*
                     :  * Write path used by xfs_file_write_iter() when IS_DAX() is true for the
                     :  * inode.
                     :  *
                     :  * Takes IOLOCK_EXCL, runs xfs_file_write_checks() and hands the write to
                     :  * dax_iomap_rw().  If bytes were written and iocb->ki_pos ends up beyond
                     :  * the in-core size, the in-core size is raised to ki_pos and
                     :  * xfs_setfilesize() is called for the written range.
                     :  *
                     :  * Returns the xfs_setfilesize() error if that call failed.  Otherwise
                     :  * returns the result of generic_write_sync() when the byte count is
                     :  * positive, or the zero/negative value from the locking, checking or I/O
                     :  * step.
                     :  */
     822             : static noinline ssize_t
     823           0 : xfs_file_dax_write(
     824             :         struct kiocb            *iocb,
     825             :         struct iov_iter         *from)
     826             : {
     827           0 :         struct inode            *inode = iocb->ki_filp->f_mapping->host;
     828           0 :         struct xfs_inode        *ip = XFS_I(inode);
     829           0 :         unsigned int            iolock = XFS_IOLOCK_EXCL;
     830           0 :         ssize_t                 ret, error = 0;
     831           0 :         loff_t                  pos;
     832             : 
     833           0 :         ret = xfs_ilock_iocb(iocb, iolock);
     834           0 :         if (ret)
     835             :                 return ret;
     836           0 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     837           0 :         if (ret)
     838           0 :                 goto out;
     839             : 
     840           0 :         pos = iocb->ki_pos; /* start offset, saved before dax_iomap_rw() runs */
     841             : 
     842           0 :         trace_xfs_file_dax_write(iocb, from);
     843           0 :         ret = dax_iomap_rw(iocb, from, &xfs_dax_write_iomap_ops);
     844           0 :         if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
     845           0 :                 i_size_write(inode, iocb->ki_pos);
     846           0 :                 error = xfs_setfilesize(ip, pos, ret);
     847             :         }
     848           0 : out:
     849           0 :         if (iolock)
     850           0 :                 xfs_iunlock(ip, iolock);
     851           0 :         if (error) /* a failed size update overrides the byte count in ret */
     852             :                 return error;
     853             : 
     854           0 :         if (ret > 0) {
     855           0 :                 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
     856             : 
     857             :                 /* Handle various SYNC-type writes */
     858           0 :                 ret = generic_write_sync(iocb, ret);
     859             :         }
     860             :         return ret;
     861             : }
     862             : /*
                     :  * Buffered write path: used by xfs_file_write_iter() for writes without
                     :  * IOCB_DIRECT and as the fallback when a direct write returns -ENOTBLK.
                     :  *
                     :  * Takes IOLOCK_EXCL, runs xfs_file_write_checks() and then
                     :  * iomap_file_buffered_write().  The first time the write fails with
                     :  * -EDQUOT or -ENOSPC the lock is dropped, space is reclaimed and the whole
                     :  * sequence is restarted from write_retry; cleared_space limits this to a
                     :  * single retry.
                     :  *
                     :  * Returns the xfs_ilock_iocb() or xfs_file_write_checks() error, the
                     :  * zero/negative result of iomap_file_buffered_write(), or the result of
                     :  * generic_write_sync() when a positive byte count was written.
                     :  */
     863             : STATIC ssize_t
     864   271853253 : xfs_file_buffered_write(
     865             :         struct kiocb            *iocb,
     866             :         struct iov_iter         *from)
     867             : {
     868   271853253 :         struct inode            *inode = iocb->ki_filp->f_mapping->host;
     869   271853253 :         struct xfs_inode        *ip = XFS_I(inode);
     870   271853253 :         ssize_t                 ret;
     871   271853253 :         bool                    cleared_space = false;
     872   273507686 :         unsigned int            iolock;
     873             : 
     874             : write_retry:
     875   273507686 :         iolock = XFS_IOLOCK_EXCL; /* set again on every pass through write_retry */
     876   273507686 :         ret = xfs_ilock_iocb(iocb, iolock);
     877   272747057 :         if (ret)
     878           0 :                 return ret;
     879             : 
     880   272747057 :         ret = xfs_file_write_checks(iocb, from, &iolock);
     881   272996851 :         if (ret)
     882      154714 :                 goto out;
     883             : 
     884   272842137 :         trace_xfs_file_buffered_write(iocb, from);
     885   272278840 :         ret = iomap_file_buffered_write(iocb, from,
     886             :                         &xfs_buffered_write_iomap_ops);
     887             : 
     888             :         /*
     889             :          * If we hit a space limit, try to free up some lingering preallocated
     890             :          * space before returning an error. In the case of ENOSPC, first try to
     891             :          * write back all dirty inodes to free up some of the excess reserved
     892             :          * metadata space. This reduces the chances that the eofblocks scan
     893             :          * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
     894             :          * also behaves as a filter to prevent too many eofblocks scans from
     895             :          * running at the same time.  Use a synchronous scan to increase the
     896             :          * effectiveness of the scan.
     897             :          */
     898   272986706 :         if (ret == -EDQUOT && !cleared_space) {
     899        1826 :                 xfs_iunlock(ip, iolock);
     900        1826 :                 xfs_blockgc_free_quota(ip, XFS_ICWALK_FLAG_SYNC);
     901        1825 :                 cleared_space = true; /* a second -EDQUOT/-ENOSPC is returned to the caller */
     902        1825 :                 goto write_retry;
     903   272984880 :         } else if (ret == -ENOSPC && !cleared_space) {
     904     1652405 :                 struct xfs_icwalk       icw = {0};
     905             : 
     906     1652405 :                 cleared_space = true;
     907     1652405 :                 xfs_flush_inodes(ip->i_mount); /* runs while the IOLOCK is still held; the unlock follows */
     908             : 
     909     1652293 :                 xfs_iunlock(ip, iolock);
     910     1651973 :                 icw.icw_flags = XFS_ICWALK_FLAG_SYNC;
     911     1651973 :                 xfs_blockgc_free_space(ip->i_mount, &icw);
     912     1652608 :                 goto write_retry;
     913             :         }
     914             : 
     915   271332475 : out:
     916   271487189 :         if (iolock)
     917   271461178 :                 xfs_iunlock(ip, iolock);
     918             : 
     919   271697451 :         if (ret > 0) {
     920   270003014 :                 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
     921             :                 /* Handle various SYNC-type writes */
     922   270134419 :                 ret = generic_write_sync(iocb, ret);
     923             :         }
     924             :         return ret;
     925             : }
     926             : /*
                     :  * Dispatch a write to the DAX, direct or buffered write path.
                     :  *
                     :  * Bumps the xs_write_calls statistic, then returns 0 for an empty iterator
                     :  * and -EIO if the mount is shut down.  Inodes for which IS_DAX() is true go
                     :  * to xfs_file_dax_write().  IOCB_DIRECT writes go to xfs_file_dio_write(),
                     :  * whose result is returned unless it is -ENOTBLK; that case and every
                     :  * remaining write go to xfs_file_buffered_write().
                     :  */
     927             : STATIC ssize_t
     928   293595208 : xfs_file_write_iter(
     929             :         struct kiocb            *iocb,
     930             :         struct iov_iter         *from)
     931             : {
     932   293595208 :         struct inode            *inode = iocb->ki_filp->f_mapping->host;
     933   293595208 :         struct xfs_inode        *ip = XFS_I(inode);
     934   293595208 :         ssize_t                 ret;
     935   293595208 :         size_t                  ocount = iov_iter_count(from);
     936             : 
     937   293595208 :         XFS_STATS_INC(ip->i_mount, xs_write_calls); /* counted even for empty or rejected writes */
     938             : 
     939   293530055 :         if (ocount == 0)
     940             :                 return 0;
     941             : 
     942   587059380 :         if (xfs_is_shutdown(ip->i_mount))
     943             :                 return -EIO;
     944             : 
     945   293519542 :         if (IS_DAX(inode))
     946           0 :                 return xfs_file_dax_write(iocb, from);
     947             : 
     948   293519542 :         if (iocb->ki_flags & IOCB_DIRECT) {
     949             :                 /*
     950             :                  * Allow a directio write to fall back to a buffered
     951             :                  * write *only* in the case that we're doing a reflink
     952             :                  * CoW.  In all other directio scenarios we do not
     953             :                  * allow an operation to fall back to buffered mode.
     954             :                  */
     955    30189694 :                 ret = xfs_file_dio_write(iocb, from);
     956    30186966 :                 if (ret != -ENOTBLK) /* -ENOTBLK is the only result that falls through to the buffered path */
     957             :                         return ret;
     958             :         }
     959             : 
     960   271914071 :         return xfs_file_buffered_write(iocb, from);
     961             : }
     962             : 
     963             : /*
                     :  * Does this file, inode, or mount want synchronous writes?
                     :  *
                     :  * Returns true if xfs_has_wsync() holds for the mount, if the file's
                     :  * f_flags contain __O_SYNC or O_DSYNC, or if IS_SYNC() holds for the
                     :  * inode; false otherwise.  The checks are made in that order.
                     :  */
     964   538793731 : static inline bool xfs_file_sync_writes(struct file *filp)
     965             : {
     966   538793731 :         struct xfs_inode        *ip = XFS_I(file_inode(filp));
     967             : 
     968   538793731 :         if (xfs_has_wsync(ip->i_mount))
     969             :                 return true;
     970   538793691 :         if (filp->f_flags & (__O_SYNC | O_DSYNC))
     971             :                 return true;
     972   538763750 :         if (IS_SYNC(file_inode(filp)))
     973          21 :                 return true;
     974             : 
     975             :         return false;
     976             : }
     977             : /*
                     :  * Mask of the fallocate mode bits that xfs_file_fallocate() accepts; a
                     :  * mode with any bit outside this mask is rejected there with -EOPNOTSUPP.
                     :  */
     978             : #define XFS_FALLOC_FL_SUPPORTED                                         \
     979             :                 (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
     980             :                  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
     981             :                  FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE |     \
     982             :                  FALLOC_FL_MAP_FREE_SPACE)
     983             : 
     984             : STATIC long
     985    57407164 : xfs_file_fallocate(
     986             :         struct file             *file,
     987             :         int                     mode,
     988             :         loff_t                  offset,
     989             :         loff_t                  len)
     990             : {
     991    57407164 :         struct inode            *inode = file_inode(file);
     992    57407164 :         struct xfs_inode        *ip = XFS_I(inode);
     993    57407164 :         long                    error;
     994    57407164 :         uint                    iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
     995    57407164 :         loff_t                  new_size = 0;
     996    57407164 :         bool                    do_file_insert = false;
     997             : 
     998    57407164 :         if (!S_ISREG(inode->i_mode))
     999             :                 return -EINVAL;
    1000    57407164 :         if (mode & ~XFS_FALLOC_FL_SUPPORTED)
    1001             :                 return -EOPNOTSUPP;
    1002             : 
    1003    57407164 :         xfs_ilock(ip, iolock);
    1004    57407877 :         error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
    1005    57407824 :         if (error)
    1006           0 :                 goto out_unlock;
    1007             : 
    1008             :         /*
    1009             :          * Must wait for all AIO to complete before we continue as AIO can
    1010             :          * change the file size on completion without holding any locks we
    1011             :          * currently hold. We must do this first because AIO can update both
    1012             :          * the on disk and in memory inode sizes, and the operations that follow
    1013             :          * require the in-memory size to be fully up-to-date.
    1014             :          */
    1015    57407824 :         inode_dio_wait(inode);
    1016             : 
    1017             :         /*
    1018             :          * Now AIO and DIO have drained, we flush and (if necessary) invalidate
    1019             :          * the cached range over the first operation we are about to run.
    1020             :          *
    1021             :          * We care about zero and collapse here because they both run a hole
    1022             :          * punch over the range first. Because that can zero data, and the range
    1023             :          * of invalidation for the shift operations is much larger, we still do
    1024             :          * the required flush for collapse in xfs_prepare_shift().
    1025             :          *
    1026             :          * Insert has the same range requirements as collapse, and we extend the
    1027             :          * file first which can zero data. Hence insert has the same
    1028             :          * flush/invalidate requirements as collapse and so they are both
    1029             :          * handled at the right time by xfs_prepare_shift().
    1030             :          */
    1031    57406918 :         if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
    1032             :                     FALLOC_FL_COLLAPSE_RANGE)) {
    1033    44676380 :                 error = xfs_flush_unmap_range(ip, offset, len);
    1034    44676166 :                 if (error)
    1035         281 :                         goto out_unlock;
    1036             :         }
    1037             : 
    1038    57406423 :         error = file_modified(file);
    1039    57407572 :         if (error)
    1040           6 :                 goto out_unlock;
    1041             : 
    1042    57407566 :         if (mode & FALLOC_FL_PUNCH_HOLE) {
    1043             :                 /* Unshare around the region to punch, if needed. */
    1044    37044007 :                 if (xfs_file_write_needs_cow_around(ip, offset, len)) {
    1045      480281 :                         error = xfs_file_cow_around(ip, offset, len);
    1046      480343 :                         if (error)
    1047        8088 :                                 goto out_unlock;
    1048             :                 }
    1049             : 
    1050    37035542 :                 error = xfs_free_file_space(ip, offset, len);
    1051    37036074 :                 if (error)
    1052       54597 :                         goto out_unlock;
    1053    20363559 :         } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
    1054     3264929 :                 if (!xfs_is_falloc_aligned(ip, offset, len)) {
    1055      517675 :                         error = -EINVAL;
    1056      517675 :                         goto out_unlock;
    1057             :                 }
    1058             : 
    1059             :                 /*
    1060             :                  * There is no need to overlap collapse range with EOF,
    1061             :                  * in which case it is effectively a truncate operation
    1062             :                  */
    1063     2747252 :                 if (offset + len >= i_size_read(inode)) {
    1064      285657 :                         error = -EINVAL;
    1065      285657 :                         goto out_unlock;
    1066             :                 }
    1067             : 
    1068     2461595 :                 new_size = i_size_read(inode) - len;
    1069             : 
    1070     2461595 :                 error = xfs_collapse_file_space(ip, offset, len);
    1071     2461610 :                 if (error)
    1072        2818 :                         goto out_unlock;
    1073    17098630 :         } else if (mode & FALLOC_FL_INSERT_RANGE) {
    1074     2587517 :                 loff_t          isize = i_size_read(inode);
    1075             : 
    1076     2587517 :                 if (!xfs_is_falloc_aligned(ip, offset, len)) {
    1077      500837 :                         error = -EINVAL;
    1078      500837 :                         goto out_unlock;
    1079             :                 }
    1080             : 
    1081             :                 /*
    1082             :                  * New inode size must not exceed ->s_maxbytes, accounting for
    1083             :                  * possible signed overflow.
    1084             :                  */
    1085     2086674 :                 if (inode->i_sb->s_maxbytes - isize < len) {
    1086          10 :                         error = -EFBIG;
    1087          10 :                         goto out_unlock;
    1088             :                 }
    1089     2086664 :                 new_size = isize + len;
    1090             : 
    1091             :                 /* Offset should be less than i_size */
    1092     2086664 :                 if (offset >= isize) {
    1093      225363 :                         error = -EINVAL;
    1094      225363 :                         goto out_unlock;
    1095             :                 }
    1096             :                 do_file_insert = true;
    1097    14511113 :         } else if (mode & FALLOC_FL_MAP_FREE_SPACE) {
    1098        1122 :                 struct xfs_mount        *mp = ip->i_mount;
    1099        1122 :                 xfs_off_t               device_size;
    1100             : 
    1101        1122 :                 if (!capable(CAP_SYS_ADMIN)) {
    1102           0 :                         error = -EPERM;
    1103           0 :                         goto out_unlock;
    1104             :                 }
    1105             : 
    1106        1122 :                 if (XFS_IS_REALTIME_INODE(ip))
    1107         510 :                         device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_rblocks);
    1108             :                 else
    1109         612 :                         device_size = XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks);
    1110             : 
    1111             :                 /*
    1112             :                  * Bail out now if we aren't allowed to make the file size the
    1113             :                  * same length as the device.
    1114             :                  */
    1115        1122 :                 if (device_size > i_size_read(inode)) {
    1116          22 :                         new_size = device_size;
    1117          22 :                         error = inode_newsize_ok(inode, new_size);
    1118          22 :                         if (error)
    1119           0 :                                 goto out_unlock;
    1120             :                 }
    1121             : 
    1122        1122 :                 if (XFS_IS_REALTIME_INODE(ip))
    1123         510 :                         error = xfs_map_free_rt_space(ip, offset, len);
    1124             :                 else
    1125         612 :                         error = xfs_map_free_space(ip, offset, len);
    1126        1122 :                 if (error) {
    1127           0 :                         if (error == -ECANCELED)
    1128           0 :                                 error = 0;
    1129           0 :                         goto out_unlock;
    1130             :                 }
    1131             :         } else {
    1132    14509991 :                 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
    1133     7736253 :                     offset + len > i_size_read(inode)) {
    1134     5155961 :                         new_size = offset + len;
    1135     5155961 :                         error = inode_newsize_ok(inode, new_size);
    1136     5155961 :                         if (error)
    1137          10 :                                 goto out_unlock;
    1138             :                 }
    1139             : 
    1140    14509981 :                 if (mode & FALLOC_FL_ZERO_RANGE) {
    1141             :                         /*
    1142             :                          * Punch a hole and prealloc the range.  We use a hole
    1143             :                          * punch rather than unwritten extent conversion for two
    1144             :                          * reasons:
    1145             :                          *
    1146             :                          *   1.) Hole punch handles partial block zeroing for us.
    1147             :                          *   2.) If prealloc returns ENOSPC, the file range is
    1148             :                          *       still zero-valued by virtue of the hole punch.
    1149             :                          */
    1150     4367436 :                         unsigned int blksize = i_blocksize(inode);
    1151             : 
    1152     4367414 :                         trace_xfs_zero_file_space(ip, offset, len);
    1153             : 
    1154             :                         /* Unshare around the region to zero, if needed. */
    1155     4367415 :                         if (xfs_file_write_needs_cow_around(ip, offset, len)) {
    1156      395606 :                                 error = xfs_file_cow_around(ip, offset, len);
    1157      395607 :                                 if (error)
    1158        5317 :                                         goto out_unlock;
    1159             :                         }
    1160             : 
    1161     4362106 :                         error = xfs_free_file_space(ip, offset, len);
    1162     4362132 :                         if (error)
    1163       20872 :                                 goto out_unlock;
    1164             : 
    1165     4341260 :                         len = round_up(offset + len, blksize) -
    1166     4341260 :                               round_down(offset, blksize);
    1167     4341260 :                         offset = round_down(offset, blksize);
    1168    10142545 :                 } else if (mode & FALLOC_FL_UNSHARE_RANGE) {
    1169             :                         /*
    1170             :                          * Enlarge the unshare region to align to a full
    1171             :                          * allocation unit.
    1172             :                          */
    1173         315 :                         if (xfs_inode_needs_cow_around(ip)) {
    1174          40 :                                 loff_t          isize = i_size_read(VFS_I(ip));
    1175          40 :                                 unsigned int    rextsize;
    1176          40 :                                 uint32_t        mod;
    1177             : 
    1178          40 :                                 rextsize = xfs_inode_alloc_unitsize(ip);
    1179          40 :                                 div_u64_rem(offset, rextsize, &mod);
    1180          40 :                                 offset -= mod;
    1181          40 :                                 len += mod;
    1182             : 
    1183          40 :                                 div_u64_rem(offset + len, rextsize, &mod);
    1184          40 :                                 if (mod)
    1185          25 :                                         len += rextsize - mod;
    1186          40 :                                 if (offset + len > isize)
    1187           8 :                                         len = isize - offset;
    1188             :                         }
    1189         315 :                         error = xfs_reflink_unshare(ip, offset, len);
    1190         315 :                         if (error)
    1191           8 :                                 goto out_unlock;
    1192             :                 } else {
    1193             :                         /*
    1194             :                          * If always_cow mode we can't use preallocations and
    1195             :                          * thus should not create them.
    1196             :                          */
    1197    10142230 :                         if (xfs_is_always_cow_inode(ip)) {
    1198      448052 :                                 error = -EOPNOTSUPP;
    1199      448052 :                                 goto out_unlock;
    1200             :                         }
    1201             :                 }
    1202             : 
    1203    14035676 :                 if (!xfs_is_always_cow_inode(ip)) {
    1204    13779593 :                         error = xfs_alloc_file_space(ip, offset, len);
    1205    13779770 :                         if (error)
    1206      430666 :                                 goto out_unlock;
    1207             :                 }
    1208             :         }
    1209             : 
    1210             :         /* Change file size if needed */
    1211    54907870 :         if (new_size) {
    1212     9107278 :                 struct iattr iattr;
    1213             : 
    1214     9107278 :                 iattr.ia_valid = ATTR_SIZE;
    1215     9107278 :                 iattr.ia_size = new_size;
    1216    18214560 :                 error = xfs_vn_setattr_size(file_mnt_idmap(file),
    1217             :                                             file_dentry(file), &iattr);
    1218     9107273 :                 if (error)
    1219        1356 :                         goto out_unlock;
    1220             :         }
    1221             : 
    1222             :         /*
    1223             :          * Perform hole insertion now that the file size has been
    1224             :          * updated so that if we crash during the operation we don't
    1225             :          * leave shifted extents past EOF and hence losing access to
    1226             :          * the data that is contained within them.
    1227             :          */
    1228    54906509 :         if (do_file_insert) {
    1229     1860817 :                 error = xfs_insert_file_space(ip, offset, len);
    1230     1860819 :                 if (error)
    1231        3255 :                         goto out_unlock;
    1232             :         }
    1233             : 
    1234    54903256 :         if (xfs_file_sync_writes(file))
    1235       29922 :                 error = xfs_log_force_inode(ip);
    1236             : 
    1237    54873334 : out_unlock:
    1238    57408124 :         xfs_iunlock(ip, iolock);
    1239    57408124 :         return error;
    1240             : }
    1241             : 
                       /*
                        * Apply an fadvise hint to @file for the range given by @start and @end
                        * by delegating to generic_fadvise().  For POSIX_FADV_WILLNEED the call
                        * is bracketed by XFS_IOLOCK_SHARED; every other @advice value is passed
                        * through without taking an XFS lock in this function.
                        *
                        * Returns whatever generic_fadvise() returns.
                        */
    1242             : STATIC int
    1243    11769975 : xfs_file_fadvise(
    1244             :         struct file     *file,
    1245             :         loff_t          start,
    1246             :         loff_t          end,
    1247             :         int             advice)
    1248             : {
    1249    11769975 :         struct xfs_inode *ip = XFS_I(file_inode(file));
    1250    11769975 :         int ret;
    1251    11769975 :         int lockflags = 0;
    1252             : 
    1253             :         /*
    1254             :          * Operations creating pages in page cache need protection from hole
    1255             :          * punching and similar ops
    1256             :          */
    1257    11769975 :         if (advice == POSIX_FADV_WILLNEED) {
    1258           0 :                 lockflags = XFS_IOLOCK_SHARED;
    1259           0 :                 xfs_ilock(ip, lockflags);
    1260             :         }
    1261    11769975 :         ret = generic_fadvise(file, start, end, advice);
                               /* lockflags is non-zero only if the lock was taken above. */
    1262    11758079 :         if (lockflags)
    1263           0 :                 xfs_iunlock(ip, lockflags);
    1264    11758079 :         return ret;
    1265             : }
    1266             : 
                       /*
                        * remap_file_range method (see xfs_file_operations): remap @len bytes
                        * from @file_in at @pos_in to @file_out at @pos_out.
                        *
                        * Early failures, none of which reach out_unlock:
                        *   -EINVAL     if @remap_flags has bits other than REMAP_FILE_DEDUP and
                        *               REMAP_FILE_ADVISORY set
                        *   -EOPNOTSUPP if xfs_has_reflink() is false for the source's mount
                        *   -EIO        if that mount is shut down
                        *
                        * xfs_reflink_remap_prep() receives @len by pointer and may change it;
                        * if it fails, or leaves len == 0, its return value is returned as is.
                        * NOTE(review): out_unlock calls xfs_iunlock2_io_mmap(), so the prep
                        * helper presumably returns with both inodes locked on success --
                        * confirm against xfs_reflink_remap_prep().
                        *
                        * Returns the byte count reported by xfs_reflink_remap_blocks() when it
                        * is positive, otherwise the last error code (which may be 0).
                        */
    1267             : STATIC loff_t
    1268   398542861 : xfs_file_remap_range(
    1269             :         struct file             *file_in,
    1270             :         loff_t                  pos_in,
    1271             :         struct file             *file_out,
    1272             :         loff_t                  pos_out,
    1273             :         loff_t                  len,
    1274             :         unsigned int            remap_flags)
    1275             : {
    1276   398542861 :         struct inode            *inode_in = file_inode(file_in);
    1277   398542861 :         struct xfs_inode        *src = XFS_I(inode_in);
    1278   398542861 :         struct inode            *inode_out = file_inode(file_out);
    1279   398542861 :         struct xfs_inode        *dest = XFS_I(inode_out);
    1280   398542861 :         struct xfs_mount        *mp = src->i_mount;
    1281   398542861 :         loff_t                  remapped = 0;
    1282   398542861 :         xfs_extlen_t            cowextsize;
    1283   398542861 :         int                     ret;
    1284             : 
    1285   398542861 :         if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
    1286             :                 return -EINVAL;
    1287             : 
    1288   398542861 :         if (!xfs_has_reflink(mp))
    1289             :                 return -EOPNOTSUPP;
    1290             : 
    1291   777039224 :         if (xfs_is_shutdown(mp))
    1292             :                 return -EIO;
    1293             : 
    1294             :         /* Prepare and then clone file data. */
    1295   388511897 :         ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
    1296             :                         &len, remap_flags);
    1297   388562817 :         if (ret || len == 0)
    1298   145155528 :                 return ret;
    1299             : 
    1300   243407289 :         trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
    1301             : 
    1302   243400417 :         ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
    1303             :                         &remapped);
    1304   243377598 :         if (ret)
    1305     1378045 :                 goto out_unlock;
    1306             : 
    1307             :         /*
    1308             :          * Carry the cowextsize hint from src to dest if we're sharing the
    1309             :          * entire source file to the entire destination file, the source file
    1310             :          * has a cowextsize hint, and the destination file does not.
    1311             :          */
    1312   241999553 :         cowextsize = 0;
    1313   241999553 :         if (pos_in == 0 && len == i_size_read(inode_in) &&
    1314      215833 :             (src->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) &&
    1315         212 :             pos_out == 0 && len >= i_size_read(inode_out) &&
    1316         203 :             !(dest->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE))
    1317          27 :                 cowextsize = src->i_cowextsize;
    1318             : 
    1319   241999553 :         ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
    1320             :                         remap_flags);
    1321   241952138 :         if (ret)
    1322           0 :                 goto out_unlock;
    1323             : 
                               /*
                                * Force the log for dest if either file requests synchronous
                                * writes.  The return value of xfs_log_force_inode() is not
                                * checked here.
                                */
    1324   241952138 :         if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
    1325        1098 :                 xfs_log_force_inode(dest);
    1326   241953214 : out_unlock:
    1327   243331339 :         xfs_iunlock2_io_mmap(src, dest);
    1328   243381616 :         if (ret)
    1329     1378022 :                 trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
                               /* A positive remapped count takes precedence over ret. */
    1330   243381490 :         return remapped > 0 ? remapped : ret;
    1331             : }
    1332             : 
                       /*
                        * Open method used by xfs_file_operations and called first by
                        * xfs_dir_open().  Fails with -EIO if the filesystem backing @inode is
                        * shut down; otherwise sets FMODE_NOWAIT, FMODE_BUF_RASYNC,
                        * FMODE_BUF_WASYNC, FMODE_DIO_PARALLEL_WRITE and FMODE_CAN_ODIRECT in
                        * file->f_mode and returns the result of generic_file_open().
                        */
    1333             : STATIC int
    1334   755387812 : xfs_file_open(
    1335             :         struct inode    *inode,
    1336             :         struct file     *file)
    1337             : {
    1338  1510775624 :         if (xfs_is_shutdown(XFS_M(inode->i_sb)))
    1339             :                 return -EIO;
    1340   755374667 :         file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
    1341             :                         FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
    1342   755374667 :         return generic_file_open(inode, file);
    1343             : }
    1344             : 
                       /*
                        * Open method for xfs_dir_file_operations.  Runs xfs_file_open() first
                        * and returns its error if it fails.  Otherwise, when the data fork has
                        * at least one extent, starts readahead via xfs_dir3_data_readahead()
                        * while holding the lock mode returned by xfs_ilock_data_map_shared().
                        *
                        * Returns the result of xfs_dir3_data_readahead(), or 0 if no readahead
                        * was attempted.
                        */
    1345             : STATIC int
    1346    44864334 : xfs_dir_open(
    1347             :         struct inode    *inode,
    1348             :         struct file     *file)
    1349             : {
    1350    44864334 :         struct xfs_inode *ip = XFS_I(inode);
    1351    44864334 :         unsigned int    mode;
    1352    44864334 :         int             error;
    1353             : 
    1354    44864334 :         error = xfs_file_open(inode, file);
    1355    44790015 :         if (error)
    1356             :                 return error;
    1357             : 
    1358             :         /*
    1359             :          * If there are any blocks, read-ahead block 0 as we're almost
    1360             :          * certain to have the next operation be a read there.
    1361             :          */
    1362    44794659 :         mode = xfs_ilock_data_map_shared(ip);
    1363    44777865 :         if (ip->i_df.if_nextents > 0)
    1364     9913703 :                 error = xfs_dir3_data_readahead(ip, 0, 0);
    1365    44787701 :         xfs_iunlock(ip, mode);
    1366    44787701 :         return error;
    1367             : }
    1368             : 
    1369             : /*
    1370             :  * When we release the file, we don't want it to trim EOF blocks if it is a
    1371             :  * readonly context.  This avoids open/read/close workloads from removing
    1372             :  * EOF blocks that other writers depend upon to reduce fragmentation.
                        *
                        * Concretely: xfs_release() is called with free_eof_blocks == false when
                        * file->f_mode has FMODE_READ set and FMODE_WRITE clear, and with true in
                        * every other case.  Returns the result of xfs_release().
    1373             :  */
    1374             : STATIC int
    1375   710475869 : xfs_file_release(
    1376             :         struct inode    *inode,
    1377             :         struct file     *file)
    1378             : {
    1379   710475869 :         bool            free_eof_blocks = true;
    1380             : 
    1381   710475869 :         if ((file->f_mode & (FMODE_WRITE | FMODE_READ)) == FMODE_READ)
    1382   118660453 :                 free_eof_blocks = false;
    1383             : 
    1384   710475869 :         return xfs_release(XFS_I(inode), free_eof_blocks);
    1385             : }
    1386             : 
                       /*
                        * iterate_shared method for xfs_dir_file_operations.  Computes a buffer
                        * size estimate, the smaller of XFS_READDIR_BUFSIZE and ip->i_disk_size,
                        * and returns the result of xfs_readdir() called with that estimate.
                        *
                        * NOTE(review): the NULL first argument to xfs_readdir() is presumably
                        * "no transaction" -- confirm against xfs_readdir()'s prototype.
                        */
    1387             : STATIC int
    1388    90770073 : xfs_file_readdir(
    1389             :         struct file     *file,
    1390             :         struct dir_context *ctx)
    1391             : {
    1392    90770073 :         struct inode    *inode = file_inode(file);
    1393    90770073 :         xfs_inode_t     *ip = XFS_I(inode);
    1394    90770073 :         size_t          bufsize;
    1395             : 
    1396             :         /*
    1397             :          * The Linux API doesn't pass down the total size of the buffer
    1398             :          * we read into down to the filesystem.  With the filldir concept
    1399             :          * it's not needed for correct information, but the XFS dir2 leaf
    1400             :          * code wants an estimate of the buffer size to calculate its
    1401             :          * readahead window and size the buffers used for mapping to
    1402             :          * physical blocks.
    1403             :          *
    1404             :          * Try to give it an estimate that's good enough, maybe at some
    1405             :          * point we can change the ->readdir prototype to include the
    1406             :          * buffer size.  For now we use the current glibc buffer size.
    1407             :          */
    1408    90770073 :         bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_disk_size);
    1409             : 
    1410    90770073 :         return xfs_readdir(NULL, ip, ctx, bufsize);
    1411             : }
    1412             : 
                       /*
                        * llseek method for xfs_file_operations.
                        *
                        * Returns -EIO if the filesystem is shut down.  SEEK_HOLE and SEEK_DATA
                        * are resolved with iomap_seek_hole() / iomap_seek_data() using
                        * xfs_seek_iomap_ops; a negative result is returned unchanged, otherwise
                        * the position is applied through vfs_setpos() with s_maxbytes as the
                        * upper bound.  Every other @whence value is handed to
                        * generic_file_llseek().
                        */
    1413             : STATIC loff_t
    1414   102045447 : xfs_file_llseek(
    1415             :         struct file     *file,
    1416             :         loff_t          offset,
    1417             :         int             whence)
    1418             : {
    1419   102045447 :         struct inode            *inode = file->f_mapping->host;
    1420             : 
    1421   204090894 :         if (xfs_is_shutdown(XFS_I(inode)->i_mount))
    1422             :                 return -EIO;
    1423             : 
    1424   102045442 :         switch (whence) {
    1425   101704266 :         default:
    1426   101704266 :                 return generic_file_llseek(file, offset, whence);
    1427        1581 :         case SEEK_HOLE:
    1428        1581 :                 offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
    1429        1581 :                 break;
    1430      339595 :         case SEEK_DATA:
    1431      339595 :                 offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
    1432      339595 :                 break;
    1433             :         }
    1434             : 
                               /* Only the SEEK_HOLE / SEEK_DATA cases reach this point. */
    1435      341176 :         if (offset < 0)
    1436             :                 return offset;
    1437      262849 :         return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
    1438             : }
    1439             : 
                       /*
                        * DAX fault helper called from __xfs_filemap_fault().
                        *
                        * With CONFIG_FS_DAX this forwards to dax_iomap_fault(), selecting
                        * xfs_dax_write_iomap_ops when @write_fault is true and vmf->cow_page is
                        * not set, and xfs_read_iomap_ops otherwise.  @pfn is passed through to
                        * dax_iomap_fault().
                        *
                        * Without CONFIG_FS_DAX the stub hits ASSERT(0) and returns
                        * VM_FAULT_SIGBUS.
                        */
    1440             : #ifdef CONFIG_FS_DAX
    1441             : static inline vm_fault_t
    1442           0 : xfs_dax_fault(
    1443             :         struct vm_fault         *vmf,
    1444             :         enum page_entry_size    pe_size,
    1445             :         bool                    write_fault,
    1446             :         pfn_t                   *pfn)
    1447             : {
    1448           0 :         return dax_iomap_fault(vmf, pe_size, pfn, NULL,
    1449           0 :                         (write_fault && !vmf->cow_page) ?
    1450             :                                 &xfs_dax_write_iomap_ops :
    1451             :                                 &xfs_read_iomap_ops);
    1452             : }
    1453             : #else
    1454             : static inline vm_fault_t
    1455             : xfs_dax_fault(
    1456             :         struct vm_fault         *vmf,
    1457             :         enum page_entry_size    pe_size,
    1458             :         bool                    write_fault,
    1459             :         pfn_t                   *pfn)
    1460             : {
    1461             :         ASSERT(0);
    1462             :         return VM_FAULT_SIGBUS;
    1463             : }
    1464             : #endif
    1465             : 
                       /*
                        * Helper for the non-DAX write fault path in __xfs_filemap_fault().
                        *
                        * Returns 0 without doing anything when xfs_inode_needs_cow_around() is
                        * false.  Otherwise the folio backing vmf->page is locked just long
                        * enough to obtain a length from folio_mkwrite_check_truncate() and the
                        * folio position; a negative length is returned as the error.  If
                        * xfs_file_write_needs_cow_around() is true for that range, the result
                        * of xfs_file_cow_around() is returned, otherwise 0.
                        */
    1466             : static int
    1467    92725784 : xfs_filemap_fault_around(
    1468             :         struct vm_fault         *vmf,
    1469             :         struct inode            *inode)
    1470             : {
    1471    92725784 :         struct xfs_inode        *ip = XFS_I(inode);
    1472    92725784 :         struct folio            *folio = page_folio(vmf->page);
    1473    92702471 :         loff_t                  pos;
    1474    92702471 :         ssize_t                 len;
    1475             : 
    1476    92702471 :         if (!xfs_inode_needs_cow_around(ip))
    1477             :                 return 0;
    1478             : 
    1479     6222705 :         folio_lock(folio);
    1480     6222708 :         len = folio_mkwrite_check_truncate(folio, inode);
    1481     6222706 :         if (len < 0) {
    1482           6 :                 folio_unlock(folio);
    1483           6 :                 return len;
    1484             :         }
    1485     6222700 :         pos = folio_pos(folio);
    1486     6222700 :         folio_unlock(folio);
    1487             : 
                               /* The folio is unlocked from here on; only pos/len are used. */
    1488     6222707 :         if (!xfs_file_write_needs_cow_around(ip, pos, len))
    1489             :                 return 0;
    1490             : 
    1491     6210429 :         return xfs_file_cow_around(XFS_I(inode), pos, len);
    1492             : }
    1493             : 
    1494             : /*
    1495             :  * Locking for serialisation of IO during page faults. This results in a lock
    1496             :  * ordering of:
    1497             :  *
    1498             :  * mmap_lock (MM)
    1499             :  *   sb_start_pagefault(vfs, freeze)
    1500             :  *     invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
    1501             :  *       page_lock (MM)
    1502             :  *         i_lock (XFS - extent map serialisation)
                        *
                        * Common handler behind the xfs_file_vm_ops fault callbacks.  When
                        * @write_fault is true the work is bracketed by sb_start_pagefault() /
                        * sb_end_pagefault() and file_update_time() is called up front.
                        *
                        *  - DAX inode: xfs_dax_fault() runs under XFS_MMAPLOCK_SHARED, followed
                        *    by dax_finish_sync_fault() if VM_FAULT_NEEDDSYNC was returned.
                        *  - non-DAX write fault: under XFS_MMAPLOCK_SHARED, first
                        *    xfs_filemap_fault_around(); on error the lock is dropped and the
                        *    error is converted with block_page_mkwrite_return(); otherwise
                        *    iomap_page_mkwrite() is called with xfs_page_mkwrite_iomap_ops.
                        *  - non-DAX read fault: filemap_fault(), with no XFS_MMAPLOCK taken in
                        *    this function.
    1503             :  */
    1504             : static vm_fault_t
    1505   221800070 : __xfs_filemap_fault(
    1506             :         struct vm_fault         *vmf,
    1507             :         enum page_entry_size    pe_size,
    1508             :         bool                    write_fault)
    1509             : {
    1510   221800070 :         struct inode            *inode = file_inode(vmf->vma->vm_file);
    1511   221800070 :         struct xfs_inode        *ip = XFS_I(inode);
    1512   221800070 :         vm_fault_t              ret;
    1513             : 
    1514   221800070 :         trace_xfs_filemap_fault(ip, pe_size, write_fault);
    1515             : 
    1516   221826474 :         if (write_fault) {
    1517    92972933 :                 sb_start_pagefault(inode->i_sb);
    1518    92809907 :                 file_update_time(vmf->vma->vm_file);
    1519             :         }
    1520             : 
    1521   221717413 :         if (IS_DAX(inode)) {
    1522           0 :                 pfn_t pfn;
    1523             : 
    1524           0 :                 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1525           0 :                 ret = xfs_dax_fault(vmf, pe_size, write_fault, &pfn);
    1526           0 :                 if (ret & VM_FAULT_NEEDDSYNC)
    1527           0 :                         ret = dax_finish_sync_fault(vmf, pe_size, pfn);
    1528           0 :                 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1529             :         } else {
    1530   221717413 :                 if (write_fault) {
    1531    92862578 :                         int     error;
    1532             : 
    1533    92862578 :                         xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1534             : 
    1535             :                         /*
    1536             :                          * Unshare all the blocks in this rt extent surrounding
    1537             :                          * this page.
    1538             :                          */
    1539    92832903 :                         error = xfs_filemap_fault_around(vmf, inode);
    1540    92775737 :                         if (error) {
    1541        6071 :                                 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1542        6071 :                                 ret = block_page_mkwrite_return(error);
    1543        6071 :                                 goto out;
    1544             :                         }
    1545             : 
    1546    92769666 :                         ret = iomap_page_mkwrite(vmf,
    1547             :                                         &xfs_page_mkwrite_iomap_ops);
    1548    92729701 :                         xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
    1549             :                 } else {
    1550   128854835 :                         ret = filemap_fault(vmf);
    1551             :                 }
    1552             :         }
    1553             : 
    1554   221808207 : out:
    1555   221808207 :         if (write_fault)
    1556    92893971 :                 sb_end_pagefault(inode->i_sb);
    1557   221723162 :         return ret;
    1558             : }
    1559             : 
                       /*
                        * Return true when the fault has FAULT_FLAG_WRITE set and the faulting
                        * VMA has VM_SHARED set.
                        */
    1560             : static inline bool
    1561             : xfs_is_write_fault(
    1562             :         struct vm_fault         *vmf)
    1563             : {
    1564           0 :         return (vmf->flags & FAULT_FLAG_WRITE) &&
    1565           0 :                (vmf->vma->vm_flags & VM_SHARED);
    1566             : }
    1567             : 
                       /*
                        * .fault handler in xfs_file_vm_ops.  Calls __xfs_filemap_fault() with
                        * PE_SIZE_PTE; the fault is treated as a write fault only when the inode
                        * is DAX and xfs_is_write_fault() is true.
                        */
    1568             : static vm_fault_t
    1569   128916407 : xfs_filemap_fault(
    1570             :         struct vm_fault         *vmf)
    1571             : {
    1572             :         /* DAX can shortcut the normal fault path on write faults! */
    1573   128916407 :         return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
    1574   128916407 :                         IS_DAX(file_inode(vmf->vma->vm_file)) &&
    1575             :                         xfs_is_write_fault(vmf));
    1576             : }
    1577             : 
                       /*
                        * .huge_fault handler in xfs_file_vm_ops.  Returns VM_FAULT_FALLBACK for
                        * non-DAX inodes; for DAX inodes it calls __xfs_filemap_fault() with the
                        * caller's @pe_size and xfs_is_write_fault() as the write flag.
                        */
    1578             : static vm_fault_t
    1579       16431 : xfs_filemap_huge_fault(
    1580             :         struct vm_fault         *vmf,
    1581             :         enum page_entry_size    pe_size)
    1582             : {
    1583       16431 :         if (!IS_DAX(file_inode(vmf->vma->vm_file)))
    1584             :                 return VM_FAULT_FALLBACK;
    1585             : 
    1586             :         /* DAX can shortcut the normal fault path on write faults! */
    1587           0 :         return __xfs_filemap_fault(vmf, pe_size,
    1588             :                         xfs_is_write_fault(vmf));
    1589             : }
    1590             : 
                       /*
                        * .page_mkwrite handler in xfs_file_vm_ops: always a PTE-sized write
                        * fault, handled by __xfs_filemap_fault().
                        */
    1591             : static vm_fault_t
    1592    93010711 : xfs_filemap_page_mkwrite(
    1593             :         struct vm_fault         *vmf)
    1594             : {
    1595    93010711 :         return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
    1596             : }
    1597             : 
    1598             : /*
    1599             :  * pfn_mkwrite was originally intended to ensure we capture time stamp updates
    1600             :  * on write faults. In reality, it needs to serialise against truncate and
    1601             :  * prepare memory for writing so handle it as a standard write fault.
    1602             :  */
    1603             : static vm_fault_t
    1604           0 : xfs_filemap_pfn_mkwrite(
    1605             :         struct vm_fault         *vmf)
    1606             : {
    1607             : 
    1608           0 :         return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
    1609             : }
    1610             : 
                       /*
                        * VM operations that xfs_file_mmap() installs as vma->vm_ops.  All
                        * entries except .map_pages (filemap_map_pages) are the XFS wrappers
                        * defined above.
                        */
    1611             : static const struct vm_operations_struct xfs_file_vm_ops = {
    1612             :         .fault          = xfs_filemap_fault,
    1613             :         .huge_fault     = xfs_filemap_huge_fault,
    1614             :         .map_pages      = filemap_map_pages,
    1615             :         .page_mkwrite   = xfs_filemap_page_mkwrite,
    1616             :         .pfn_mkwrite    = xfs_filemap_pfn_mkwrite,
    1617             : };
    1618             : 
                       /*
                        * mmap method for xfs_file_operations.  Returns -EOPNOTSUPP when
                        * daxdev_mapping_supported() rejects the mapping for the inode's buffer
                        * target.  Otherwise calls file_accessed(), installs xfs_file_vm_ops on
                        * @vma, sets VM_HUGEPAGE on the VMA for DAX inodes, and returns 0.
                        */
    1619             : STATIC int
    1620    15647236 : xfs_file_mmap(
    1621             :         struct file             *file,
    1622             :         struct vm_area_struct   *vma)
    1623             : {
    1624    15647236 :         struct inode            *inode = file_inode(file);
    1625    15647236 :         struct xfs_buftarg      *target = xfs_inode_buftarg(XFS_I(inode));
    1626             : 
    1627             :         /*
    1628             :          * We don't support synchronous mappings for non-DAX files and
    1629             :          * for DAX files if underneath dax_device is not synchronous.
    1630             :          */
    1631    15647236 :         if (!daxdev_mapping_supported(vma, target->bt_daxdev))
    1632             :                 return -EOPNOTSUPP;
    1633             : 
    1634    15646988 :         file_accessed(file);
    1635    15647187 :         vma->vm_ops = &xfs_file_vm_ops;
    1636    15647187 :         if (IS_DAX(inode))
    1637           0 :                 vm_flags_set(vma, VM_HUGEPAGE);
    1638             :         return 0;
    1639             : }
    1640             : 
                       /*
                        * File operations table wiring up the xfs_file_* handlers in this file
                        * (llseek, open, release, mmap, fallocate, fadvise, remap_file_range,
                        * ...) alongside generic helpers such as iter_file_splice_write and
                        * iocb_bio_iopoll.  .compat_ioctl is only present with CONFIG_COMPAT.
                        * NOTE(review): presumably the table for regular files, by contrast
                        * with xfs_dir_file_operations -- confirm where it is installed.
                        */
    1641             : const struct file_operations xfs_file_operations = {
    1642             :         .llseek         = xfs_file_llseek,
    1643             :         .read_iter      = xfs_file_read_iter,
    1644             :         .write_iter     = xfs_file_write_iter,
    1645             :         .splice_read    = xfs_file_splice_read,
    1646             :         .splice_write   = iter_file_splice_write,
    1647             :         .iopoll         = iocb_bio_iopoll,
    1648             :         .unlocked_ioctl = xfs_file_ioctl,
    1649             : #ifdef CONFIG_COMPAT
    1650             :         .compat_ioctl   = xfs_file_compat_ioctl,
    1651             : #endif
    1652             :         .mmap           = xfs_file_mmap,
    1653             :         .mmap_supported_flags = MAP_SYNC,
    1654             :         .open           = xfs_file_open,
    1655             :         .release        = xfs_file_release,
    1656             :         .fsync          = xfs_file_fsync,
    1657             :         .get_unmapped_area = thp_get_unmapped_area,
    1658             :         .fallocate      = xfs_file_fallocate,
    1659             :         .fadvise        = xfs_file_fadvise,
    1660             :         .remap_file_range = xfs_file_remap_range,
    1661             : };
    1662             : 
                       /*
                        * File operations table using the directory handlers: xfs_dir_open() for
                        * open, xfs_file_readdir() for iterate_shared and xfs_dir_fsync() for
                        * fsync, with generic_read_dir / generic_file_llseek for read and llseek.
                        * The ioctl entries are the same ones used by xfs_file_operations.
                        */
    1663             : const struct file_operations xfs_dir_file_operations = {
    1664             :         .open           = xfs_dir_open,
    1665             :         .read           = generic_read_dir,
    1666             :         .iterate_shared = xfs_file_readdir,
    1667             :         .llseek         = generic_file_llseek,
    1668             :         .unlocked_ioctl = xfs_file_ioctl,
    1669             : #ifdef CONFIG_COMPAT
    1670             :         .compat_ioctl   = xfs_file_compat_ioctl,
    1671             : #endif
    1672             :         .fsync          = xfs_dir_fsync,
    1673             : };

Generated by: LCOV version 1.14