LCOV - fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023

LCOV - code coverage report

Current view:	top level - fs/xfs - xfs_reflink.c (source / functions)		Hit	Total	Coverage
Test:	fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023	Lines:	759	796	95.4 %
Date:	2023-07-31 20:08:34	Functions:	30	30	100.0 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * Copyright (C) 2016 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <darrick.wong@oracle.com>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_defer.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_bmap.h"
      17             : #include "xfs_bmap_util.h"
      18             : #include "xfs_trace.h"
      19             : #include "xfs_icache.h"
      20             : #include "xfs_btree.h"
      21             : #include "xfs_refcount_btree.h"
      22             : #include "xfs_refcount.h"
      23             : #include "xfs_bmap_btree.h"
      24             : #include "xfs_trans_space.h"
      25             : #include "xfs_bit.h"
      26             : #include "xfs_alloc.h"
      27             : #include "xfs_quota.h"
      28             : #include "xfs_reflink.h"
      29             : #include "xfs_iomap.h"
      30             : #include "xfs_ag.h"
      31             : #include "xfs_ag_resv.h"
      32             : #include "xfs_health.h"
      33             : #include "xfs_rtrefcount_btree.h"
      34             : #include "xfs_rtalloc.h"
      35             : #include "xfs_rtgroup.h"
      36             : #include "xfs_imeta.h"
      37             : #include "xfs_rtbitmap.h"
      38             : 
      39             : /*
      40             :  * Copy on Write of Shared Blocks
      41             :  *
      42             :  * XFS must preserve "the usual" file semantics even when two files share
      43             :  * the same physical blocks.  This means that a write to one file must not
      44             :  * alter the blocks in a different file; the way that we'll do that is
      45             :  * through the use of a copy-on-write mechanism.  At a high level, that
      46             :  * means that when we want to write to a shared block, we allocate a new
      47             :  * block, write the data to the new block, and if that succeeds we map the
      48             :  * new block into the file.
      49             :  *
      50             :  * XFS provides a "delayed allocation" mechanism that defers the allocation
      51             :  * of disk blocks to dirty-but-not-yet-mapped file blocks as long as
      52             :  * possible.  This reduces fragmentation by enabling the filesystem to ask
      53             :  * for bigger chunks less often, which is exactly what we want for CoW.
      54             :  *
      55             :  * The delalloc mechanism begins when the kernel wants to make a block
      56             :  * writable (write_begin or page_mkwrite).  If the offset is not mapped, we
      57             :  * create a delalloc mapping, which is a regular in-core extent, but without
      58             :  * a real startblock.  (For delalloc mappings, the startblock encodes both
      59             :  * a flag that this is a delalloc mapping, and a worst-case estimate of how
      60             :  * many blocks might be required to put the mapping into the BMBT.)  delalloc
      61             :  * mappings are a reservation against the free space in the filesystem;
      62             :  * adjacent mappings can also be combined into fewer larger mappings.
      63             :  *
      64             :  * As an optimization, the CoW extent size hint (cowextsz) creates
      65             :  * outsized aligned delalloc reservations in the hope of landing out of
      66             :  * order nearby CoW writes in a single extent on disk, thereby reducing
      67             :  * fragmentation and improving future performance.
      68             :  *
      69             :  * D: --RRRRRRSSSRRRRRRRR--- (data fork)
      70             :  * C: ------DDDDDDD--------- (CoW fork)
      71             :  *
      72             :  * When dirty pages are being written out (typically in writepage), the
      73             :  * delalloc reservations are converted into unwritten mappings by
      74             :  * allocating blocks and replacing the delalloc mapping with real ones.
      75             :  * A delalloc mapping can be replaced by several unwritten ones if the
      76             :  * free space is fragmented.
      77             :  *
      78             :  * D: --RRRRRRSSSRRRRRRRR---
      79             :  * C: ------UUUUUUU---------
      80             :  *
      81             :  * We want to adapt the delalloc mechanism for copy-on-write, since the
      82             :  * write paths are similar.  The first two steps (creating the reservation
      83             :  * and allocating the blocks) are exactly the same as delalloc except that
      84             :  * the mappings must be stored in a separate CoW fork because we do not want
      85             :  * to disturb the mapping in the data fork until we're sure that the write
      86             :  * succeeded.  IO completion in this case is the process of removing the old
      87             :  * mapping from the data fork and moving the new mapping from the CoW fork to
      88             :  * the data fork.  This will be discussed shortly.
      89             :  *
      90             :  * For now, unaligned directio writes will be bounced back to the page cache.
      91             :  * Block-aligned directio writes will use the same mechanism as buffered
      92             :  * writes.
      93             :  *
      94             :  * Just prior to submitting the actual disk write requests, we convert
      95             :  * the extents representing the range of the file actually being written
      96             :  * (as opposed to extra pieces created for the cowextsize hint) to real
      97             :  * extents.  This will become important in the next step:
      98             :  *
      99             :  * D: --RRRRRRSSSRRRRRRRR---
     100             :  * C: ------UUrrUUU---------
     101             :  *
     102             :  * CoW remapping must be done after the data block write completes,
     103             :  * because we don't want to destroy the old data fork map until we're sure
     104             :  * the new block has been written.  Since the new mappings are kept in a
     105             :  * separate fork, we can simply iterate these mappings to find the ones
     106             :  * that cover the file blocks that we just CoW'd.  For each extent, simply
     107             :  * unmap the corresponding range in the data fork, map the new range into
     108             :  * the data fork, and remove the extent from the CoW fork.  Because of
     109             :  * the presence of the cowextsize hint, however, we must be careful
     110             :  * only to remap the blocks that we've actually written out --  we must
     111             :  * never remap delalloc reservations nor CoW staging blocks that have
     112             :  * yet to be written.  This corresponds exactly to the real extents in
     113             :  * the CoW fork:
     114             :  *
     115             :  * D: --RRRRRRrrSRRRRRRRR---
     116             :  * C: ------UU--UUU---------
     117             :  *
     118             :  * Since the remapping operation can be applied to an arbitrary file
     119             :  * range, we record the need for the remap step as a flag in the ioend
     120             :  * instead of declaring a new IO type.  This is required for direct io
     121             :  * because we only have ioend for the whole dio, and we have to be able to
     122             :  * remember the presence of unwritten blocks and CoW blocks with a single
     123             :  * ioend structure.  Better yet, the more ground we can cover with one
     124             :  * ioend, the better.
     125             :  */
     126             : 
     127             : /*
     128             :  * Given an AG extent, find the lowest-numbered run of shared blocks
     129             :  * within that range and return the range in fbno/flen.  If
     130             :  * find_end_of_shared is true, return the longest contiguous extent of
     131             :  * shared blocks.  If there are no shared extents, fbno and flen will
     132             :  * be set to NULLAGBLOCK and 0, respectively.
     133             :  */
     134             : static int
     135   788379267 : xfs_reflink_find_shared(
     136             :         struct xfs_perag        *pag,
     137             :         struct xfs_trans        *tp,
     138             :         xfs_agblock_t           agbno,
     139             :         xfs_extlen_t            aglen,
     140             :         xfs_agblock_t           *fbno,
     141             :         xfs_extlen_t            *flen,
     142             :         bool                    find_end_of_shared)
     143             : {
     144   788379267 :         struct xfs_buf          *agbp;
     145   788379267 :         struct xfs_btree_cur    *cur;
     146   788379267 :         int                     error;
     147             : 
     148   788379267 :         error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
     149   788386771 :         if (error)
     150             :                 return error;
     151             : 
     152   788387620 :         cur = xfs_refcountbt_init_cursor(pag->pag_mount, tp, agbp, pag);
     153             : 
     154   788431977 :         error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
     155             :                         find_end_of_shared);
     156             : 
     157   788353270 :         xfs_btree_del_cursor(cur, error);
     158             : 
     159   788420172 :         xfs_trans_brelse(tp, agbp);
     160   788420172 :         return error;
     161             : }
     162             : 
     163             : /*
     164             :  * Given an RT extent, find the lowest-numbered run of shared blocks
     165             :  * within that range and return the range in fbno/flen.  If
     166             :  * find_end_of_shared is true, return the longest contiguous extent of
     167             :  * shared blocks.  If there are no shared extents, fbno and flen will
     168             :  * be set to NULLRGBLOCK and 0, respectively.
     169             :  */
     170             : static int
     171    62251591 : xfs_reflink_find_rtshared(
     172             :         struct xfs_rtgroup      *rtg,
     173             :         struct xfs_trans        *tp,
     174             :         xfs_agblock_t           rtbno,
     175             :         xfs_extlen_t            rtlen,
     176             :         xfs_agblock_t           *fbno,
     177             :         xfs_extlen_t            *flen,
     178             :         bool                    find_end_of_shared)
     179             : {
     180    62251591 :         struct xfs_mount        *mp = rtg->rtg_mount;
     181    62251591 :         struct xfs_btree_cur    *cur;
     182    62251591 :         int                     error;
     183             : 
     184    62251591 :         BUILD_BUG_ON(NULLRGBLOCK != NULLAGBLOCK);
     185             : 
     186    62251591 :         xfs_rtgroup_lock(NULL, rtg, XFS_RTGLOCK_REFCOUNT);
     187    62271708 :         cur = xfs_rtrefcountbt_init_cursor(mp, tp, rtg, rtg->rtg_refcountip);
     188    62287624 :         error = xfs_refcount_find_shared(cur, rtbno, rtlen, fbno, flen,
     189             :                         find_end_of_shared);
     190    62265923 :         xfs_btree_del_cursor(cur, error);
     191    62284786 :         xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_REFCOUNT);
     192    62275993 :         return error;
     193             : }
     194             : 
     195             : /*
     196             :  * Trim the mapping to the next block where there's a change in the
     197             :  * shared/unshared status.  More specifically, this means that we
     198             :  * find the lowest-numbered extent of shared blocks that coincides with
     199             :  * the given block mapping.  If the shared extent overlaps the start of
     200             :  * the mapping, trim the mapping to the end of the shared extent.  If
     201             :  * the shared region intersects the mapping, trim the mapping to the
     202             :  * start of the shared extent.  If there are no shared regions that
     203             :  * overlap, just return the original extent.
     204             :  */
     205             : int
     206   127515071 : xfs_reflink_trim_around_shared(
     207             :         struct xfs_inode        *ip,
     208             :         struct xfs_bmbt_irec    *irec,
     209             :         bool                    *shared)
     210             : {
     211   127515071 :         struct xfs_mount        *mp = ip->i_mount;
     212   127515071 :         xfs_agblock_t           orig_bno;
     213   127515071 :         xfs_agblock_t           fbno;
     214   127515071 :         xfs_extlen_t            flen;
     215   127515071 :         int                     error = 0;
     216             : 
     217             :         /* Holes, unwritten, and delalloc extents cannot be shared */
     218   127515071 :         if (!xfs_is_cow_inode(ip) || !xfs_bmap_is_written_extent(irec)) {
     219    77414720 :                 *shared = false;
     220    77414720 :                 return 0;
     221             :         }
     222             : 
     223    50100714 :         trace_xfs_reflink_trim_around_shared(ip, irec);
     224             : 
     225    77627213 :         if (XFS_IS_REALTIME_INODE(ip)) {
     226    27525937 :                 struct xfs_rtgroup      *rtg;
     227    27525937 :                 xfs_rgnumber_t          rgno;
     228             : 
     229    27525937 :                 orig_bno = xfs_rtb_to_rgbno(mp, irec->br_startblock, &rgno);
     230    27525960 :                 rtg = xfs_rtgroup_get(mp, rgno);
     231    27526474 :                 error = xfs_reflink_find_rtshared(rtg, NULL, orig_bno,
     232    27526474 :                                 irec->br_blockcount, &fbno, &flen, true);
     233    27526585 :                 xfs_rtgroup_put(rtg);
     234             :         } else {
     235    22574559 :                 struct xfs_perag        *pag;
     236             : 
     237    22574559 :                 pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp,
     238             :                                         irec->br_startblock));
     239    22574957 :                 orig_bno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
     240    22574879 :                 error = xfs_reflink_find_shared(pag, NULL, orig_bno,
     241    22574879 :                                 irec->br_blockcount, &fbno, &flen, true);
     242    22574969 :                 xfs_perag_put(pag);
     243             :         }
     244    50101750 :         if (error)
     245             :                 return error;
     246             : 
     247    50101591 :         *shared = false;
     248    50101591 :         if (fbno == NULLAGBLOCK) {
     249             :                 /* No shared blocks at all. */
     250             :                 return 0;
     251             :         }
     252             : 
     253     4786842 :         if (fbno == orig_bno) {
     254             :                 /*
     255             :                  * The start of this extent is shared.  Truncate the
     256             :                  * mapping at the end of the shared region so that a
     257             :                  * subsequent iteration starts at the start of the
     258             :                  * unshared region.
     259             :                  */
     260     4688946 :                 irec->br_blockcount = flen;
     261     4688946 :                 *shared = true;
     262     4688946 :                 return 0;
     263             :         }
     264             : 
     265             :         /*
     266             :          * There's a shared extent midway through this extent.
     267             :          * Truncate the mapping at the start of the shared
     268             :          * extent so that a subsequent iteration starts at the
     269             :          * start of the shared region.
     270             :          */
     271       97896 :         irec->br_blockcount = fbno - orig_bno;
     272       97896 :         return 0;
     273             : }
     274             : 
     275             : int
     276   109713012 : xfs_bmap_trim_cow(
     277             :         struct xfs_inode        *ip,
     278             :         struct xfs_bmbt_irec    *imap,
     279             :         bool                    *shared)
     280             : {
     281             :         /* We can't update any real extents in always COW mode. */
     282   109713012 :         if (xfs_is_always_cow_inode(ip) &&
     283     1247093 :             !isnullstartblock(imap->br_startblock)) {
     284     1031706 :                 *shared = true;
     285     1031706 :                 return 0;
     286             :         }
     287             : 
     288             :         /* Trim the mapping to the nearest shared extent boundary. */
     289   108679666 :         return xfs_reflink_trim_around_shared(ip, imap, shared);
     290             : }
     291             : 
     292             : static int
     293    12337869 : xfs_reflink_convert_cow_locked(
     294             :         struct xfs_inode        *ip,
     295             :         xfs_fileoff_t           offset_fsb,
     296             :         xfs_filblks_t           count_fsb)
     297             : {
     298    12337869 :         struct xfs_iext_cursor  icur;
     299    12337869 :         struct xfs_bmbt_irec    got;
     300    12337869 :         struct xfs_btree_cur    *dummy_cur = NULL;
     301    12337869 :         struct xfs_mount        *mp = ip->i_mount;
     302    12337869 :         int                     dummy_logflags;
     303    12337869 :         int                     error = 0;
     304             : 
     305             :         /*
     306             :          * We can only remap full rt extents, so make sure that we convert the
     307             :          * entire extent.  The caller must ensure that this is either a direct
     308             :          * write that's aligned to the rt extent size, or a buffered write for
     309             :          * which we've dirtied extra pages to make this work properly.
     310             :          */
     311    12337869 :         if (xfs_inode_needs_cow_around(ip)) {
     312      240697 :                 xfs_fileoff_t   new_off;
     313             : 
     314      240697 :                 new_off = xfs_rtb_rounddown_rtx(mp, offset_fsb);
     315      240697 :                 count_fsb += offset_fsb - new_off;
     316      240697 :                 offset_fsb = new_off;
     317             : 
     318      240697 :                 count_fsb = xfs_rtb_roundup_rtx(mp, count_fsb);
     319             :         }
     320             : 
     321    12337874 :         if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got))
     322             :                 return 0;
     323             : 
     324    23119119 :         do {
     325    23119119 :                 if (got.br_startoff >= offset_fsb + count_fsb)
     326             :                         break;
     327    12348118 :                 if (got.br_state == XFS_EXT_NORM)
     328         737 :                         continue;
     329    12347381 :                 if (WARN_ON_ONCE(isnullstartblock(got.br_startblock)))
     330             :                         return -EIO;
     331             : 
     332    12347381 :                 xfs_trim_extent(&got, offset_fsb, count_fsb);
     333    12347308 :                 if (!got.br_blockcount)
     334           0 :                         continue;
     335             : 
     336    12347308 :                 got.br_state = XFS_EXT_NORM;
     337    12347308 :                 error = xfs_bmap_add_extent_unwritten_real(NULL, ip,
     338             :                                 XFS_COW_FORK, &icur, &dummy_cur, &got,
     339             :                                 &dummy_logflags);
     340    12347260 :                 if (error)
     341           0 :                         return error;
     342    12347997 :         } while (xfs_iext_next_extent(ip->i_cowfp, &icur, &got));
     343             : 
     344             :         return error;
     345             : }
     346             : 
     347             : /* Convert all of the unwritten CoW extents in a file's range to real ones. */
     348             : int
     349     6271223 : xfs_reflink_convert_cow(
     350             :         struct xfs_inode        *ip,
     351             :         xfs_off_t               offset,
     352             :         xfs_off_t               count)
     353             : {
     354     6271223 :         struct xfs_mount        *mp = ip->i_mount;
     355     6271223 :         xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
     356     6271223 :         xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + count);
     357     6271223 :         xfs_filblks_t           count_fsb = end_fsb - offset_fsb;
     358     6271223 :         int                     error;
     359             : 
     360     6271223 :         ASSERT(count != 0);
     361             : 
     362     6271223 :         xfs_ilock(ip, XFS_ILOCK_EXCL);
     363     6271226 :         error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
     364     6271169 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
     365     6271211 :         return error;
     366             : }
     367             : 
     368             : /*
     369             :  * Find the extent that maps the given range in the COW fork. Even if the extent
     370             :  * is not shared we might have a preallocation for it in the COW fork. If so we
     371             :  * use it that rather than trigger a new allocation.
     372             :  */
     373             : static int
     374    93897894 : xfs_find_trim_cow_extent(
     375             :         struct xfs_inode        *ip,
     376             :         struct xfs_bmbt_irec    *imap,
     377             :         struct xfs_bmbt_irec    *cmap,
     378             :         bool                    *shared,
     379             :         bool                    *found)
     380             : {
     381    93897894 :         xfs_fileoff_t           offset_fsb = imap->br_startoff;
     382    93897894 :         xfs_filblks_t           count_fsb = imap->br_blockcount;
     383    93897894 :         struct xfs_iext_cursor  icur;
     384             : 
     385    93897894 :         *found = false;
     386             : 
     387             :         /*
     388             :          * If we don't find an overlapping extent, trim the range we need to
     389             :          * allocate to fit the hole we found.
     390             :          */
     391    93897894 :         if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, cmap))
     392    75132363 :                 cmap->br_startoff = offset_fsb + count_fsb;
     393    93898790 :         if (cmap->br_startoff > offset_fsb) {
     394    82428382 :                 xfs_trim_extent(imap, imap->br_startoff,
     395    82428382 :                                 cmap->br_startoff - imap->br_startoff);
     396    82427219 :                 return xfs_bmap_trim_cow(ip, imap, shared);
     397             :         }
     398             : 
     399    11470408 :         *shared = true;
     400    11470408 :         if (isnullstartblock(cmap->br_startblock)) {
     401       11323 :                 xfs_trim_extent(imap, cmap->br_startoff, cmap->br_blockcount);
     402       11323 :                 return 0;
     403             :         }
     404             : 
     405             :         /* real extent found - no need to allocate */
     406    11459085 :         xfs_trim_extent(cmap, offset_fsb, count_fsb);
     407    11459001 :         *found = true;
     408    11459001 :         return 0;
     409             : }
     410             : 
     411             : static int
     412    12688953 : xfs_reflink_convert_unwritten(
     413             :         struct xfs_inode        *ip,
     414             :         struct xfs_bmbt_irec    *imap,
     415             :         struct xfs_bmbt_irec    *cmap,
     416             :         bool                    convert_now)
     417             : {
     418    12688953 :         xfs_fileoff_t           offset_fsb = imap->br_startoff;
     419    12688953 :         xfs_filblks_t           count_fsb = imap->br_blockcount;
     420    12688953 :         int                     error;
     421             : 
     422             :         /*
     423             :          * cmap might larger than imap due to cowextsize hint.
     424             :          */
     425    12688953 :         xfs_trim_extent(cmap, offset_fsb, count_fsb);
     426             : 
     427             :         /*
     428             :          * COW fork extents are supposed to remain unwritten until we're ready
     429             :          * to initiate a disk write.  For direct I/O we are going to write the
     430             :          * data and need the conversion, but for buffered writes we're done.
     431             :          */
     432    12688806 :         if (!convert_now || cmap->br_state == XFS_EXT_NORM)
     433             :                 return 0;
     434             : 
     435     6066658 :         trace_xfs_reflink_convert_cow(ip, cmap);
     436             : 
     437     6066657 :         error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
     438     6066659 :         if (!error)
     439     6066659 :                 cmap->br_state = XFS_EXT_NORM;
     440             : 
     441             :         return error;
     442             : }
     443             : 
     444             : static int
     445     1226137 : xfs_reflink_fill_cow_hole(
     446             :         struct xfs_inode        *ip,
     447             :         struct xfs_bmbt_irec    *imap,
     448             :         struct xfs_bmbt_irec    *cmap,
     449             :         bool                    *shared,
     450             :         uint                    *lockmode,
     451             :         bool                    convert_now)
     452             : {
     453     1226137 :         struct xfs_mount        *mp = ip->i_mount;
     454     1226137 :         struct xfs_trans        *tp;
     455     1226137 :         xfs_filblks_t           resaligned;
     456     1226137 :         unsigned int            dblocks = 0, rblocks = 0;
     457     1226137 :         int                     nimaps;
     458     1226137 :         int                     error;
     459     1226137 :         bool                    found;
     460             : 
     461     1226137 :         resaligned = xfs_aligned_fsb_count(imap->br_startoff,
     462             :                 imap->br_blockcount, xfs_get_cowextsz_hint(ip));
     463     1226134 :         if (XFS_IS_REALTIME_INODE(ip)) {
     464      872791 :                 dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
     465      872791 :                 rblocks = resaligned;
     466             :         } else {
     467      353343 :                 dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
     468      353343 :                 rblocks = 0;
     469             :         }
     470             : 
     471     1226134 :         xfs_iunlock(ip, *lockmode);
     472     1226139 :         *lockmode = 0;
     473             : 
     474     1226139 :         error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks,
     475             :                         rblocks, false, &tp);
     476     1226139 :         if (error)
     477             :                 return error;
     478             : 
     479     1224363 :         *lockmode = XFS_ILOCK_EXCL;
     480             : 
     481     1224363 :         error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
     482     1224359 :         if (error || !*shared)
     483           2 :                 goto out_trans_cancel;
     484             : 
     485     1224357 :         if (found) {
     486          53 :                 xfs_trans_cancel(tp);
     487          53 :                 goto convert;
     488             :         }
     489             : 
     490             :         /* Allocate the entire reservation as unwritten blocks. */
     491     1224304 :         nimaps = 1;
     492     1224304 :         error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
     493             :                         XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
     494             :                         &nimaps);
     495     1224297 :         if (error)
     496          11 :                 goto out_trans_cancel;
     497             : 
     498     1224286 :         xfs_inode_set_cowblocks_tag(ip);
     499     1224278 :         error = xfs_trans_commit(tp);
     500     1224301 :         if (error)
     501             :                 return error;
     502             : 
     503             :         /*
     504             :          * Allocation succeeded but the requested range was not even partially
     505             :          * satisfied?  Bail out!
     506             :          */
     507     1224293 :         if (nimaps == 0)
     508             :                 return -ENOSPC;
     509             : 
     510     1224293 : convert:
     511     1224346 :         return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
     512             : 
     513          13 : out_trans_cancel:
     514          13 :         xfs_trans_cancel(tp);
     515          13 :         return error;
     516             : }
     517             : 
     518             : static int
     519        5643 : xfs_reflink_fill_delalloc(
     520             :         struct xfs_inode        *ip,
     521             :         struct xfs_bmbt_irec    *imap,
     522             :         struct xfs_bmbt_irec    *cmap,
     523             :         bool                    *shared,
     524             :         uint                    *lockmode,
     525             :         bool                    convert_now)
     526             : {
     527        5643 :         struct xfs_mount        *mp = ip->i_mount;
     528        5683 :         struct xfs_trans        *tp;
     529        5683 :         int                     nimaps;
     530        5683 :         int                     error;
     531        5683 :         bool                    found;
     532             : 
     533        5683 :         do {
     534        5683 :                 xfs_iunlock(ip, *lockmode);
     535        5683 :                 *lockmode = 0;
     536             : 
     537        5683 :                 error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0,
     538             :                                 false, &tp);
     539        5683 :                 if (error)
     540           0 :                         return error;
     541             : 
     542        5683 :                 *lockmode = XFS_ILOCK_EXCL;
     543             : 
     544        5683 :                 error = xfs_find_trim_cow_extent(ip, imap, cmap, shared,
     545             :                                 &found);
     546        5683 :                 if (error || !*shared)
     547           0 :                         goto out_trans_cancel;
     548             : 
     549        5683 :                 if (found) {
     550           3 :                         xfs_trans_cancel(tp);
     551           3 :                         break;
     552             :                 }
     553             : 
     554        5680 :                 ASSERT(isnullstartblock(cmap->br_startblock) ||
     555             :                        cmap->br_startblock == DELAYSTARTBLOCK);
     556             : 
     557             :                 /*
     558             :                  * Replace delalloc reservation with an unwritten extent.
     559             :                  */
     560        5680 :                 nimaps = 1;
     561        5680 :                 error = xfs_bmapi_write(tp, ip, cmap->br_startoff,
     562             :                                 cmap->br_blockcount,
     563             :                                 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0,
     564             :                                 cmap, &nimaps);
     565        5680 :                 if (error)
     566           1 :                         goto out_trans_cancel;
     567             : 
     568        5679 :                 xfs_inode_set_cowblocks_tag(ip);
     569        5679 :                 error = xfs_trans_commit(tp);
     570        5679 :                 if (error)
     571           0 :                         return error;
     572             : 
     573             :                 /*
     574             :                  * Allocation succeeded but the requested range was not even
     575             :                  * partially satisfied?  Bail out!
     576             :                  */
     577        5679 :                 if (nimaps == 0)
     578             :                         return -ENOSPC;
     579        5679 :         } while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
     580             : 
     581        5642 :         return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
     582             : 
     583           1 : out_trans_cancel:
     584           1 :         xfs_trans_cancel(tp);
     585           1 :         return error;
     586             : }
     587             : 
     588             : /* Allocate all CoW reservations covering a range of blocks in a file. */
     589             : int
     590    92668396 : xfs_reflink_allocate_cow(
     591             :         struct xfs_inode        *ip,
     592             :         struct xfs_bmbt_irec    *imap,
     593             :         struct xfs_bmbt_irec    *cmap,
     594             :         bool                    *shared,
     595             :         uint                    *lockmode,
     596             :         bool                    convert_now)
     597             : {
     598    92668396 :         int                     error;
     599    92668396 :         bool                    found;
     600             : 
     601    92668396 :         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
     602    92666945 :         if (!ip->i_cowfp) {
     603        5244 :                 ASSERT(!xfs_is_reflink_inode(ip));
     604        5244 :                 xfs_ifork_init_cow(ip);
     605             :         }
     606             : 
     607    92666945 :         error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
     608    92667981 :         if (error || !*shared)
     609    79977248 :                 return error;
     610             : 
     611             :         /* CoW fork has a real extent */
     612    12690733 :         if (found)
     613    11458951 :                 return xfs_reflink_convert_unwritten(ip, imap, cmap,
     614             :                                 convert_now);
     615             : 
     616             :         /*
     617             :          * CoW fork does not have an extent and data extent is shared.
     618             :          * Allocate a real extent in the CoW fork.
     619             :          */
     620     1231782 :         if (cmap->br_startoff > imap->br_startoff)
     621     1226139 :                 return xfs_reflink_fill_cow_hole(ip, imap, cmap, shared,
     622             :                                 lockmode, convert_now);
     623             : 
     624             :         /*
     625             :          * CoW fork has a delalloc reservation. Replace it with a real extent.
     626             :          * There may or may not be a data fork mapping.
     627             :          */
     628        5643 :         if (isnullstartblock(cmap->br_startblock) ||
     629             :             cmap->br_startblock == DELAYSTARTBLOCK)
     630        5643 :                 return xfs_reflink_fill_delalloc(ip, imap, cmap, shared,
     631             :                                 lockmode, convert_now);
     632             : 
     633             :         /* Shouldn't get here. */
     634           0 :         ASSERT(0);
     635           0 :         return -EFSCORRUPTED;
     636             : }
     637             : 
     638             : /*
     639             :  * Cancel CoW reservations for some block range of an inode.
     640             :  *
     641             :  * If cancel_real is true this function cancels all COW fork extents for the
     642             :  * inode; if cancel_real is false, real extents are not cleared.
     643             :  *
     644             :  * Caller must have already joined the inode to the current transaction. The
     645             :  * inode will be joined to the transaction returned to the caller.
     646             :  */
     647             : int
     648    39327908 : xfs_reflink_cancel_cow_blocks(
     649             :         struct xfs_inode                *ip,
     650             :         struct xfs_trans                **tpp,
     651             :         xfs_fileoff_t                   offset_fsb,
     652             :         xfs_fileoff_t                   end_fsb,
     653             :         bool                            cancel_real)
     654             : {
     655    39327908 :         struct xfs_ifork                *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
     656    39327908 :         struct xfs_mount                *mp = ip->i_mount;
     657    39327908 :         struct xfs_bmbt_irec            got, del;
     658    39327908 :         struct xfs_iext_cursor          icur;
     659    39327908 :         bool                            isrt = XFS_IS_REALTIME_INODE(ip);
     660    39327908 :         int                             error = 0;
     661             : 
     662             :         /*
     663             :          * Shrink the range that we're cancelling if they don't align to the
     664             :          * realtime extent size, since we can only free full extents.
     665             :          */
     666    39327908 :         if (xfs_inode_needs_cow_around(ip)) {
     667      670622 :                 offset_fsb = xfs_rtb_roundup_rtx(mp, offset_fsb);
     668      670622 :                 end_fsb = xfs_rtb_rounddown_rtx(mp, end_fsb);
     669             :         }
     670             : 
     671    78541228 :         if (!xfs_inode_has_cow_data(ip))
     672             :                 return 0;
     673    11165002 :         if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
     674             :                 return 0;
     675             : 
     676             :         /* Walk backwards until we're out of the I/O range... */
     677    20064887 :         while (got.br_startoff + got.br_blockcount > offset_fsb) {
     678    11419903 :                 del = got;
     679    11419903 :                 xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
     680             : 
     681             :                 /* Extent delete may have bumped ext forward */
     682    11408824 :                 if (!del.br_blockcount) {
     683      212842 :                         xfs_iext_prev(ifp, &icur);
     684      212842 :                         goto next_extent;
     685             :                 }
     686             : 
     687    11195982 :                 trace_xfs_reflink_cancel_cow(ip, &del);
     688             : 
     689    11197070 :                 if (isnullstartblock(del.br_startblock)) {
     690     2780077 :                         error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
     691             :                                         &icur, &got, &del);
     692     2798834 :                         if (error)
     693             :                                 break;
     694     8416993 :                 } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
     695     6074458 :                         ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);
     696             : 
     697             :                         /* Free the CoW orphan record. */
     698     6074458 :                         xfs_refcount_free_cow_extent(*tpp, isrt,
     699     6074458 :                                         del.br_startblock, del.br_blockcount);
     700             : 
     701    11074682 :                         error = xfs_free_extent_later(*tpp, del.br_startblock,
     702             :                                         del.br_blockcount, NULL,
     703             :                                         XFS_AG_RESV_NONE,
     704             :                                         isrt ? XFS_FREE_EXTENT_REALTIME : 0);
     705     6079084 :                         if (error)
     706             :                                 break;
     707             : 
     708             :                         /* Roll the transaction */
     709     6079084 :                         error = xfs_defer_finish(tpp);
     710     6078842 :                         if (error)
     711             :                                 break;
     712             : 
     713             :                         /* Remove the mapping from the CoW fork. */
     714     6078826 :                         xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
     715             : 
     716             :                         /* Remove the quota reservation */
     717     6078351 :                         error = xfs_quota_unreserve_blkres(ip,
     718     6078351 :                                         del.br_blockcount);
     719     6079444 :                         if (error)
     720             :                                 break;
     721             :                 } else {
     722             :                         /* Didn't do anything, push cursor back. */
     723     2342535 :                         xfs_iext_prev(ifp, &icur);
     724             :                 }
     725    11433773 : next_extent:
     726    11433773 :                 if (!xfs_iext_get_extent(ifp, &icur, &got))
     727             :                         break;
     728             :         }
     729             : 
     730             :         /* clear tag if cow fork is emptied */
     731    11064832 :         if (!ifp->if_bytes)
     732     1842227 :                 xfs_inode_clear_cowblocks_tag(ip);
     733             :         return error;
     734             : }
     735             : 
     736             : /*
     737             :  * Cancel CoW reservations for some byte range of an inode.
     738             :  *
     739             :  * If cancel_real is true this function cancels all COW fork extents for the
     740             :  * inode; if cancel_real is false, real extents are not cleared.
     741             :  */
     742             : int
     743     5747508 : xfs_reflink_cancel_cow_range(
     744             :         struct xfs_inode        *ip,
     745             :         xfs_off_t               offset,
     746             :         xfs_off_t               count,
     747             :         bool                    cancel_real)
     748             : {
     749     5747508 :         struct xfs_trans        *tp;
     750     5747508 :         xfs_fileoff_t           offset_fsb;
     751     5747508 :         xfs_fileoff_t           end_fsb;
     752     5747508 :         int                     error;
     753             : 
     754     5747508 :         trace_xfs_reflink_cancel_cow_range(ip, offset, count);
     755     5747309 :         ASSERT(ip->i_cowfp);
     756             : 
     757     5747309 :         offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
     758     5747309 :         if (count == NULLFILEOFF)
     759             :                 end_fsb = NULLFILEOFF;
     760             :         else
     761     1914915 :                 end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
     762             : 
     763             :         /* Start a rolling transaction to remove the mappings */
     764     5747309 :         error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
     765             :                         0, 0, 0, &tp);
     766     5746063 :         if (error)
     767          10 :                 goto out;
     768             : 
     769     5746053 :         xfs_ilock(ip, XFS_ILOCK_EXCL);
     770     5746024 :         xfs_trans_ijoin(tp, ip, 0);
     771             : 
     772             :         /* Scrape out the old CoW reservations */
     773     5746804 :         error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
     774             :                         cancel_real);
     775     5747803 :         if (error)
     776          16 :                 goto out_cancel;
     777             : 
     778     5747787 :         error = xfs_trans_commit(tp);
     779             : 
     780     5747674 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
     781     5747674 :         return error;
     782             : 
     783             : out_cancel:
     784          16 :         xfs_trans_cancel(tp);
     785          16 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
     786          26 : out:
     787          26 :         trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_);
     788          26 :         return error;
     789             : }
     790             : 
     791             : #ifdef CONFIG_XFS_QUOTA
     792             : /*
     793             :  * Update quota accounting for a remapping operation.  When we're remapping
     794             :  * something from the CoW fork to the data fork, we must update the quota
     795             :  * accounting for delayed allocations.  For remapping from the data fork to the
     796             :  * data fork, use regular block accounting.
     797             :  */
     798             : static inline void
     799   209967986 : xfs_reflink_update_quota(
     800             :         struct xfs_trans        *tp,
     801             :         struct xfs_inode        *ip,
     802             :         bool                    is_cow,
     803             :         int64_t                 blocks)
     804             : {
     805   209967986 :         unsigned int            qflag;
     806             : 
     807   209967986 :         if (XFS_IS_REALTIME_INODE(ip)) {
     808    89888088 :                 qflag = is_cow ? XFS_TRANS_DQ_DELRTBCOUNT :
     809             :                                  XFS_TRANS_DQ_RTBCOUNT;
     810             :         } else {
     811   120079898 :                 qflag = is_cow ? XFS_TRANS_DQ_DELBCOUNT :
     812             :                                  XFS_TRANS_DQ_BCOUNT;
     813             :         }
     814   209967986 :         xfs_trans_mod_dquot_byino(tp, ip, qflag, blocks);
     815   209966657 : }
     816             : #else
     817             : # define xfs_reflink_update_quota(tp, ip, is_cow, blocks)       ((void)0)
     818             : #endif
     819             : 
     820             : /*
     821             :  * Remap part of the CoW fork into the data fork.
     822             :  *
     823             :  * We aim to remap the range starting at @offset_fsb and ending at @end_fsb
     824             :  * into the data fork; this function will remap what it can (at the end of the
     825             :  * range) and update @end_fsb appropriately.  Each remap gets its own
     826             :  * transaction because we can end up merging and splitting bmbt blocks for
     827             :  * every remap operation and we'd like to keep the block reservation
     828             :  * requirements as low as possible.
     829             :  */
     830             : STATIC int
     831    15000996 : xfs_reflink_end_cow_extent(
     832             :         struct xfs_inode        *ip,
     833             :         xfs_fileoff_t           *offset_fsb,
     834             :         xfs_fileoff_t           end_fsb)
     835             : {
     836    15000996 :         struct xfs_iext_cursor  icur;
     837    15000996 :         struct xfs_bmbt_irec    got, del, data;
     838    15000996 :         struct xfs_mount        *mp = ip->i_mount;
     839    15000996 :         struct xfs_trans        *tp;
     840    15000996 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK);
     841    15000996 :         unsigned int            resblks;
     842    15000996 :         int                     nmaps;
     843    15000996 :         bool                    isrt = XFS_IS_REALTIME_INODE(ip);
     844    15000996 :         int                     error;
     845             : 
     846             :         /* No COW extents?  That's easy! */
     847    15000996 :         if (ifp->if_bytes == 0) {
     848        6291 :                 *offset_fsb = end_fsb;
     849        6291 :                 return 0;
     850             :         }
     851             : 
     852    14994705 :         resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
     853    14994705 :         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
     854             :                         XFS_TRANS_RESERVE, &tp);
     855    14994696 :         if (error)
     856             :                 return error;
     857             : 
     858             :         /*
     859             :          * Lock the inode.  We have to ijoin without automatic unlock because
     860             :          * the lead transaction is the refcountbt record deletion; the data
     861             :          * fork update follows as a deferred log item.
     862             :          */
     863    14994696 :         xfs_ilock(ip, XFS_ILOCK_EXCL);
     864    14994705 :         xfs_trans_ijoin(tp, ip, 0);
     865             : 
     866    14994707 :         error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
     867             :                         XFS_IEXT_REFLINK_END_COW_CNT);
     868    14994706 :         if (error == -EFBIG)
     869          16 :                 error = xfs_iext_count_upgrade(tp, ip,
     870             :                                 XFS_IEXT_REFLINK_END_COW_CNT);
     871    14994706 :         if (error)
     872          16 :                 goto out_cancel;
     873             : 
     874             :         /*
     875             :          * In case of racing, overlapping AIO writes no COW extents might be
     876             :          * left by the time I/O completes for the loser of the race.  In that
     877             :          * case we are done.
     878             :          */
     879    14994690 :         if (!xfs_iext_lookup_extent(ip, ifp, *offset_fsb, &icur, &got) ||
     880    14968613 :             got.br_startoff >= end_fsb) {
     881       62445 :                 *offset_fsb = end_fsb;
     882       62445 :                 goto out_cancel;
     883             :         }
     884             : 
     885             :         /*
     886             :          * Only remap real extents that contain data.  With AIO, speculative
     887             :          * preallocations can leak into the range we are called upon, and we
     888             :          * need to skip them.  Preserve @got for the eventual CoW fork
     889             :          * deletion; from now on @del represents the mapping that we're
     890             :          * actually remapping.
     891             :          */
     892    14989655 :         while (!xfs_bmap_is_written_extent(&got)) {
     893       57856 :                 if (!xfs_iext_next_extent(ifp, &icur, &got) ||
     894       57780 :                     got.br_startoff >= end_fsb) {
     895         447 :                         *offset_fsb = end_fsb;
     896         447 :                         goto out_cancel;
     897             :                 }
     898             :         }
     899    14931799 :         del = got;
     900             : 
     901             :         /* Grab the corresponding mapping in the data fork. */
     902    14931799 :         nmaps = 1;
     903    14931799 :         error = xfs_bmapi_read(ip, del.br_startoff, del.br_blockcount, &data,
     904             :                         &nmaps, 0);
     905    14931799 :         if (error)
     906           5 :                 goto out_cancel;
     907             : 
     908             :         /* We can only remap the smaller of the two extent sizes. */
     909    14931794 :         data.br_blockcount = min(data.br_blockcount, del.br_blockcount);
     910    14931794 :         del.br_blockcount = data.br_blockcount;
     911             : 
     912    14931794 :         trace_xfs_reflink_cow_remap_from(ip, &del);
     913    14931794 :         trace_xfs_reflink_cow_remap_to(ip, &data);
     914             : 
     915    25832007 :         if (xfs_bmap_is_real_extent(&data)) {
     916             :                 /*
     917             :                  * If the extent we're remapping is backed by storage (written
     918             :                  * or not), unmap the extent and drop its refcount.
     919             :                  */
     920    10900213 :                 xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &data);
     921    10900213 :                 xfs_refcount_decrease_extent(tp, isrt, &data);
     922    10900213 :                 xfs_reflink_update_quota(tp, ip, false, -data.br_blockcount);
     923     4031581 :         } else if (data.br_startblock == DELAYSTARTBLOCK) {
     924      233908 :                 int             done;
     925             : 
     926             :                 /*
     927             :                  * If the extent we're remapping is a delalloc reservation,
     928             :                  * we can use the regular bunmapi function to release the
     929             :                  * incore state.  Dropping the delalloc reservation takes care
     930             :                  * of the quota reservation for us.
     931             :                  */
     932      233908 :                 error = xfs_bunmapi(NULL, ip, data.br_startoff,
     933             :                                 data.br_blockcount, 0, 1, &done);
     934      233908 :                 if (error)
     935           0 :                         goto out_cancel;
     936      233908 :                 ASSERT(done);
     937             :         }
     938             : 
     939             :         /* Free the CoW orphan record. */
     940    14931793 :         xfs_refcount_free_cow_extent(tp, isrt, del.br_startblock,
     941    14931793 :                         del.br_blockcount);
     942             : 
     943             :         /* Map the new blocks into the data fork. */
     944    14931794 :         xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, &del);
     945             : 
     946             :         /* Charge this new data fork mapping to the on-disk quota. */
     947    14931794 :         xfs_reflink_update_quota(tp, ip, true, del.br_blockcount);
     948             : 
     949             :         /* Remove the mapping from the CoW fork. */
     950    14931794 :         xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
     951             : 
     952    14931794 :         error = xfs_trans_commit(tp);
     953    14931794 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
     954    14931794 :         if (error)
     955             :                 return error;
     956             : 
     957             :         /* Update the caller about how much progress we made. */
     958    14931769 :         *offset_fsb = del.br_startoff + del.br_blockcount;
     959    14931769 :         return 0;
     960             : 
     961       62913 : out_cancel:
     962       62913 :         xfs_trans_cancel(tp);
     963       62913 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
     964       62913 :         return error;
     965             : }
     966             : 
     967             : /*
     968             :  * Remap parts of a file's data fork after a successful CoW.
     969             :  */
     970             : int
     971    11213372 : xfs_reflink_end_cow(
     972             :         struct xfs_inode                *ip,
     973             :         xfs_off_t                       offset,
     974             :         xfs_off_t                       count)
     975             : {
     976    11213372 :         struct xfs_mount                *mp = ip->i_mount;
     977    11213372 :         xfs_fileoff_t                   offset_fsb;
     978    11213372 :         xfs_fileoff_t                   end_fsb;
     979    11213372 :         int                             error = 0;
     980             : 
     981    11213372 :         trace_xfs_reflink_end_cow(ip, offset, count);
     982             : 
     983    11213373 :         offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
     984    11213373 :         end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
     985             : 
     986             :         /*
     987             :          * Make sure the end is aligned with a rt extent (if desired), since
     988             :          * the end of the range could be EOF.  The _convert_cow function should
     989             :          * have set us up to swap only full rt extents.
     990             :          */
     991    11213373 :         if (xfs_inode_needs_cow_around(ip)) {
     992      239445 :                 offset_fsb = xfs_rtb_rounddown_rtx(mp, offset_fsb);
     993      239445 :                 end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
     994             :         }
     995             : 
     996             :         /*
     997             :          * Walk forwards until we've remapped the I/O range.  The loop function
     998             :          * repeatedly cycles the ILOCK to allocate one transaction per remapped
     999             :          * extent.
    1000             :          *
    1001             :          * If we're being called by writeback then the pages will still
    1002             :          * have PageWriteback set, which prevents races with reflink remapping
    1003             :          * and truncate.  Reflink remapping prevents races with writeback by
    1004             :          * taking the iolock and mmaplock before flushing the pages and
    1005             :          * remapping, which means there won't be any further writeback or page
    1006             :          * cache dirtying until the reflink completes.
    1007             :          *
    1008             :          * We should never have two threads issuing writeback for the same file
    1009             :          * region.  There are also have post-eof checks in the writeback
    1010             :          * preparation code so that we don't bother writing out pages that are
    1011             :          * about to be truncated.
    1012             :          *
    1013             :          * If we're being called as part of directio write completion, the dio
    1014             :          * count is still elevated, which reflink and truncate will wait for.
    1015             :          * Reflink remapping takes the iolock and mmaplock and waits for
    1016             :          * pending dio to finish, which should prevent any directio until the
    1017             :          * remap completes.  Multiple concurrent directio writes to the same
    1018             :          * region are handled by end_cow processing only occurring for the
    1019             :          * threads which succeed; the outcome of multiple overlapping direct
    1020             :          * writes is not well defined anyway.
    1021             :          *
    1022             :          * It's possible that a buffered write and a direct write could collide
    1023             :          * here (the buffered write stumbles in after the dio flushes and
    1024             :          * invalidates the page cache and immediately queues writeback), but we
    1025             :          * have never supported this 100%.  If either disk write succeeds the
    1026             :          * blocks will be remapped.
    1027             :          */
    1028    26214369 :         while (end_fsb > offset_fsb && !error)
    1029    15000997 :                 error = xfs_reflink_end_cow_extent(ip, &offset_fsb, end_fsb);
    1030             : 
    1031    11213372 :         if (error)
    1032          46 :                 trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
    1033    11213372 :         return error;
    1034             : }
    1035             : 
    1036             : /*
    1037             :  * Free all CoW staging blocks that are still referenced by the ondisk refcount
    1038             :  * metadata.  The ondisk metadata does not track which inode created the
    1039             :  * staging extent, so callers must ensure that there are no cached inodes with
    1040             :  * live CoW staging extents.
    1041             :  */
    1042             : int
    1043       13570 : xfs_reflink_recover_cow(
    1044             :         struct xfs_mount        *mp)
    1045             : {
    1046       13570 :         struct xfs_perag        *pag;
    1047       13570 :         struct xfs_rtgroup      *rtg;
    1048       13570 :         xfs_agnumber_t          agno;
    1049       13570 :         xfs_rgnumber_t          rgno;
    1050       13570 :         int                     error = 0;
    1051             : 
    1052       13570 :         if (!xfs_has_reflink(mp))
    1053             :                 return 0;
    1054             : 
    1055       69079 :         for_each_perag(mp, agno, pag) {
    1056       56002 :                 error = xfs_refcount_recover_cow_leftovers(mp, pag);
    1057       56002 :                 if (error) {
    1058          30 :                         xfs_perag_rele(pag);
    1059          30 :                         return error;
    1060             :                 }
    1061             :         }
    1062             : 
    1063       19402 :         for_each_rtgroup(mp, rgno, rtg) {
    1064        6325 :                 error = xfs_refcount_recover_rtcow_leftovers(mp, rtg);
    1065        6325 :                 if (error) {
    1066           0 :                         xfs_rtgroup_rele(rtg);
    1067           0 :                         return error;
    1068             :                 }
    1069             :         }
    1070             : 
    1071             :         return 0;
    1072             : }
    1073             : 
    1074             : /*
    1075             :  * Reflinking (Block) Ranges of Two Files Together
    1076             :  *
    1077             :  * First, ensure that the reflink flag is set on both inodes.  The flag is an
    1078             :  * optimization to avoid unnecessary refcount btree lookups in the write path.
    1079             :  *
    1080             :  * Now we can iteratively remap the range of extents (and holes) in src to the
    1081             :  * corresponding ranges in dest.  Let drange and srange denote the ranges of
    1082             :  * logical blocks in dest and src touched by the reflink operation.
    1083             :  *
    1084             :  * While the length of drange is greater than zero,
    1085             :  *    - Read src's bmbt at the start of srange ("imap")
    1086             :  *    - If imap doesn't exist, make imap appear to start at the end of srange
    1087             :  *      with zero length.
    1088             :  *    - If imap starts before srange, advance imap to start at srange.
    1089             :  *    - If imap goes beyond srange, truncate imap to end at the end of srange.
    1090             :  *    - Punch (imap start - srange start + imap len) blocks from dest at
    1091             :  *      offset (drange start).
    1092             :  *    - If imap points to a real range of pblks,
    1093             :  *         > Increase the refcount of the imap's pblks
    1094             :  *         > Map imap's pblks into dest at the offset
    1095             :  *           (drange start + imap start - srange start)
    1096             :  *    - Advance drange and srange by (imap start - srange start + imap len)
    1097             :  *
    1098             :  * Finally, if the reflink made dest longer, update both the in-core and
    1099             :  * on-disk file sizes.
    1100             :  *
    1101             :  * ASCII Art Demonstration:
    1102             :  *
    1103             :  * Let's say we want to reflink this source file:
    1104             :  *
    1105             :  * ----SSSSSSS-SSSSS----SSSSSS (src file)
    1106             :  *   <-------------------->
    1107             :  *
    1108             :  * into this destination file:
    1109             :  *
    1110             :  * --DDDDDDDDDDDDDDDDDDD--DDD (dest file)
    1111             :  *        <-------------------->
    1112             :  * '-' means a hole, and 'S' and 'D' are written blocks in the src and dest.
    1113             :  * Observe that the range has different logical offsets in either file.
    1114             :  *
    1115             :  * Consider that the first extent in the source file doesn't line up with our
    1116             :  * reflink range.  Unmapping  and remapping are separate operations, so we can
    1117             :  * unmap more blocks from the destination file than we remap.
    1118             :  *
    1119             :  * ----SSSSSSS-SSSSS----SSSSSS
    1120             :  *   <------->
    1121             :  * --DDDDD---------DDDDD--DDD
    1122             :  *        <------->
    1123             :  *
    1124             :  * Now remap the source extent into the destination file:
    1125             :  *
    1126             :  * ----SSSSSSS-SSSSS----SSSSSS
    1127             :  *   <------->
    1128             :  * --DDDDD--SSSSSSSDDDDD--DDD
    1129             :  *        <------->
    1130             :  *
    1131             :  * Do likewise with the second hole and extent in our range.  Holes in the
    1132             :  * unmap range don't affect our operation.
    1133             :  *
    1134             :  * ----SSSSSSS-SSSSS----SSSSSS
    1135             :  *            <---->
    1136             :  * --DDDDD--SSSSSSS-SSSSS-DDD
    1137             :  *                 <---->
    1138             :  *
    1139             :  * Finally, unmap and remap part of the third extent.  This will increase the
    1140             :  * size of the destination file.
    1141             :  *
    1142             :  * ----SSSSSSS-SSSSS----SSSSSS
    1143             :  *                  <----->
    1144             :  * --DDDDD--SSSSSSS-SSSSS----SSS
    1145             :  *                       <----->
    1146             :  *
    1147             :  * Once we update the destination file's i_size, we're done.
    1148             :  */
    1149             : 
    1150             : /*
    1151             :  * Ensure the reflink bit is set in both inodes.
    1152             :  */
    1153             : STATIC int
    1154   243381673 : xfs_reflink_set_inode_flag(
    1155             :         struct xfs_inode        *src,
    1156             :         struct xfs_inode        *dest)
    1157             : {
    1158   243381673 :         struct xfs_mount        *mp = src->i_mount;
    1159   243381673 :         int                     error;
    1160   243381673 :         struct xfs_trans        *tp;
    1161             : 
    1162   243381673 :         if (xfs_is_reflink_inode(src) && xfs_is_reflink_inode(dest))
    1163             :                 return 0;
    1164             : 
    1165     5186569 :         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
    1166     5187647 :         if (error)
    1167           0 :                 goto out_error;
    1168             : 
    1169             :         /* Lock both files against IO */
    1170     5187647 :         if (src->i_ino == dest->i_ino)
    1171      111000 :                 xfs_ilock(src, XFS_ILOCK_EXCL);
    1172             :         else
    1173     5076647 :                 xfs_lock_two_inodes(src, XFS_ILOCK_EXCL, dest, XFS_ILOCK_EXCL);
    1174             : 
    1175     5187638 :         if (!xfs_is_reflink_inode(src)) {
    1176      492869 :                 trace_xfs_reflink_set_inode_flag(src);
    1177      492868 :                 xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL);
    1178      492868 :                 src->i_diflags2 |= XFS_DIFLAG2_REFLINK;
    1179      492868 :                 xfs_trans_log_inode(tp, src, XFS_ILOG_CORE);
    1180      492869 :                 xfs_ifork_init_cow(src);
    1181             :         } else
    1182     4694769 :                 xfs_iunlock(src, XFS_ILOCK_EXCL);
    1183             : 
    1184     5187647 :         if (src->i_ino == dest->i_ino)
    1185      110999 :                 goto commit_flags;
    1186             : 
    1187     5076648 :         if (!xfs_is_reflink_inode(dest)) {
    1188     4774819 :                 trace_xfs_reflink_set_inode_flag(dest);
    1189     4774787 :                 xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
    1190     4774821 :                 dest->i_diflags2 |= XFS_DIFLAG2_REFLINK;
    1191     4774821 :                 xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
    1192     4774837 :                 xfs_ifork_init_cow(dest);
    1193             :         } else
    1194      301829 :                 xfs_iunlock(dest, XFS_ILOCK_EXCL);
    1195             : 
    1196     5187627 : commit_flags:
    1197     5187627 :         error = xfs_trans_commit(tp);
    1198     5187649 :         if (error)
    1199           2 :                 goto out_error;
    1200             :         return error;
    1201             : 
    1202           2 : out_error:
    1203           2 :         trace_xfs_reflink_set_inode_flag_error(dest, error, _RET_IP_);
    1204           2 :         return error;
    1205             : }
    1206             : 
    1207             : /*
    1208             :  * Update destination inode size & cowextsize hint, if necessary.
    1209             :  */
    1210             : int
    1211   241977421 : xfs_reflink_update_dest(
    1212             :         struct xfs_inode        *dest,
    1213             :         xfs_off_t               newlen,
    1214             :         xfs_extlen_t            cowextsize,
    1215             :         unsigned int            remap_flags)
    1216             : {
    1217   241977421 :         struct xfs_mount        *mp = dest->i_mount;
    1218   241977421 :         struct xfs_trans        *tp;
    1219   241977421 :         int                     error;
    1220             : 
    1221   241977421 :         if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
    1222             :                 return 0;
    1223             : 
    1224     3793747 :         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
    1225     3785342 :         if (error)
    1226           0 :                 goto out_error;
    1227             : 
    1228     3785342 :         xfs_ilock(dest, XFS_ILOCK_EXCL);
    1229     3785284 :         xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
    1230             : 
    1231     3785257 :         if (newlen > i_size_read(VFS_I(dest))) {
    1232     3785266 :                 trace_xfs_reflink_update_inode_size(dest, newlen);
    1233     3785200 :                 i_size_write(VFS_I(dest), newlen);
    1234     3785200 :                 dest->i_disk_size = newlen;
    1235             :         }
    1236             : 
    1237     3785191 :         if (cowextsize) {
    1238          27 :                 dest->i_cowextsize = cowextsize;
    1239          27 :                 dest->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
    1240             :         }
    1241             : 
    1242     3785191 :         xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
    1243             : 
    1244     3785312 :         error = xfs_trans_commit(tp);
    1245     3785347 :         if (error)
    1246           0 :                 goto out_error;
    1247             :         return error;
    1248             : 
    1249           0 : out_error:
    1250           0 :         trace_xfs_reflink_update_inode_size_error(dest, error, _RET_IP_);
    1251           0 :         return error;
    1252             : }
    1253             : 
    1254             : /*
    1255             :  * Do we have enough reserve in this AG to handle a reflink?  The refcount
    1256             :  * btree already reserved all the space it needs, but the rmap btree can grow
    1257             :  * infinitely, so we won't allow more reflinks when the AG is down to the
    1258             :  * btree reserves.
    1259             :  */
    1260             : static int
    1261   136258997 : xfs_reflink_ag_has_free_space(
    1262             :         struct xfs_mount        *mp,
    1263             :         struct xfs_inode        *ip,
    1264             :         xfs_fsblock_t           fsb)
    1265             : {
    1266   136258997 :         struct xfs_perag        *pag;
    1267   136258997 :         xfs_agnumber_t          agno;
    1268   136258997 :         int                     error = 0;
    1269             : 
    1270   136258997 :         if (!xfs_has_rmapbt(mp))
    1271             :                 return 0;
    1272   136258997 :         if (XFS_IS_REALTIME_INODE(ip)) {
    1273    60236071 :                 struct xfs_rtgroup      *rtg;
    1274    60236071 :                 xfs_rgnumber_t          rgno;
    1275             : 
    1276    60236071 :                 rgno = xfs_rtb_to_rgno(mp, fsb);
    1277    60236038 :                 rtg = xfs_rtgroup_get(mp, rgno);
    1278   120472135 :                 if (xfs_imeta_resv_critical(rtg->rtg_rmapip) ||
    1279    60236012 :                     xfs_imeta_resv_critical(rtg->rtg_refcountip))
    1280             :                         error = -ENOSPC;
    1281    60236037 :                 xfs_rtgroup_put(rtg);
    1282    60236037 :                 return error;
    1283             :         }
    1284             : 
    1285    76022926 :         agno = XFS_FSB_TO_AGNO(mp, fsb);
    1286    76022926 :         pag = xfs_perag_get(mp, agno);
    1287   152044581 :         if (xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) ||
    1288    76021660 :             xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA))
    1289             :                 error = -ENOSPC;
    1290    76022853 :         xfs_perag_put(pag);
    1291    76022853 :         return error;
    1292             : }
    1293             : 
    1294             : /*
    1295             :  * Remap the given extent into the file.  The dmap blockcount will be set to
    1296             :  * the number of blocks that were actually remapped.
    1297             :  */
    1298             : STATIC int
    1299   438684007 : xfs_reflink_remap_extent(
    1300             :         struct xfs_inode        *ip,
    1301             :         struct xfs_bmbt_irec    *dmap,
    1302             :         xfs_off_t               new_isize)
    1303             : {
    1304   438684007 :         struct xfs_bmbt_irec    smap;
    1305   438684007 :         struct xfs_mount        *mp = ip->i_mount;
    1306   438684007 :         struct xfs_trans        *tp;
    1307   438684007 :         xfs_off_t               newlen;
    1308   438684007 :         int64_t                 qdelta = 0;
    1309   438684007 :         unsigned int            dblocks, rblocks, resblks;
    1310   438684007 :         bool                    quota_reserved = true;
    1311   438684007 :         bool                    smap_real;
    1312   438684007 :         bool                    dmap_written = xfs_bmap_is_written_extent(dmap);
    1313   438684007 :         bool                    isrt = XFS_IS_REALTIME_INODE(ip);
    1314   438684007 :         int                     iext_delta = 0;
    1315   438684007 :         int                     nimaps;
    1316   438684007 :         int                     error;
    1317             : 
    1318             :         /*
    1319             :          * Start a rolling transaction to switch the mappings.
    1320             :          *
    1321             :          * Adding a written extent to the extent map can cause a bmbt split,
    1322             :          * and removing a mapped extent from the extent can cause a bmbt split.
    1323             :          * The two operations cannot both cause a split since they operate on
    1324             :          * the same index in the bmap btree, so we only need a reservation for
    1325             :          * one bmbt split if either thing is happening.  However, we haven't
    1326             :          * locked the inode yet, so we reserve assuming this is the case.
    1327             :          *
    1328             :          * The first allocation call tries to reserve enough space to handle
    1329             :          * mapping dmap into a sparse part of the file plus the bmbt split.  We
    1330             :          * haven't locked the inode or read the existing mapping yet, so we do
    1331             :          * not know for sure that we need the space.  This should succeed most
    1332             :          * of the time.
    1333             :          *
    1334             :          * If the first attempt fails, try again but reserving only enough
    1335             :          * space to handle a bmbt split.  This is the hard minimum requirement,
    1336             :          * and we revisit quota reservations later when we know more about what
    1337             :          * we're remapping.
    1338             :          */
    1339   438684007 :         resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
    1340   438684007 :         if (XFS_IS_REALTIME_INODE(ip)) {
    1341   202536351 :                 dblocks = resblks;
    1342   202536351 :                 rblocks = dmap->br_blockcount;
    1343             :         } else {
    1344   236147656 :                 dblocks = resblks + dmap->br_blockcount;
    1345   236147656 :                 rblocks = 0;
    1346             :         }
    1347   438684007 :         error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
    1348             :                         dblocks, rblocks, false, &tp);
    1349   438701127 :         if (error == -EDQUOT || error == -ENOSPC) {
    1350     5352138 :                 quota_reserved = false;
    1351     5352138 :                 error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
    1352             :                                 resblks, 0, false, &tp);
    1353             :         }
    1354   438701138 :         if (error)
    1355     1375336 :                 goto out;
    1356             : 
    1357             :         /*
    1358             :          * Read what's currently mapped in the destination file into smap.
    1359             :          * If smap isn't a hole, we will have to remove it before we can add
    1360             :          * dmap to the destination file.
    1361             :          */
    1362   437325802 :         nimaps = 1;
    1363   437325802 :         error = xfs_bmapi_read(ip, dmap->br_startoff, dmap->br_blockcount,
    1364             :                         &smap, &nimaps, 0);
    1365   437315355 :         if (error)
    1366          15 :                 goto out_cancel;
    1367   437315340 :         ASSERT(nimaps == 1 && smap.br_startoff == dmap->br_startoff);
    1368   437315340 :         smap_real = xfs_bmap_is_real_extent(&smap);
    1369             : 
    1370             :         /*
    1371             :          * We can only remap as many blocks as the smaller of the two extent
    1372             :          * maps, because we can only remap one extent at a time.
    1373             :          */
    1374   437315340 :         dmap->br_blockcount = min(dmap->br_blockcount, smap.br_blockcount);
    1375   437315340 :         ASSERT(dmap->br_blockcount == smap.br_blockcount);
    1376             : 
    1377   437315340 :         trace_xfs_reflink_remap_extent_dest(ip, &smap);
    1378             : 
    1379             :         /*
    1380             :          * Two extents mapped to the same physical block must not have
    1381             :          * different states; that's filesystem corruption.  Move on to the next
    1382             :          * extent if they're both holes or both the same physical extent.
    1383             :          */
    1384   437314894 :         if (dmap->br_startblock == smap.br_startblock) {
    1385   251835456 :                 if (dmap->br_state != smap.br_state) {
    1386           0 :                         xfs_bmap_mark_sick(ip, XFS_DATA_FORK);
    1387           0 :                         error = -EFSCORRUPTED;
    1388             :                 }
    1389   251835456 :                 goto out_cancel;
    1390             :         }
    1391             : 
    1392             :         /* If both extents are unwritten, leave them alone. */
    1393   185479438 :         if (dmap->br_state == XFS_EXT_UNWRITTEN &&
    1394    18797163 :             smap.br_state == XFS_EXT_UNWRITTEN)
    1395     1342735 :                 goto out_cancel;
    1396             : 
    1397             :         /* No reflinking if the AG of the dest mapping is low on space. */
    1398   184136703 :         if (dmap_written) {
    1399   136259011 :                 error = xfs_reflink_ag_has_free_space(mp, ip,
    1400             :                                 dmap->br_startblock);
    1401   136259040 :                 if (error)
    1402        1263 :                         goto out_cancel;
    1403             :         }
    1404             : 
    1405             :         /*
    1406             :          * Increase quota reservation if we think the quota block counter for
    1407             :          * this file could increase.
    1408             :          *
    1409             :          * If we are mapping a written extent into the file, we need to have
    1410             :          * enough quota block count reservation to handle the blocks in that
    1411             :          * extent.  We log only the delta to the quota block counts, so if the
    1412             :          * extent we're unmapping also has blocks allocated to it, we don't
    1413             :          * need a quota reservation for the extent itself.
    1414             :          *
    1415             :          * Note that if we're replacing a delalloc reservation with a written
    1416             :          * extent, we have to take the full quota reservation because removing
    1417             :          * the delalloc reservation gives the block count back to the quota
    1418             :          * count.  This is suboptimal, but the VFS flushed the dest range
    1419             :          * before we started.  That should have removed all the delalloc
    1420             :          * reservations, but we code defensively.
    1421             :          *
    1422             :          * xfs_trans_alloc_inode above already tried to grab an even larger
    1423             :          * quota reservation, and kicked off a blockgc scan if it couldn't.
    1424             :          * If we can't get a potentially smaller quota reservation now, we're
    1425             :          * done.
    1426             :          */
    1427   184135469 :         if (!quota_reserved && !smap_real && dmap_written) {
    1428      166509 :                 if (XFS_IS_REALTIME_INODE(ip)) {
    1429       58605 :                         dblocks = 0;
    1430       58605 :                         rblocks = dmap->br_blockcount;
    1431             :                 } else {
    1432      107904 :                         dblocks = dmap->br_blockcount;
    1433      107904 :                         rblocks = 0;
    1434             :                 }
    1435      166509 :                 error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks,
    1436             :                                 false);
    1437      166509 :                 if (error)
    1438           0 :                         goto out_cancel;
    1439             :         }
    1440             : 
    1441   184135469 :         if (smap_real)
    1442    34960150 :                 ++iext_delta;
    1443             : 
    1444   184135469 :         if (dmap_written)
    1445   136257722 :                 ++iext_delta;
    1446             : 
    1447   184135469 :         error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, iext_delta);
    1448   184134752 :         if (error == -EFBIG)
    1449          24 :                 error = xfs_iext_count_upgrade(tp, ip, iext_delta);
    1450   184134752 :         if (error)
    1451          24 :                 goto out_cancel;
    1452             : 
    1453   184134728 :         if (smap_real) {
    1454             :                 /*
    1455             :                  * If the extent we're unmapping is backed by storage (written
    1456             :                  * or not), unmap the extent and drop its refcount.
    1457             :                  */
    1458    34959713 :                 xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &smap);
    1459    34960814 :                 xfs_refcount_decrease_extent(tp, isrt, &smap);
    1460    34961547 :                 qdelta -= smap.br_blockcount;
    1461   149175015 :         } else if (smap.br_startblock == DELAYSTARTBLOCK) {
    1462        3199 :                 int             done;
    1463             : 
    1464             :                 /*
    1465             :                  * If the extent we're unmapping is a delalloc reservation,
    1466             :                  * we can use the regular bunmapi function to release the
    1467             :                  * incore state.  Dropping the delalloc reservation takes care
    1468             :                  * of the quota reservation for us.
    1469             :                  */
    1470        3199 :                 error = xfs_bunmapi(NULL, ip, smap.br_startoff,
    1471             :                                 smap.br_blockcount, 0, 1, &done);
    1472        3199 :                 if (error)
    1473           0 :                         goto out_cancel;
    1474        3199 :                 ASSERT(done);
    1475             :         }
    1476             : 
    1477             :         /*
    1478             :          * If the extent we're sharing is backed by written storage, increase
    1479             :          * its refcount and map it into the file.
    1480             :          */
    1481   184136562 :         if (dmap_written) {
    1482   136257540 :                 xfs_refcount_increase_extent(tp, isrt, dmap);
    1483   136257587 :                 xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, dmap);
    1484   136257799 :                 qdelta += dmap->br_blockcount;
    1485             :         }
    1486             : 
    1487   184136821 :         xfs_reflink_update_quota(tp, ip, false, qdelta);
    1488             : 
    1489             :         /* Update dest isize if needed. */
    1490   184134392 :         newlen = XFS_FSB_TO_B(mp, dmap->br_startoff + dmap->br_blockcount);
    1491   184134392 :         newlen = min_t(xfs_off_t, newlen, new_isize);
    1492   184134392 :         if (newlen > i_size_read(VFS_I(ip))) {
    1493   118765478 :                 trace_xfs_reflink_update_inode_size(ip, newlen);
    1494   118765475 :                 i_size_write(VFS_I(ip), newlen);
    1495   118765475 :                 ip->i_disk_size = newlen;
    1496   118765475 :                 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
    1497             :         }
    1498             : 
    1499             :         /* Commit everything and unlock. */
    1500   184134389 :         error = xfs_trans_commit(tp);
    1501   184137323 :         goto out_unlock;
    1502             : 
    1503   253179493 : out_cancel:
    1504   253179493 :         xfs_trans_cancel(tp);
    1505   437263559 : out_unlock:
    1506   437263559 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    1507   438627702 : out:
    1508   438627702 :         if (error)
    1509     1377541 :                 trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_);
    1510   438627668 :         return error;
    1511             : }
    1512             : 
    1513             : /* Remap a range of one file to the other. */
    1514             : int
    1515   243401518 : xfs_reflink_remap_blocks(
    1516             :         struct xfs_inode        *src,
    1517             :         loff_t                  pos_in,
    1518             :         struct xfs_inode        *dest,
    1519             :         loff_t                  pos_out,
    1520             :         loff_t                  remap_len,
    1521             :         loff_t                  *remapped)
    1522             : {
    1523   243401518 :         struct xfs_bmbt_irec    imap;
    1524   243401518 :         struct xfs_mount        *mp = src->i_mount;
    1525   243401518 :         xfs_fileoff_t           srcoff = XFS_B_TO_FSBT(mp, pos_in);
    1526   243401518 :         xfs_fileoff_t           destoff = XFS_B_TO_FSBT(mp, pos_out);
    1527   243401518 :         xfs_filblks_t           len;
    1528   243401518 :         xfs_filblks_t           remapped_len = 0;
    1529   243401518 :         xfs_off_t               new_isize = pos_out + remap_len;
    1530   243401518 :         int                     nimaps;
    1531   243401518 :         int                     error = 0;
    1532             : 
    1533   243401518 :         len = min_t(xfs_filblks_t, XFS_B_TO_FSB(mp, remap_len),
    1534             :                         XFS_MAX_FILEOFF);
    1535             : 
    1536             :         /*
    1537             :          * Make sure the end is aligned with a rt extent (if desired), since
    1538             :          * the end of the range could be EOF.
    1539             :          */
    1540   243401518 :         if (xfs_inode_has_bigrtextents(dest))
    1541    14152750 :                 len = xfs_rtb_roundup_rtx(mp, len);
    1542             : 
    1543   243401622 :         trace_xfs_reflink_remap_blocks(src, srcoff, len, dest, destoff);
    1544             : 
    1545   680649986 :         while (len > 0) {
    1546   438672854 :                 unsigned int    lock_mode;
    1547             : 
    1548             :                 /* Read extent from the source file */
    1549   438672854 :                 nimaps = 1;
    1550   438672854 :                 lock_mode = xfs_ilock_data_map_shared(src);
    1551   438689913 :                 error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
    1552   438697678 :                 xfs_iunlock(src, lock_mode);
    1553   438685147 :                 if (error)
    1554             :                         break;
    1555             :                 /*
    1556             :                  * The caller supposedly flushed all dirty pages in the source
    1557             :                  * file range, which means that writeback should have allocated
    1558             :                  * or deleted all delalloc reservations in that range.  If we
    1559             :                  * find one, that's a good sign that something is seriously
    1560             :                  * wrong here.
    1561             :                  */
    1562   438685097 :                 ASSERT(nimaps == 1 && imap.br_startoff == srcoff);
    1563   438685097 :                 if (imap.br_startblock == DELAYSTARTBLOCK) {
    1564           0 :                         ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
    1565           0 :                         xfs_bmap_mark_sick(src, XFS_DATA_FORK);
    1566           0 :                         error = -EFSCORRUPTED;
    1567           0 :                         break;
    1568             :                 }
    1569             : 
    1570   438685097 :                 trace_xfs_reflink_remap_extent_src(src, &imap);
    1571             : 
    1572             :                 /* Remap into the destination file at the given offset. */
    1573   438685705 :                 imap.br_startoff = destoff;
    1574   438685705 :                 error = xfs_reflink_remap_extent(dest, &imap, new_isize);
    1575   438651400 :                 if (error)
    1576             :                         break;
    1577             : 
    1578   437273882 :                 if (fatal_signal_pending(current)) {
    1579             :                         error = -EINTR;
    1580             :                         break;
    1581             :                 }
    1582             : 
    1583             :                 /* Advance drange/srange */
    1584   437248364 :                 srcoff += imap.br_blockcount;
    1585   437248364 :                 destoff += imap.br_blockcount;
    1586   437248364 :                 len -= imap.br_blockcount;
    1587   437248364 :                 remapped_len += imap.br_blockcount;
    1588             :         }
    1589             : 
    1590   243354267 :         if (error)
    1591     1377995 :                 trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
    1592   243354270 :         *remapped = min_t(loff_t, remap_len,
    1593             :                           XFS_FSB_TO_B(src->i_mount, remapped_len));
    1594   243354270 :         return error;
    1595             : }
    1596             : 
    1597             : /*
    1598             :  * If we're reflinking to a point past the destination file's EOF, we must
    1599             :  * zero any speculative post-EOF preallocations that sit between the old EOF
    1600             :  * and the destination file offset.
    1601             :  */
    1602             : static int
    1603   243386967 : xfs_reflink_zero_posteof(
    1604             :         struct xfs_inode        *ip,
    1605             :         loff_t                  pos)
    1606             : {
    1607   243386967 :         loff_t                  isize = i_size_read(VFS_I(ip));
    1608             : 
    1609   243386967 :         if (pos <= isize)
    1610             :                 return 0;
    1611             : 
    1612     4689477 :         trace_xfs_zero_eof(ip, isize, pos - isize);
    1613     4689421 :         return xfs_zero_range(ip, isize, pos - isize, NULL);
    1614             : }
    1615             : 
    1616             : #ifdef CONFIG_XFS_RT
    1617             : /* Adjust the length of the remap operation to end on a rt extent boundary. */
    1618             : STATIC int
    1619    14155832 : xfs_reflink_remap_adjust_rtlen(
    1620             :         struct xfs_inode        *src,
    1621             :         loff_t                  pos_in,
    1622             :         struct xfs_inode        *dest,
    1623             :         loff_t                  pos_out,
    1624             :         loff_t                  *len,
    1625             :         unsigned int            remap_flags)
    1626             : {
    1627    14155832 :         struct xfs_mount        *mp = src->i_mount;
    1628    14155832 :         uint32_t                mod;
    1629             : 
    1630    14155832 :         div_u64_rem(*len, XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize), &mod);
    1631             : 
    1632             :         /*
    1633             :          * We previously checked the rtextent alignment of both offsets, so we
    1634             :          * now have to check the alignment of the length.  The VFS remap prep
    1635             :          * function can change the length on us, so we can only make length
    1636             :          * adjustments after that.  If the length is aligned to an rtextent,
    1637             :          * we're trivially good to go.
    1638             :          *
    1639             :          * Otherwise, the length is not aligned to an rt extent.  If the source
    1640             :          * file's range ends at EOF, the VFS ensured that the dest file's range
    1641             :          * also ends at EOF.  The actual remap function will round the (byte)
    1642             :          * length up to the nearest rtextent unit, so we're ok here too.
    1643             :          */
    1644    14155737 :         if (mod == 0 || pos_in + *len == i_size_read(VFS_I(src)))
    1645             :                 return 0;
    1646             : 
    1647             :         /*
    1648             :          * Otherwise, the only thing we can do is round the request length down
    1649             :          * to an rt extent boundary.  If the caller doesn't allow that, we are
    1650             :          * finished.
    1651             :          */
    1652        2967 :         if (!(remap_flags & REMAP_FILE_CAN_SHORTEN))
    1653             :                 return -EINVAL;
    1654             : 
    1655             :         /* Back off by a single extent. */
    1656         627 :         (*len) -= mod;
    1657         627 :         trace_xfs_reflink_remap_adjust_rtlen(src, pos_in, *len, dest, pos_out);
    1658         627 :         return 0;
    1659             : }
    1660             : #else
    1661             : # define xfs_reflink_remap_adjust_rtlen(...)            (0)
    1662             : #endif /* CONFIG_XFS_RT */
    1663             : 
    1664             : /*
    1665             :  * Check the alignment of a remap request when the allocation unit size isn't a
    1666             :  * power of two.  The VFS helpers use (fast) bitmask-based alignment checks,
    1667             :  * but here we have to use slow long division.
    1668             :  */
    1669             : static int
    1670    11900521 : xfs_reflink_remap_check_rtalign(
    1671             :         struct xfs_inode                *ip_in,
    1672             :         loff_t                          pos_in,
    1673             :         struct xfs_inode                *ip_out,
    1674             :         loff_t                          pos_out,
    1675             :         loff_t                          *req_len,
    1676             :         unsigned int                    remap_flags)
    1677             : {
    1678    11900521 :         struct xfs_mount                *mp = ip_in->i_mount;
    1679    11900521 :         uint32_t                        rextbytes;
    1680    11900521 :         loff_t                          in_size, out_size;
    1681    11900521 :         loff_t                          new_length, length = *req_len;
    1682    11900521 :         loff_t                          blen;
    1683             : 
    1684    11900521 :         rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
    1685    11900521 :         in_size = i_size_read(VFS_I(ip_in));
    1686    11900521 :         out_size = i_size_read(VFS_I(ip_out));
    1687             : 
    1688             :         /* The start of both ranges must be aligned to a rt extent. */
    1689    23394989 :         if (!isaligned_64(pos_in, rextbytes) ||
    1690    11494605 :             !isaligned_64(pos_out, rextbytes))
    1691      429754 :                 return -EINVAL;
    1692             : 
    1693    11470424 :         if (length == 0)
    1694       49153 :                 length = in_size - pos_in;
    1695             : 
    1696             :         /*
    1697             :          * If the user wanted us to exchange up to the infile's EOF, round up
    1698             :          * to the next block boundary for this check.
    1699             :          *
    1700             :          * Otherwise, reject the range length if it's not extent aligned.  We
    1701             :          * already confirmed the starting offsets' extent alignment.
    1702             :          */
    1703    11470424 :         if (pos_in + length == in_size)
    1704       78116 :                 blen = roundup_64(in_size, rextbytes) - pos_in;
    1705             :         else
    1706    11392308 :                 blen = rounddown_64(length, rextbytes);
    1707             : 
    1708             :         /* Don't allow overlapped remappings within the same file. */
    1709    11470265 :         if (ip_in == ip_out &&
    1710      358988 :             pos_out + blen > pos_in &&
    1711      195402 :             pos_in + blen > pos_out)
    1712             :                 return -EINVAL;
    1713             : 
    1714             :         /*
    1715             :          * Ensure that we don't exchange a partial EOF extent into the middle
    1716             :          * of another file.
    1717             :          */
    1718    11469942 :         if (isaligned_64(length, rextbytes))
    1719             :                 return 0;
    1720             : 
    1721     2429581 :         new_length = length;
    1722     2429581 :         if (pos_out + length < out_size)
    1723     2169394 :                 new_length = rounddown_64(new_length, rextbytes);
    1724             : 
    1725     2429581 :         if (new_length == length)
    1726             :                 return 0;
    1727             : 
    1728             :         /*
    1729             :          * Return the shortened request if the caller permits it.  If the
    1730             :          * request was shortened to zero rt extents, we know that the original
    1731             :          * arguments weren't valid in the first place.
    1732             :          */
    1733     2169396 :         if ((remap_flags & REMAP_FILE_CAN_SHORTEN) && new_length > 0) {
    1734       28493 :                 *req_len = new_length;
    1735       28493 :                 return 0;
    1736             :         }
    1737             : 
    1738     2140903 :         return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
    1739             : }
    1740             : 
    1741             : /*
    1742             :  * Prepare two files for range cloning.  Upon a successful return both inodes
    1743             :  * will have the iolock and mmaplock held, the page cache of the out file will
    1744             :  * be truncated, and any leases on the out file will have been broken.  This
    1745             :  * function borrows heavily from xfs_file_aio_write_checks.
    1746             :  *
    1747             :  * The VFS allows partial EOF blocks to "match" for dedupe even though it hasn't
    1748             :  * checked that the bytes beyond EOF physically match. Hence we cannot use the
    1749             :  * EOF block in the source dedupe range because it's not a complete block match,
    1750             :  * hence can introduce a corruption into the file that has it's block replaced.
    1751             :  *
    1752             :  * In similar fashion, the VFS file cloning also allows partial EOF blocks to be
    1753             :  * "block aligned" for the purposes of cloning entire files.  However, if the
    1754             :  * source file range includes the EOF block and it lands within the existing EOF
    1755             :  * of the destination file, then we can expose stale data from beyond the source
    1756             :  * file EOF in the destination file.
    1757             :  *
    1758             :  * XFS doesn't support partial block sharing, so in both cases we have check
    1759             :  * these cases ourselves. For dedupe, we can simply round the length to dedupe
    1760             :  * down to the previous whole block and ignore the partial EOF block. While this
    1761             :  * means we can't dedupe the last block of a file, this is an acceptible
    1762             :  * tradeoff for simplicity on implementation.
    1763             :  *
    1764             :  * For cloning, we want to share the partial EOF block if it is also the new EOF
    1765             :  * block of the destination file. If the partial EOF block lies inside the
    1766             :  * existing destination EOF, then we have to abort the clone to avoid exposing
    1767             :  * stale data in the destination file. Hence we reject these clone attempts with
    1768             :  * -EINVAL in this case.
    1769             :  */
    1770             : int
    1771   388531619 : xfs_reflink_remap_prep(
    1772             :         struct file             *file_in,
    1773             :         loff_t                  pos_in,
    1774             :         struct file             *file_out,
    1775             :         loff_t                  pos_out,
    1776             :         loff_t                  *len,
    1777             :         unsigned int            remap_flags)
    1778             : {
    1779   388531619 :         struct inode            *inode_in = file_inode(file_in);
    1780   388531619 :         struct xfs_inode        *src = XFS_I(inode_in);
    1781   388531619 :         struct inode            *inode_out = file_inode(file_out);
    1782   388531619 :         struct xfs_inode        *dest = XFS_I(inode_out);
    1783   388531619 :         const struct iomap_ops  *dax_read_ops = NULL;
    1784   388531619 :         unsigned int            alloc_unit = xfs_inode_alloc_unitsize(dest);
    1785   388525362 :         int                     ret;
    1786             : 
    1787             :         /* Lock both files against IO */
    1788   388525362 :         ret = xfs_ilock2_io_mmap(src, dest);
    1789   388540105 :         if (ret)
    1790             :                 return ret;
    1791             : 
    1792             :         /* Check file eligibility and prepare for block sharing. */
    1793   388540105 :         ret = -EINVAL;
    1794             :         /* Can't reflink between data and rt volumes */
    1795   795585186 :         if (XFS_IS_REALTIME_INODE(src) != XFS_IS_REALTIME_INODE(dest))
    1796           0 :                 goto out_unlock;
    1797             : 
    1798             :         /* Don't share DAX file data with non-DAX file. */
    1799   388540105 :         if (IS_DAX(inode_in) != IS_DAX(inode_out))
    1800           0 :                 goto out_unlock;
    1801             : 
    1802             :         /* Check non-power of two alignment issues, if necessary. */
    1803   573557271 :         if (XFS_IS_REALTIME_INODE(dest) && !is_power_of_2(alloc_unit)) {
    1804    11900763 :                 ret = xfs_reflink_remap_check_rtalign(src, pos_in, dest,
    1805             :                                 pos_out, len, remap_flags);
    1806    11899731 :                 if (ret)
    1807     2570957 :                         goto out_unlock;
    1808             : 
    1809             :                 /* Do the VFS checks with the regular block alignment. */
    1810     9328774 :                 alloc_unit = src->i_mount->m_sb.sb_blocksize;
    1811             :         }
    1812             : 
    1813   385968116 :         if (IS_DAX(inode_in))
    1814           0 :                 dax_read_ops = &xfs_read_iomap_ops;
    1815             : 
    1816   385968116 :         ret = __generic_remap_file_range_prep(file_in, pos_in, file_out,
    1817             :                         pos_out, len, remap_flags, dax_read_ops, alloc_unit);
    1818   385992868 :         if (ret || *len == 0)
    1819   142574975 :                 goto out_unlock;
    1820             : 
    1821             :         /* Make sure the end is aligned with a rt extent. */
    1822   243417893 :         if (xfs_inode_has_bigrtextents(src)) {
    1823    14156001 :                 ret = xfs_reflink_remap_adjust_rtlen(src, pos_in, dest,
    1824             :                                 pos_out, len, remap_flags);
    1825    14155563 :                 if (ret || *len == 0)
    1826        2712 :                         goto out_unlock;
    1827             :         }
    1828             : 
    1829             :         /* Attach dquots to dest inode before changing block map */
    1830   243414743 :         ret = xfs_qm_dqattach(dest);
    1831   243392047 :         if (ret)
    1832           0 :                 goto out_unlock;
    1833             : 
    1834             :         /*
    1835             :          * Zero existing post-eof speculative preallocations in the destination
    1836             :          * file.
    1837             :          */
    1838   243392047 :         ret = xfs_reflink_zero_posteof(dest, pos_out);
    1839   243386032 :         if (ret)
    1840        4782 :                 goto out_unlock;
    1841             : 
    1842             :         /* Set flags and remap blocks. */
    1843   243381250 :         ret = xfs_reflink_set_inode_flag(src, dest);
    1844   243371626 :         if (ret)
    1845           2 :                 goto out_unlock;
    1846             : 
    1847             :         /*
    1848             :          * Now that we've marked both inodes for reflink, make sure that all
    1849             :          * possible rt extents in both files' ranges are either wholly written,
    1850             :          * wholly unwritten, or holes.  The bmap code requires that we align
    1851             :          * all unmap and remap requests to a rt extent boundary.  We've already
    1852             :          * flushed the page cache and finished directio for the range that's
    1853             :          * being remapped, so we can convert the extents directly.
    1854             :          */
    1855   243371624 :         if (xfs_inode_has_bigrtextents(src)) {
    1856    14151304 :                 ret = xfs_rtfile_convert_unwritten(src, pos_in, *len);
    1857    14151085 :                 if (ret)
    1858          53 :                         goto out_unlock;
    1859             :         }
    1860   243371352 :         if (xfs_inode_has_bigrtextents(dest)) {
    1861    14151397 :                 ret = xfs_rtfile_convert_unwritten(dest, pos_out, *len);
    1862    14153273 :                 if (ret)
    1863          13 :                         goto out_unlock;
    1864             :         }
    1865             : 
    1866             :         /*
    1867             :          * If pos_out > EOF, we may have dirtied blocks between EOF and
    1868             :          * pos_out. In that case, we need to extend the flush and unmap to cover
    1869             :          * from EOF to the end of the copy length.
    1870             :          */
    1871   486746430 :         if (pos_out > XFS_ISIZE(dest)) {
    1872     4676599 :                 loff_t  flen = *len + (pos_out - XFS_ISIZE(dest));
    1873     4676599 :                 ret = xfs_flush_unmap_range(dest, XFS_ISIZE(dest), flen);
    1874             :         } else {
    1875   238696616 :                 ret = xfs_flush_unmap_range(dest, pos_out, *len);
    1876             :         }
    1877   243409400 :         if (ret)
    1878          84 :                 goto out_unlock;
    1879             : 
    1880             :         return 0;
    1881   145153578 : out_unlock:
    1882   145153578 :         xfs_iunlock2_io_mmap(src, dest);
    1883   145153578 :         return ret;
    1884             : }
    1885             : 
    1886             : /* Does this inode need the reflink flag? */
    1887             : int
    1888    63383970 : xfs_reflink_inode_has_shared_extents(
    1889             :         struct xfs_trans                *tp,
    1890             :         struct xfs_inode                *ip,
    1891             :         bool                            *has_shared)
    1892             : {
    1893    63383970 :         struct xfs_bmbt_irec            got;
    1894    63383970 :         struct xfs_mount                *mp = ip->i_mount;
    1895    63383970 :         struct xfs_ifork                *ifp;
    1896    63383970 :         struct xfs_iext_cursor          icur;
    1897    63383970 :         bool                            found;
    1898    63383970 :         int                             error;
    1899             : 
    1900    63383970 :         ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
    1901    63383970 :         error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
    1902    63100674 :         if (error)
    1903             :                 return error;
    1904             : 
    1905    63099627 :         *has_shared = false;
    1906    63099627 :         found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got);
    1907   880303248 :         while (found) {
    1908   820918579 :                 xfs_agblock_t           rbno;
    1909   820918579 :                 xfs_extlen_t            rlen;
    1910             : 
    1911   820918579 :                 if (isnullstartblock(got.br_startblock) ||
    1912   820914940 :                     got.br_state != XFS_EXT_NORM)
    1913    20460913 :                         goto next;
    1914             : 
    1915   835233186 :                 if (XFS_IS_REALTIME_INODE(ip)) {
    1916    34699515 :                         struct xfs_rtgroup      *rtg;
    1917    34699515 :                         xfs_rgnumber_t          rgno;
    1918    34699515 :                         xfs_rgblock_t           rgbno;
    1919             : 
    1920    34699515 :                         rgbno = xfs_rtb_to_rgbno(mp, got.br_startblock, &rgno);
    1921    34668647 :                         rtg = xfs_rtgroup_get(mp, rgno);
    1922    34757922 :                         error = xfs_reflink_find_rtshared(rtg, tp, rgbno,
    1923    34757922 :                                         got.br_blockcount, &rbno, &rlen,
    1924             :                                         false);
    1925    34754046 :                         xfs_rtgroup_put(rtg);
    1926             :                 } else {
    1927   765758151 :                         struct xfs_perag        *pag;
    1928   765758151 :                         xfs_agblock_t           agbno;
    1929             : 
    1930   765758151 :                         pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp,
    1931             :                                                 got.br_startblock));
    1932   765843803 :                         agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock);
    1933   765834989 :                         error = xfs_reflink_find_shared(pag, tp, agbno,
    1934   765834989 :                                         got.br_blockcount, &rbno, &rlen,
    1935             :                                         false);
    1936   765858333 :                         xfs_perag_put(pag);
    1937             :                 }
    1938   800639877 :                 if (error)
    1939     3779757 :                         return error;
    1940             : 
    1941             :                 /* Is there still a shared block here? */
    1942   800639877 :                 if (rbno != NULLAGBLOCK) {
    1943     3779757 :                         *has_shared = true;
    1944     3779757 :                         return 0;
    1945             :                 }
    1946   796860120 : next:
    1947   817321033 :                 found = xfs_iext_next_extent(ifp, &icur, &got);
    1948             :         }
    1949             : 
    1950             :         return 0;
    1951             : }
    1952             : 
    1953             : /*
    1954             :  * Clear the inode reflink flag if there are no shared extents.
    1955             :  *
    1956             :  * The caller is responsible for joining the inode to the transaction passed in.
    1957             :  * The inode will be joined to the transaction that is returned to the caller.
    1958             :  */
    1959             : int
    1960       91278 : xfs_reflink_clear_inode_flag(
    1961             :         struct xfs_inode        *ip,
    1962             :         struct xfs_trans        **tpp)
    1963             : {
    1964       91278 :         bool                    needs_flag;
    1965       91278 :         int                     error = 0;
    1966             : 
    1967       91278 :         ASSERT(xfs_is_reflink_inode(ip));
    1968             : 
    1969       91278 :         error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
    1970       91278 :         if (error || needs_flag)
    1971       82401 :                 return error;
    1972             : 
    1973             :         /*
    1974             :          * We didn't find any shared blocks so turn off the reflink flag.
    1975             :          * First, get rid of any leftover CoW mappings.
    1976             :          */
    1977        8877 :         error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF,
    1978             :                         true);
    1979        8877 :         if (error)
    1980             :                 return error;
    1981             : 
    1982             :         /* Clear the inode flag. */
    1983        8877 :         trace_xfs_reflink_unset_inode_flag(ip);
    1984        8877 :         ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
    1985        8877 :         xfs_inode_clear_cowblocks_tag(ip);
    1986        8877 :         xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
    1987             : 
    1988        8877 :         return error;
    1989             : }
    1990             : 
    1991             : /*
    1992             :  * Clear the inode reflink flag if there are no shared extents and the size
    1993             :  * hasn't changed.
    1994             :  */
    1995             : STATIC int
    1996         180 : xfs_reflink_try_clear_inode_flag(
    1997             :         struct xfs_inode        *ip)
    1998             : {
    1999         180 :         struct xfs_mount        *mp = ip->i_mount;
    2000         180 :         struct xfs_trans        *tp;
    2001         180 :         int                     error = 0;
    2002             : 
    2003             :         /* Start a rolling transaction to remove the mappings */
    2004         180 :         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
    2005         180 :         if (error)
    2006             :                 return error;
    2007             : 
    2008         180 :         xfs_ilock(ip, XFS_ILOCK_EXCL);
    2009         180 :         xfs_trans_ijoin(tp, ip, 0);
    2010             : 
    2011         180 :         error = xfs_reflink_clear_inode_flag(ip, &tp);
    2012         180 :         if (error)
    2013           0 :                 goto cancel;
    2014             : 
    2015         180 :         error = xfs_trans_commit(tp);
    2016         180 :         if (error)
    2017           0 :                 goto out;
    2018             : 
    2019         180 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    2020         180 :         return 0;
    2021             : cancel:
    2022           0 :         xfs_trans_cancel(tp);
    2023           0 : out:
    2024           0 :         xfs_iunlock(ip, XFS_ILOCK_EXCL);
    2025           0 :         return error;
    2026             : }
    2027             : 
    2028             : /*
    2029             :  * Pre-COW all shared blocks within a given byte range of a file and turn off
    2030             :  * the reflink flag if we unshare all of the file's blocks.
    2031             :  */
    2032             : int
    2033         315 : xfs_reflink_unshare(
    2034             :         struct xfs_inode        *ip,
    2035             :         xfs_off_t               offset,
    2036             :         xfs_off_t               len)
    2037             : {
    2038         315 :         struct inode            *inode = VFS_I(ip);
    2039         315 :         int                     error;
    2040             : 
    2041         315 :         if (!xfs_is_reflink_inode(ip))
    2042             :                 return 0;
    2043             : 
    2044         188 :         trace_xfs_reflink_unshare(ip, offset, len);
    2045             : 
    2046         188 :         inode_dio_wait(inode);
    2047             : 
    2048         188 :         if (IS_DAX(inode))
    2049           0 :                 error = dax_file_unshare(inode, offset, len,
    2050             :                                 &xfs_dax_write_iomap_ops);
    2051             :         else
    2052         188 :                 error = iomap_file_unshare(inode, offset, len,
    2053             :                                 &xfs_buffered_write_iomap_ops);
    2054         188 :         if (error)
    2055           0 :                 goto out;
    2056             : 
    2057         188 :         error = filemap_write_and_wait_range(inode->i_mapping, offset,
    2058         188 :                         offset + len - 1);
    2059         188 :         if (error)
    2060           8 :                 goto out;
    2061             : 
    2062             :         /* Turn off the reflink flag if possible. */
    2063         180 :         error = xfs_reflink_try_clear_inode_flag(ip);
    2064         180 :         if (error)
    2065           0 :                 goto out;
    2066             :         return 0;
    2067             : 
    2068           8 : out:
    2069           8 :         trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
    2070           8 :         return error;
    2071             : }

Generated by: LCOV version 1.14