LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_swapext.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 432 516 83.7 %
Date: 2023-07-31 20:08:12 Functions: 32 32 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_defer.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_bmap.h"
      17             : #include "xfs_icache.h"
      18             : #include "xfs_quota.h"
      19             : #include "xfs_swapext.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_bmap_btree.h"
      22             : #include "xfs_trans_space.h"
      23             : #include "xfs_error.h"
      24             : #include "xfs_errortag.h"
      25             : #include "xfs_health.h"
      26             : #include "xfs_da_format.h"
      27             : #include "xfs_da_btree.h"
      28             : #include "xfs_attr_leaf.h"
      29             : #include "xfs_attr.h"
      30             : #include "xfs_dir2_priv.h"
      31             : #include "xfs_dir2.h"
      32             : #include "xfs_symlink_remote.h"
      33             : /* NOTE(review): presumably the allocation cache for struct xfs_swapext_intent; not initialized in this view. */
      34             : struct kmem_cache       *xfs_swapext_intent_cache;
      35             : 
      36             : /* bmbt mappings adjacent to a pair of records (suffix 1 = file1, suffix 2 = file2). */
      37             : struct xfs_swapext_adjacent {
      38             :         struct xfs_bmbt_irec            left1;  /* file1 mapping saved by xfs_swapext_find_mappings() */
      39             :         struct xfs_bmbt_irec            right1; /* NOTE(review): presumably file1's right neighbor; not written in this view */
      40             :         struct xfs_bmbt_irec            left2;  /* file2 mapping saved by xfs_swapext_find_mappings() */
      41             :         struct xfs_bmbt_irec            right2; /* NOTE(review): presumably file2's right neighbor; not written in this view */
      42             : };
      43             : /* Initializer for struct xfs_swapext_adjacent: every neighbor starts with br_startblock = HOLESTARTBLOCK. */
      44             : #define ADJACENT_INIT { \
      45             :         .left1  = { .br_startblock = HOLESTARTBLOCK }, \
      46             :         .right1 = { .br_startblock = HOLESTARTBLOCK }, \
      47             :         .left2  = { .br_startblock = HOLESTARTBLOCK }, \
      48             :         .right2 = { .br_startblock = HOLESTARTBLOCK }, \
      49             : }
      50             : 
      51             : /* Information to help us reset reflink flag / CoW fork state after a swap. */
      52             : 
      53             : /* Bit flags recording the previous (pre-swap) state of the two inodes' reflink flags. */
      54             : #define XFS_REFLINK_STATE_IP1           (1U << 0)       /* bit for the first inode */
      55             : #define XFS_REFLINK_STATE_IP2           (1U << 1)       /* bit for the second inode */
      56             : 
      57             : /*
      58             :  * If @ip has the reflink flag set, make sure it has an incore CoW fork, since
      59             :  * all reflink inodes must have them.  If there's a CoW fork and it has extents
      60             :  * in it, tag the inode so that speculative preallocations can be GC'd if we
      61             :  * run low on space; otherwise clear the tag.
      62             :  */
      63             : static inline void
      64     2479974 : xfs_swapext_ensure_cowfork(
      65             :         struct xfs_inode        *ip)
      66             : {
      67     2479974 :         struct xfs_ifork        *cfork;
      68             : 
      69     2479974 :         if (xfs_is_reflink_inode(ip))
      70     2392670 :                 xfs_ifork_init_cow(ip);
      71             : 
      72     2479970 :         cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
      73     2479970 :         if (!cfork)     /* no incore CoW fork, so there is nothing to tag */
      74             :                 return;
      75     2421129 :         if (cfork->if_bytes > 0)        /* the CoW fork has extents in it */
      76     1407560 :                 xfs_inode_set_cowblocks_tag(ip);
      77             :         else
      78     1013569 :                 xfs_inode_clear_cowblocks_tag(ip);
      79             : }
      80             : 
      81             : /* Schedule an atomic extent swap: queue @sxi on @tp as an XFS_DEFER_OPS_TYPE_SWAPEXT deferred item. */
      82             : void
      83     1293112 : xfs_swapext_schedule(
      84             :         struct xfs_trans                *tp,
      85             :         struct xfs_swapext_intent       *sxi)
      86             : {
      87     1293112 :         trace_xfs_swapext_defer(tp->t_mountp, sxi);
      88     1293121 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
      89     1293120 : }
      90             : 
      91             : /*
      92             :  * Grow @ip's on-disk size to cover @imap (capped at @new_isize) and log the
      93             :  * inode core, so that we never map extents into the file past EOF.  Log
      94             :  * recovery would be confused by the sudden appearance of post-eof extents.
      95             :  */
      96             : STATIC void
      97     9053876 : xfs_swapext_update_size(
      98             :         struct xfs_trans        *tp,
      99             :         struct xfs_inode        *ip,
     100             :         struct xfs_bmbt_irec    *imap,
     101             :         xfs_fsize_t             new_isize)
     102             : {
     103     9053876 :         struct xfs_mount        *mp = tp->t_mountp;
     104     9053876 :         xfs_fsize_t             len;
     105             : 
     106     9053876 :         if (new_isize < 0)      /* a negative @new_isize means leave the size alone */
     107             :                 return;
     108             : 
     109       45474 :         len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
     110             :                   new_isize);   /* byte offset of the mapping's end, capped at @new_isize */
     111             : 
     112       45474 :         if (len <= ip->i_disk_size)     /* only ever move i_disk_size upwards */
     113             :                 return;
     114             : 
     115         176 :         trace_xfs_swapext_update_inode_size(ip, len);
     116             : 
     117         176 :         ip->i_disk_size = len;
     118         176 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     119             : }
     120             : /* Are there still blocks left to swap in @sxi? */
     121             : static inline bool
     122             : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
     123             : {
     124    40826399 :         return sxi->sxi_blockcount > 0;
     125             : }
     126             : /* Does @sxi carry any of the post-swap flags (clear ino1/ino2 reflink, convert ino2 to shortform)? */
     127             : static inline bool
     128             : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
     129             : {
     130     1426609 :         return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
     131             :                                  XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
     132             :                                  XFS_SWAP_EXT_CVT_INO2_SF);
     133             : }
     134             : /* Move the @sxi cursor past @irec: bump both file offsets and shrink the remaining block count. */
     135             : static inline void
     136             : sxi_advance(
     137             :         struct xfs_swapext_intent       *sxi,
     138             :         const struct xfs_bmbt_irec      *irec)
     139             : {
     140    12127161 :         sxi->sxi_startoff1 += irec->br_blockcount;
     141    12127161 :         sxi->sxi_startoff2 += irec->br_blockcount;
     142    12127161 :         sxi->sxi_blockcount -= irec->br_blockcount;
     143     7540665 : }
     144             : 
     145             : #ifdef DEBUG
     146             : static inline bool
     147     1242453 : xfs_swapext_need_rt_conversion(
     148             :         const struct xfs_swapext_req    *req)
     149             : {       /* DEBUG-only helper: must @req's mappings be checked for rt extent alignment? */
     150     1242453 :         struct xfs_inode                *ip = req->ip2;
     151     1242453 :         struct xfs_mount                *mp = ip->i_mount;
     152             : 
     153             :         /* xattrs don't live on the rt device */
     154     1242453 :         if (req->whichfork == XFS_ATTR_FORK)
     155             :                 return false;
     156             : 
     157             :         /*
     158             :          * Caller got permission to use logged swapext, so log recovery will
     159             :          * finish the swap and not leave us with partially swapped rt extents
     160             :          * exposed to userspace.
     161             :          */
     162     1242453 :         if (req->req_flags & XFS_SWAP_REQ_LOGGED)
     163             :                 return false;
     164             : 
     165             :         /*
     166             :          * If we can't use log intent items at all, the only supported
     167             :          * operation is full fork swaps.
     168             :          */
     169       47393 :         if (!xfs_swapext_supported(mp))
     170             :                 return false;
     171             : 
     172             :         /* Conversion is only needed for realtime files with big rt extents */
     173       15843 :         return xfs_inode_has_bigrtextents(ip);  /* decided from req->ip2 only */
     174             : }
     175             : /* DEBUG-only: walk @req's range and check each mapping pair is rt extent aligned; returns 0 or a negative errno. */
     176             : static inline int
     177     1242454 : xfs_swapext_check_rt_extents(
     178             :         struct xfs_mount                *mp,
     179             :         const struct xfs_swapext_req    *req)
     180             : {
     181     1242454 :         struct xfs_bmbt_irec            irec1, irec2;
     182     1242454 :         xfs_fileoff_t                   startoff1 = req->startoff1;
     183     1242454 :         xfs_fileoff_t                   startoff2 = req->startoff2;
     184     1242454 :         xfs_filblks_t                   blockcount = req->blockcount;
     185     1242454 :         uint32_t                        mod;
     186     1242454 :         int                             nimaps;
     187     1242454 :         int                             error;
     188             : 
     189     1242454 :         if (!xfs_swapext_need_rt_conversion(req))
     190             :                 return 0;
     191             : 
     192           3 :         while (blockcount > 0) {
     193             :                 /* Read extent from the first file */
     194           3 :                 nimaps = 1;
     195           3 :                 error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
     196             :                                 &irec1, &nimaps, 0);
     197           0 :                 if (error)
     198           0 :                         return error;
     199           0 :                 ASSERT(nimaps == 1);
     200             : 
     201             :                 /* Read extent from the second file, no longer than the first mapping */
     202           0 :                 nimaps = 1;
     203           0 :                 error = xfs_bmapi_read(req->ip2, startoff2,
     204             :                                 irec1.br_blockcount, &irec2, &nimaps,
     205             :                                 0);
     206           0 :                 if (error)
     207           0 :                         return error;
     208           0 :                 ASSERT(nimaps == 1);
     209             : 
     210             :                 /*
     211             :                  * We can only swap as many blocks as the smaller of the two
     212             :                  * extent maps.
     213             :                  */
     214           0 :                 irec1.br_blockcount = min(irec1.br_blockcount,
     215             :                                           irec2.br_blockcount);
     216             : 
     217             :                 /* Both mappings must be aligned to the realtime extent size. */
     218           0 :                 div_u64_rem(irec1.br_startoff, mp->m_sb.sb_rextsize, &mod);
     219           0 :                 if (mod) {
     220           0 :                         ASSERT(mod == 0);       /* always fails here; flags the misalignment on debug kernels */
     221           0 :                         return -EINVAL;
     222             :                 }
     223             : 
     224           0 :                 div_u64_rem(irec2.br_startoff, mp->m_sb.sb_rextsize, &mod);
     225           0 :                 if (mod) {
     226           0 :                         ASSERT(mod == 0);
     227           0 :                         return -EINVAL;
     228             :                 }
     229             : 
     230           0 :                 div_u64_rem(irec1.br_blockcount, mp->m_sb.sb_rextsize, &mod);
     231           0 :                 if (mod) {
     232           0 :                         ASSERT(mod == 0);
     233           0 :                         return -EINVAL;
     234             :                 }
     235             : 
     236           0 :                 startoff1 += irec1.br_blockcount;       /* step both cursors past the checked range */
     237           0 :                 startoff2 += irec1.br_blockcount;
     238           0 :                 blockcount -= irec1.br_blockcount;
     239             :         }
     240             : 
     241             :         return 0;
     242             : }
     243             : #else /* !DEBUG: the rt extent alignment check compiles away to success */
     244             : # define xfs_swapext_check_rt_extents(mp, req)          (0)
     245             : #endif /* DEBUG */
     246             : 
     247             : /* Check that we can swap @req's forks: both must exist and be non-local.  Returns 0 or a negative errno. */
     248             : int
     249     1242463 : xfs_swapext_check_extents(
     250             :         struct xfs_mount                *mp,
     251             :         const struct xfs_swapext_req    *req)
     252             : {
     253     1242463 :         struct xfs_ifork                *ifp1, *ifp2;
     254             : 
     255             :         /* No fork? */
     256     1242463 :         ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
     257     1242461 :         ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
     258     1242456 :         if (!ifp1 || !ifp2)
     259             :                 return -EINVAL;
     260             : 
     261             :         /* We don't know how to swap local format forks. */
     262     1242456 :         if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
     263     1242456 :             ifp2->if_format == XFS_DINODE_FMT_LOCAL)
     264             :                 return -EINVAL;
     265             : 
     266     1242456 :         return xfs_swapext_check_rt_extents(mp, req);   /* expands to (0) unless DEBUG is defined */
     267             : }
     268             : 
     269             : #ifdef CONFIG_XFS_QUOTA
     270             : /* Log the actual updates to the quota accounting: each real extent moves its block count to the other inode. */
     271             : static inline void
     272     4586493 : xfs_swapext_update_quota(
     273             :         struct xfs_trans                *tp,
     274             :         struct xfs_swapext_intent       *sxi,
     275             :         struct xfs_bmbt_irec            *irec1,
     276             :         struct xfs_bmbt_irec            *irec2)
     277             : {
     278     4586493 :         int64_t                         ip1_delta = 0, ip2_delta = 0;
     279     4586493 :         unsigned int                    qflag;
     280             : 
     281     4586493 :         qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
     282             :                                                       XFS_TRANS_DQ_BCOUNT;      /* chosen from ip1, applied to both inodes */
     283             : 
     284     7880390 :         if (xfs_bmap_is_real_extent(irec1)) {   /* file1's blocks leave ip1 and land in ip2 */
     285     3293898 :                 ip1_delta -= irec1->br_blockcount;
     286     3293898 :                 ip2_delta += irec1->br_blockcount;
     287             :         }
     288             : 
     289     7881885 :         if (xfs_bmap_is_real_extent(irec2)) {   /* file2's blocks leave ip2 and land in ip1 */
     290     3295392 :                 ip1_delta += irec2->br_blockcount;
     291     3295392 :                 ip2_delta -= irec2->br_blockcount;
     292             :         }
     293             : 
     294     4586493 :         xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
     295     4586494 :         xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
     296     4586495 : }
     297             : #else /* !CONFIG_XFS_QUOTA: nothing to account */
     298             : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2)        ((void)0)
     299             : #endif /* CONFIG_XFS_QUOTA */
     300             : 
     301             : /* Decide if we can skip this file1 mapping.  May shorten @irec->br_blockcount; never lengthens it. */
     302             : static inline bool
     303    12127172 : xfs_swapext_can_skip_mapping(
     304             :         struct xfs_swapext_intent       *sxi,   /* only sxi_ip1 and sxi_flags are read */
     305             :         struct xfs_bmbt_irec            *irec)  /* in/out: file1 mapping, possibly trimmed */
     306             : {       /* Returns true if the caller should skip @irec, false if it must be swapped. */
     307    12127172 :         struct xfs_mount                *mp = sxi->sxi_ip1->i_mount;
     308             : 
     309             :         /* Do not skip this mapping if the caller did not tell us to. */
     310    12127172 :         if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
     311             :                 return false;
     312             : 
     313             :         /* Do not skip mapped, written extents. */
     314         316 :         if (xfs_bmap_is_written_extent(irec))
     315             :                 return false;
     316             : 
     317             :         /*
     318             :          * The mapping is unwritten or a hole.  It cannot be a delalloc
     319             :          * reservation because we already excluded those.  It cannot be an
     320             :          * unwritten extent with dirty page cache because we flushed the page
     321             :          * cache.  For files where the allocation unit is 1FSB (files on the
     322             :          * data dev, rt files if the extent size is 1FSB), we can safely
     323             :          * skip this mapping.
     324             :          */
     325         208 :         if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
     326             :                 return true;
     327             : 
     328             :         /*
     329             :          * For a realtime file with a multi-fsb allocation unit, the decision
     330             :          * is trickier because we can only swap full allocation units.
     331             :          * Unwritten mappings can appear in the middle of an rtx if the rtx is
     332             :          * partially written, but they can also appear for preallocations.
     333             :          *
     334             :          * If the mapping is a hole, skip it entirely.  Holes should align with
     335             :          * rtx boundaries.
     336             :          */
     337           0 :         if (!xfs_bmap_is_real_extent(irec))
     338             :                 return true;
     339             : 
     340             :         /*
     341             :          * All mappings below this point are unwritten.
     342             :          *
     343             :          * - If the beginning is not aligned to an rtx, clamp the mapping so it
     344             :          *   ends no later than the next rtx boundary (never grow it), and swap it.
     345             :          *
     346             :          * - If both ends are aligned to an rtx, skip the entire mapping.
     347             :          */
     348           0 :         if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
     349           0 :                 xfs_fileoff_t   new_end;
     350             : 
     351           0 :                 new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
     352           0 :                 irec->br_blockcount = min(irec->br_blockcount, new_end - irec->br_startoff);
     353           0 :                 return false;
     354             :         }
     355           0 :         if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
     356             :                 return true;
     357             : 
     358             :         /*
     359             :          * All mappings below this point are unwritten, start on an rtx
     360             :          * boundary, and do not end on an rtx boundary.
     361             :          *
     362             :          * - If the mapping is longer than one rtx, trim the end of the mapping
     363             :          *   down to an rtx boundary and skip it.
     364             :          *
     365             :          * - The mapping is shorter than one rtx.  Swap it.
     366             :          */
     367           0 :         if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
     368           0 :                 xfs_fileoff_t   new_end;
     369             : 
     370           0 :                 new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
     371             :                                 mp->m_sb.sb_rextsize);
     372           0 :                 irec->br_blockcount = new_end - irec->br_startoff;
     373           0 :                 return true;
     374             :         }
     375             : 
     376             :         return false;
     377             : }
     378             : 
     379             : /*
     380             :  * Walk forward through the file ranges in @sxi until we find two different
     381             :  * mappings to exchange.  If there is work to do, return them in @irec1 and
     382             :  * @irec2; otherwise we've reached the end of the range and sxi_blockcount
     383             :  * will be zero.  Returns 0 or a negative errno.
     384             :  *
     385             :  * If the walk skips over a pair of mappings to the same storage, save them as
     386             :  * the left records in @adj (if provided) so that the simulation phase can
     387             :  * avoid an extra lookup.
     388             :  */
     389             : static int
     390    10855839 : xfs_swapext_find_mappings(
     391             :         struct xfs_swapext_intent       *sxi,
     392             :         struct xfs_bmbt_irec            *irec1,
     393             :         struct xfs_bmbt_irec            *irec2,
     394             :         struct xfs_swapext_adjacent     *adj)
     395             : {
     396    10855839 :         int                             nimaps;
     397    10855839 :         int                             bmap_flags;
     398    10855839 :         int                             error;
     399             : 
     400    10855839 :         bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));
     401             : 
     402    13049083 :         for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
     403             :                 /* Read extent from the first file */
     404    12127142 :                 nimaps = 1;
     405    12127142 :                 error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
     406             :                                 sxi->sxi_blockcount, irec1, &nimaps,
     407             :                                 bmap_flags);
     408    12127173 :                 if (error)
     409           0 :                         return error;
     410    12127173 :                 if (nimaps != 1 ||
     411    12127173 :                     irec1->br_startblock == DELAYSTARTBLOCK ||
     412    12127173 :                     irec1->br_startoff != sxi->sxi_startoff1) {
     413             :                         /*
     414             :                          * We should never get no mapping or a delalloc extent
     415             :                          * or something that doesn't match what we asked for,
     416             :                          * since the caller flushed both inodes and we hold the
     417             :                          * ILOCKs for both inodes.
     418             :                          */
     419           0 :                         ASSERT(0);
     420           0 :                         return -EINVAL;
     421             :                 }
     422             : 
     423    12127173 :                 if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
     424         208 :                         trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
     425         208 :                         continue;       /* the loop's sxi_advance() steps past the (possibly trimmed) irec1 */
     426             :                 }
     427             : 
     428             :                 /* Read extent from the second file, no longer than the file1 mapping */
     429    12126961 :                 nimaps = 1;
     430    12126961 :                 error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
     431             :                                 irec1->br_blockcount, irec2, &nimaps,
     432             :                                 bmap_flags);
     433    12126966 :                 if (error)
     434           0 :                         return error;
     435    12126966 :                 if (nimaps != 1 ||
     436    12126966 :                     irec2->br_startblock == DELAYSTARTBLOCK ||
     437    12126966 :                     irec2->br_startoff != sxi->sxi_startoff2) {
     438             :                         /*
     439             :                          * We should never get no mapping or a delalloc extent
     440             :                          * or something that doesn't match what we asked for,
     441             :                          * since the caller flushed both inodes and we hold the
     442             :                          * ILOCKs for both inodes.
     443             :                          */
     444           0 :                         ASSERT(0);
     445           0 :                         return -EINVAL;
     446             :                 }
     447             : 
     448             :                 /*
     449             :                  * We can only swap as many blocks as the smaller of the two
     450             :                  * extent maps.
     451             :                  */
     452    12126966 :                 irec1->br_blockcount = min(irec1->br_blockcount,
     453             :                                            irec2->br_blockcount);
     454             : 
     455    12126966 :                 trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
     456    12126958 :                 trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);
     457             : 
     458             :                 /* We found something to swap, so return it. */
     459    12126950 :                 if (irec1->br_startblock != irec2->br_startblock)
     460             :                         return 0;
     461             : 
     462             :                 /*
     463             :                  * Two extents mapped to the same physical block must not have
     464             :                  * different states; that's filesystem corruption.  Move on to
     465             :                  * the next extent if they're both holes or both the same
     466             :                  * physical extent.
     467             :                  */
     468     2193036 :                 if (irec1->br_state != irec2->br_state) {
     469           0 :                         xfs_bmap_mark_sick(sxi->sxi_ip1,
     470             :                                         xfs_swapext_whichfork(sxi));
     471           0 :                         xfs_bmap_mark_sick(sxi->sxi_ip2,
     472             :                                         xfs_swapext_whichfork(sxi));
     473           0 :                         return -EFSCORRUPTED;
     474             :                 }
     475             : 
     476             :                 /*
     477             :                  * Save the mappings if we're estimating work and skipping
     478             :                  * these identical mappings.
     479             :                  */
     480     2193036 :                 if (adj) {
     481     2292848 :                         memcpy(&adj->left1, irec1, sizeof(*irec1));
     482     2292848 :                         memcpy(&adj->left2, irec2, sizeof(*irec2));
     483             :                 }
     484             :         }
     485             : 
     486             :         return 0;       /* range exhausted: sxi_blockcount is now zero */
     487             : }
     488             : 
     489             : /* Exchange these two mappings between sxi_ip1 and sxi_ip2, then advance the @sxi cursor past them. */
     490             : static void
     491     4586495 : xfs_swapext_exchange_mappings(
     492             :         struct xfs_trans                *tp,
     493             :         struct xfs_swapext_intent       *sxi,
     494             :         struct xfs_bmbt_irec            *irec1,
     495             :         struct xfs_bmbt_irec            *irec2)
     496             : {
     497     4586495 :         int                             whichfork = xfs_swapext_whichfork(sxi);
     498             : 
     499     4586495 :         xfs_swapext_update_quota(tp, sxi, irec1, irec2);
     500             : 
     501             :         /* Remove both mappings. */
     502     4586496 :         xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
     503     4586495 :         xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);
     504             : 
     505             :         /*
     506             :          * Re-add both mappings.  We swap the file offsets between the two maps
     507             :          * and add the opposite map, which has the effect of filling the
     508             :          * logical offsets we just unmapped, but with the physical mapping
     509             :          * information swapped.
     510             :          */
     511     4586495 :         swap(irec1->br_startoff, irec2->br_startoff);
     512     4586495 :         xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
     513     4586494 :         xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);
     514             : 
     515             :         /* Make sure we're not mapping extents past EOF. */
     516     4586495 :         if (whichfork == XFS_DATA_FORK) {
     517     4526938 :                 xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
     518             :                                 sxi->sxi_isize1);
     519     4526938 :                 xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
     520             :                                 sxi->sxi_isize2);
     521             :         }
     522             : 
     523             :         /*
     524             :          * Advance our cursor and exit.   The caller (either defer ops or log
     525             :          * recovery) will log the SXD item, and if sxi_blockcount is nonzero, it
     526             :          * will log a new SXI item for the remainder and call us back.
     527             :          */
     528     4586496 :         sxi_advance(sxi, irec1);
     529     4586496 : }
     530             : 
     531             : /* Convert inode2's leaf attr fork back to shortform, if possible.  Returns 0 or an error from the helpers. */
     532             : STATIC int
     533       53103 : xfs_swapext_attr_to_sf(
     534             :         struct xfs_trans                *tp,
     535             :         struct xfs_swapext_intent       *sxi)
     536             : {
     537       53103 :         struct xfs_da_args      args = {
     538       53103 :                 .dp             = sxi->sxi_ip2,
     539       53103 :                 .geo            = tp->t_mountp->m_attr_geo,
     540             :                 .whichfork      = XFS_ATTR_FORK,
     541             :                 .trans          = tp,
     542       53103 :                 .owner          = sxi->sxi_ip2->i_ino,
     543             :         };
     544       53103 :         struct xfs_buf          *bp;
     545       53103 :         int                     forkoff;
     546       53103 :         int                     error;
     547             : 
     548       53103 :         if (!xfs_attr_is_leaf(sxi->sxi_ip2))    /* only a leaf-format attr fork is converted */
     549             :                 return 0;
     550             : 
     551       49669 :         error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
     552             :                         &bp);
     553       49671 :         if (error)
     554             :                 return error;
     555             : 
     556       49671 :         forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
     557       49670 :         if (forkoff == 0)       /* NOTE(review): presumably 0 means the attrs do not fit in shortform - confirm */
     558             :                 return 0;
     559             : 
     560         144 :         return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
     561             : }
     562             : 
     563             : /* Convert inode2's block dir fork back to shortform, if possible.. */
     564             : STATIC int
     565        4391 : xfs_swapext_dir_to_sf(
     566             :         struct xfs_trans                *tp,
     567             :         struct xfs_swapext_intent       *sxi)
     568             : {
     569        4391 :         struct xfs_da_args      args = {
     570        4391 :                 .dp             = sxi->sxi_ip2,
     571        4391 :                 .geo            = tp->t_mountp->m_dir_geo,
     572             :                 .whichfork      = XFS_DATA_FORK,
     573             :                 .trans          = tp,
     574        4391 :                 .owner          = sxi->sxi_ip2->i_ino,
     575             :         };
     576        4391 :         struct xfs_dir2_sf_hdr  sfh;
     577        4391 :         struct xfs_buf          *bp;
     578        4391 :         bool                    isblock;
     579        4391 :         int                     size;
     580        4391 :         int                     error;
     581             : 
     582        4391 :         error = xfs_dir2_isblock(&args, &isblock);
     583        4391 :         if (error)
     584             :                 return error;
     585             : 
     586        4391 :         if (!isblock)
     587             :                 return 0;
     588             : 
     589        3306 :         error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
     590        3306 :         if (error)
     591             :                 return error;
     592             : 
     593        3306 :         size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
     594        3306 :         if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
     595             :                 return 0;
     596             : 
     597          57 :         return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
     598             : }
     599             : 
     600             : /* Convert inode2's remote symlink target back to shortform, if possible. */
     601             : STATIC int
     602        9237 : xfs_swapext_link_to_sf(
     603             :         struct xfs_trans                *tp,
     604             :         struct xfs_swapext_intent       *sxi)
     605             : {
     606        9237 :         struct xfs_inode                *ip = sxi->sxi_ip2;
     607        9237 :         struct xfs_ifork                *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
     608        9237 :         char                            *buf;
     609        9237 :         int                             error;
     610             : 
     611        9237 :         if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
     612        9237 :             ip->i_disk_size > xfs_inode_data_fork_size(ip))
     613             :                 return 0;
     614             : 
     615             :         /* Read the current symlink target into a buffer. */
     616           0 :         buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
     617           0 :         if (!buf) {
     618           0 :                 ASSERT(0);
     619           0 :                 return -ENOMEM;
     620             :         }
     621             : 
     622           0 :         error = xfs_symlink_remote_read(ip, buf);
     623           0 :         if (error)
     624           0 :                 goto free;
     625             : 
     626             :         /* Remove the blocks. */
     627           0 :         error = xfs_symlink_remote_truncate(tp, ip);
     628           0 :         if (error)
     629           0 :                 goto free;
     630             : 
     631             :         /* Convert fork to local format and log our changes. */
     632           0 :         xfs_idestroy_fork(ifp);
     633           0 :         ifp->if_bytes = 0;
     634           0 :         ifp->if_format = XFS_DINODE_FMT_LOCAL;
     635           0 :         xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
     636           0 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
     637           0 : free:
     638           0 :         kmem_free(buf);
     639           0 :         return error;
     640             : }
     641             : 
     642             : static inline void
     643          30 : xfs_swapext_clear_reflink(
     644             :         struct xfs_trans        *tp,
     645             :         struct xfs_inode        *ip)
     646             : {
     647          30 :         trace_xfs_reflink_unset_inode_flag(ip);
     648             : 
     649          30 :         ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
     650          30 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     651          30 : }
     652             : 
     653             : /* Finish whatever work might come after a swap operation. */
     654             : static int
     655       66761 : xfs_swapext_do_postop_work(
     656             :         struct xfs_trans                *tp,
     657             :         struct xfs_swapext_intent       *sxi)
     658             : {
     659       66761 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
     660       66731 :                 int                     error = 0;
     661             : 
     662       66731 :                 if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
     663       53103 :                         error = xfs_swapext_attr_to_sf(tp, sxi);
     664       13628 :                 else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
     665        4391 :                         error = xfs_swapext_dir_to_sf(tp, sxi);
     666        9237 :                 else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
     667        9237 :                         error = xfs_swapext_link_to_sf(tp, sxi);
     668       66731 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
     669       66731 :                 if (error)
     670             :                         return error;
     671             :         }
     672             : 
     673       66761 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
     674           0 :                 xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
     675           0 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
     676             :         }
     677             : 
     678       66761 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
     679          30 :                 xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
     680          30 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
     681             :         }
     682             : 
     683             :         return 0;
     684             : }
     685             : 
     686             : /* Finish one extent swap, possibly log more. */
     687             : int
     688     5117751 : xfs_swapext_finish_one(
     689             :         struct xfs_trans                *tp,
     690             :         struct xfs_swapext_intent       *sxi)
     691             : {
     692     5117751 :         struct xfs_bmbt_irec            irec1, irec2;
     693     5117751 :         int                             error;
     694             : 
     695     5117751 :         if (sxi_has_more_swap_work(sxi)) {
     696             :                 /*
     697             :                  * If the operation state says that some range of the files
     698             :                  * have not yet been swapped, look for extents in that range to
     699             :                  * swap.  If we find some extents, swap them.
     700             :                  */
     701     5050990 :                 error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
     702     5050987 :                 if (error)
     703             :                         return error;
     704             : 
     705     5050987 :                 if (sxi_has_more_swap_work(sxi))
     706     4586495 :                         xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);
     707             : 
     708             :                 /*
     709             :                  * If the caller asked us to exchange the file sizes after the
     710             :                  * swap and either we just swapped the last extents in the
     711             :                  * range or we didn't find anything to swap, update the ondisk
     712             :                  * file sizes.
     713             :                  */
     714     5050988 :                 if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
     715             :                     !sxi_has_more_swap_work(sxi)) {
     716       13830 :                         sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
     717       13830 :                         sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;
     718             : 
     719       13830 :                         xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
     720       13830 :                         xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
     721             :                 }
     722       66761 :         } else if (sxi_has_postop_work(sxi)) {
     723             :                 /*
     724             :                  * Now that we're finished with the swap operation, complete
     725             :                  * the post-op cleanup work.
     726             :                  */
     727       66761 :                 error = xfs_swapext_do_postop_work(tp, sxi);
     728       66759 :                 if (error)
     729             :                         return error;
     730             :         }
     731             : 
     732     5117747 :         if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
     733             :                 return -EIO;
     734             : 
     735             :         /* If we still have work to do, ask for a new transaction. */
     736     5117748 :         if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
     737     3824660 :                 trace_xfs_swapext_defer(tp->t_mountp, sxi);
     738     3824660 :                 return -EAGAIN;
     739             :         }
     740             : 
     741             :         /*
     742             :          * If we reach here, we've finished all the swapping work and the post
     743             :          * operation work.  The last thing we need to do before returning to
     744             :          * the caller is to make sure that COW forks are set up correctly.
     745             :          */
     746     1293088 :         if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
     747     1239986 :                 xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
     748     1239992 :                 xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
     749             :         }
     750             : 
     751             :         return 0;
     752             : }
     753             : 
     754             : /*
     755             :  * Compute the amount of bmbt blocks we should reserve for each file.  In the
     756             :  * worst case, each exchange will fill a hole with a new mapping, which could
     757             :  * result in a btree split every time we add a new leaf block.
     758             :  */
     759             : static inline uint64_t
     760     5815904 : xfs_swapext_bmbt_blocks(
     761             :         struct xfs_mount                *mp,
     762             :         const struct xfs_swapext_req    *req)
     763             : {
     764     5815904 :         return howmany_64(req->nr_exchanges,
     765     5815904 :                                         XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
     766     5816025 :                         XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
     767             : }
     768             : 
     769             : static inline uint64_t
     770     5816358 : xfs_swapext_rmapbt_blocks(
     771             :         struct xfs_mount                *mp,
     772             :         const struct xfs_swapext_req    *req)
     773             : {
     774     5816358 :         if (!xfs_has_rmapbt(mp))
     775             :                 return 0;
     776     5800583 :         if (XFS_IS_REALTIME_INODE(req->ip1))
     777             :                 return 0;
     778             : 
     779     5800583 :         return howmany_64(req->nr_exchanges,
     780     5800583 :                                         XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
     781     5800654 :                         XFS_RMAPADD_SPACE_RES(mp);
     782             : }
     783             : 
     784             : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
     785             : int
     786     5816755 : xfs_swapext_estimate_overhead(
     787             :         struct xfs_swapext_req  *req)
     788             : {
     789     5816755 :         struct xfs_mount        *mp = req->ip1->i_mount;
     790     5816755 :         xfs_filblks_t           bmbt_blocks;
     791     5816755 :         xfs_filblks_t           rmapbt_blocks;
     792     5816755 :         xfs_filblks_t           resblks = req->resblks;
     793             : 
     794             :         /*
     795             :          * Compute the number of bmbt and rmapbt blocks we might need to handle
     796             :          * the estimated number of exchanges.
     797             :          */
     798     5816755 :         bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
     799     5816082 :         rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
     800             : 
     801     5816524 :         trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
     802             : 
     803             :         /* Make sure the change in file block count doesn't overflow. */
     804     5816458 :         if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
     805             :                 return -EFBIG;
     806     5816458 :         if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
     807             :                 return -EFBIG;
     808             : 
     809             :         /*
     810             :          * Add together the number of blocks we need to handle btree growth,
     811             :          * then add it to the number of blocks we need to reserve to this
     812             :          * transaction.
     813             :          */
     814     5816458 :         if (check_add_overflow(resblks, bmbt_blocks, &resblks))
     815             :                 return -ENOSPC;
     816     5816458 :         if (check_add_overflow(resblks, bmbt_blocks, &resblks))
     817             :                 return -ENOSPC;
     818     5816458 :         if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
     819             :                 return -ENOSPC;
     820     5816458 :         if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
     821             :                 return -ENOSPC;
     822             : 
     823             :         /* Can't actually reserve more than UINT_MAX blocks. */
     824     5816458 :         if (req->resblks > UINT_MAX)
     825             :                 return -ENOSPC;
     826             : 
     827     5816458 :         req->resblks = resblks;
     828     5816458 :         trace_xfs_swapext_final_estimate(req);
     829     5816458 :         return 0;
     830             : }
     831             : 
     832             : /* Decide if we can merge two real extents. */
     833             : static inline bool
     834    28794972 : can_merge(
     835             :         const struct xfs_bmbt_irec      *b1,
     836             :         const struct xfs_bmbt_irec      *b2)
     837             : {
     838             :         /* Don't merge holes. */
     839    28794972 :         if (b1->br_startblock == HOLESTARTBLOCK ||
     840    23504990 :             b2->br_startblock == HOLESTARTBLOCK)
     841             :                 return false;
     842             : 
     843             :         /* We don't merge holes. */
     844    65788713 :         if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
     845             :                 return false;
     846             : 
     847    21929571 :         if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
     848    16554920 :             b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
     849     6807001 :             b1->br_state                       == b2->br_state &&
     850     6294406 :             b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
     851     6294406 :                 return true;
     852             : 
     853             :         return false;
     854             : }
     855             : 
     856             : #define CLEFT_CONTIG    0x01
     857             : #define CRIGHT_CONTIG   0x02
     858             : #define CHOLE           0x04
     859             : #define CBOTH_CONTIG    (CLEFT_CONTIG | CRIGHT_CONTIG)
     860             : 
     861             : #define NLEFT_CONTIG    0x10
     862             : #define NRIGHT_CONTIG   0x20
     863             : #define NHOLE           0x40
     864             : #define NBOTH_CONTIG    (NLEFT_CONTIG | NRIGHT_CONTIG)
     865             : 
     866             : /* Estimate the effect of a single swap on extent count. */
     867             : static inline int
     868    10694840 : delta_nextents_step(
     869             :         struct xfs_mount                *mp,
     870             :         const struct xfs_bmbt_irec      *left,
     871             :         const struct xfs_bmbt_irec      *curr,
     872             :         const struct xfs_bmbt_irec      *new,
     873             :         const struct xfs_bmbt_irec      *right)
     874             : {
     875    10694840 :         bool                            lhole, rhole, chole, nhole;
     876    10694840 :         unsigned int                    state = 0;
     877    10694840 :         int                             ret = 0;
     878             : 
     879    10694840 :         lhole = left->br_startblock == HOLESTARTBLOCK;
     880    10694840 :         rhole = right->br_startblock == HOLESTARTBLOCK;
     881    10694840 :         chole = curr->br_startblock == HOLESTARTBLOCK;
     882    10694840 :         nhole = new->br_startblock == HOLESTARTBLOCK;
     883             : 
     884    10694840 :         if (chole)
     885     3086461 :                 state |= CHOLE;
     886    10694840 :         if (!lhole && !chole && can_merge(left, curr))
     887         969 :                 state |= CLEFT_CONTIG;
     888    10694840 :         if (!rhole && !chole && can_merge(curr, right))
     889     2628102 :                 state |= CRIGHT_CONTIG;
     890    10694840 :         if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
     891         335 :             left->br_startblock + curr->br_startblock +
     892             :                                         right->br_startblock > XFS_MAX_BMBT_EXTLEN)
     893           1 :                 state &= ~CRIGHT_CONTIG;
     894             : 
     895    10694840 :         if (nhole)
     896     3086461 :                 state |= NHOLE;
     897    10694840 :         if (!lhole && !nhole && can_merge(left, new))
     898     1832657 :                 state |= NLEFT_CONTIG;
     899    10694840 :         if (!rhole && !nhole && can_merge(new, right))
     900          21 :                 state |= NRIGHT_CONTIG;
     901    10694840 :         if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
     902          12 :             left->br_startblock + new->br_startblock +
     903             :                                         right->br_startblock > XFS_MAX_BMBT_EXTLEN)
     904           1 :                 state &= ~NRIGHT_CONTIG;
     905             : 
     906    10694840 :         switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
     907         334 :         case CLEFT_CONTIG | CRIGHT_CONTIG:
     908             :                 /*
     909             :                  * left/curr/right are the same extent, so deleting curr causes
     910             :                  * 2 new extents to be created.
     911             :                  */
     912         334 :                 ret += 2;
     913         334 :                 break;
     914     4979643 :         case 0:
     915             :                 /*
     916             :                  * curr is not contiguous with any extent, so we remove curr
     917             :                  * completely
     918             :                  */
     919     4979643 :                 ret--;
     920     4979643 :                 break;
     921             :         case CHOLE:
     922             :                 /* hole, do nothing */
     923             :                 break;
     924             :         case CLEFT_CONTIG:
     925             :         case CRIGHT_CONTIG:
     926             :                 /* trim either left or right, no change */
     927             :                 break;
     928             :         }
     929             : 
     930    10694840 :         switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
     931          11 :         case NLEFT_CONTIG | NRIGHT_CONTIG:
     932             :                 /*
     933             :                  * left/curr/right will become the same extent, so adding
     934             :                  * curr causes the deletion of right.
     935             :                  */
     936          11 :                 ret--;
     937          11 :                 break;
     938     5775712 :         case 0:
     939             :                 /* new is not contiguous with any extent */
     940     5775712 :                 ret++;
     941     5775712 :                 break;
     942             :         case NHOLE:
     943             :                 /* hole, do nothing. */
     944             :                 break;
     945             :         case NLEFT_CONTIG:
     946             :         case NRIGHT_CONTIG:
     947             :                 /* new is absorbed into left or right, no change */
     948             :                 break;
     949             :         }
     950             : 
     951    10694840 :         trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
     952             :                         state);
     953    10694839 :         return ret;
     954             : }
     955             : 
     956             : /* Make sure we don't overflow the extent counters. */
     957             : static inline int
     958     1393520 : ensure_delta_nextents(
     959             :         struct xfs_swapext_req  *req,
     960             :         struct xfs_inode        *ip,
     961             :         int64_t                 delta)
     962             : {
     963     1393520 :         struct xfs_mount        *mp = ip->i_mount;
     964     1393520 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, req->whichfork);
     965     1393516 :         xfs_extnum_t            max_extents;
     966     1393516 :         bool                    large_extcount;
     967             : 
     968     1393516 :         if (delta < 0)
     969             :                 return 0;
     970             : 
     971     1369826 :         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
     972           6 :                 if (ifp->if_nextents + delta > 10)
     973             :                         return -EFBIG;
     974             :         }
     975             : 
     976     1369825 :         if (req->req_flags & XFS_SWAP_REQ_NREXT64)
     977             :                 large_extcount = true;
     978             :         else
     979     1369825 :                 large_extcount = xfs_inode_has_large_extent_counts(ip);
     980             : 
     981     1369825 :         max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
     982     1369817 :         if (ifp->if_nextents + delta <= max_extents)
     983             :                 return 0;
     984           0 :         if (large_extcount)
     985             :                 return -EFBIG;
     986           0 :         if (!xfs_has_large_extent_counts(mp))
     987             :                 return -EFBIG;
     988             : 
     989           0 :         max_extents = xfs_iext_max_nextents(true, req->whichfork);
     990           0 :         if (ifp->if_nextents + delta > max_extents)
     991             :                 return -EFBIG;
     992             : 
     993           0 :         req->req_flags |= XFS_SWAP_REQ_NREXT64;
     994           0 :         return 0;
     995             : }
     996             : 
     997             : /* Find the next extent after irec. */
     998             : static inline int
     999    10694838 : get_next_ext(
    1000             :         struct xfs_inode                *ip,
    1001             :         int                             bmap_flags,
    1002             :         const struct xfs_bmbt_irec      *irec,
    1003             :         struct xfs_bmbt_irec            *nrec)
    1004             : {
    1005    10694838 :         xfs_fileoff_t                   off;
    1006    10694838 :         xfs_filblks_t                   blockcount;
    1007    10694838 :         int                             nimaps = 1;
    1008    10694838 :         int                             error;
    1009             : 
    1010    10694838 :         off = irec->br_startoff + irec->br_blockcount;
    1011    10694838 :         blockcount = XFS_MAX_FILEOFF - off;
    1012    10694838 :         error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
    1013    10694840 :         if (error)
    1014             :                 return error;
    1015    10694840 :         if (nrec->br_startblock == DELAYSTARTBLOCK ||
    1016    10687907 :             nrec->br_startoff != off) {
    1017             :                 /*
    1018             :                  * If we don't get the extent we want, return a zero-length
    1019             :                  * mapping, which our estimator function will pretend is a hole.
    1020             :                  * We shouldn't get delalloc reservations.
    1021             :                  */
    1022        6932 :                 nrec->br_startblock = HOLESTARTBLOCK;
    1023             :         }
    1024             : 
    1025             :         return 0;
    1026             : }
    1027             : 
    1028             : int __init
    1029          50 : xfs_swapext_intent_init_cache(void)
    1030             : {
    1031          50 :         xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
    1032             :                         sizeof(struct xfs_swapext_intent),
    1033             :                         0, 0, NULL);
    1034             : 
    1035          50 :         return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
    1036             : }
    1037             : 
    1038             : void
    1039          49 : xfs_swapext_intent_destroy_cache(void)
    1040             : {
    1041          49 :         kmem_cache_destroy(xfs_swapext_intent_cache);
    1042          49 :         xfs_swapext_intent_cache = NULL;
    1043          49 : }
    1044             : 
    1045             : /*
    1046             :  * Decide if we will swap the reflink flags between the two files after the
    1047             :  * swap.  The only time we want to do this is if we're exchanging all extents
    1048             :  * under EOF and the inode reflink flags have different states.
    1049             :  */
    1050             : static inline bool
    1051     2491852 : sxi_can_exchange_reflink_flags(
    1052             :         const struct xfs_swapext_req    *req,
    1053             :         unsigned int                    reflink_state)
    1054             : {
    1055     2491852 :         struct xfs_mount                *mp = req->ip1->i_mount;
    1056             : 
    1057     2491852 :         if (hweight32(reflink_state) != 1)
    1058             :                 return false;
    1059         110 :         if (req->startoff1 != 0 || req->startoff2 != 0)
    1060             :                 return false;
    1061          90 :         if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
    1062             :                 return false;
    1063          90 :         if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
    1064           0 :                 return false;
    1065             :         return true;
    1066             : }
    1067             : 
    1068             : 
    1069             : /*
                     :  * Allocate and initialize a new incore intent item from a request.
                     :  *
                     :  * The inode pointers, start offsets and block count are copied from @req.
                     :  * Request flags are translated into intent flags: XFS_SWAP_REQ_LOGGED and
                     :  * XFS_SWAP_REQ_NREXT64 are recorded in sxi_op_flags, while the fork
                     :  * selection, SET_SIZES, INO1_WRITTEN, CVT_INO2_SF and the reflink-clearing
                     :  * flags are recorded in sxi_flags.  XFS_SWAP_REQ_SET_SIZES is honoured only
                     :  * for data fork requests.
                     :  *
                     :  * If @reflink_state is not NULL it receives the XFS_REFLINK_STATE_* bits
                     :  * sampled from the two inodes.  The bits are only sampled for data fork
                     :  * requests and are zero otherwise.
                     :  *
                     :  * The intent is allocated from xfs_swapext_intent_cache with __GFP_NOFAIL
                     :  * and is used without a NULL check.  This function does not free it; the
                     :  * pointer is handed back to the caller.
                     :  */
    1070             : struct xfs_swapext_intent *
    1071     2595254 : xfs_swapext_init_intent(
    1072             :         const struct xfs_swapext_req    *req,
    1073             :         unsigned int                    *reflink_state)
    1074             : {
    1075     2595254 :         struct xfs_swapext_intent       *sxi;
    1076     2595254 :         unsigned int                    rs = 0;
    1077             : 
    1078     2595254 :         sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
    1079             :                         GFP_NOFS | __GFP_NOFAIL);
    1080     2595264 :         INIT_LIST_HEAD(&sxi->sxi_list);
    1081     2595264 :         sxi->sxi_ip1 = req->ip1;
    1082     2595264 :         sxi->sxi_ip2 = req->ip2;
    1083     2595264 :         sxi->sxi_startoff1 = req->startoff1;
    1084     2595264 :         sxi->sxi_startoff2 = req->startoff2;
    1085     2595264 :         sxi->sxi_blockcount = req->blockcount;
    1086     2595264 :         sxi->sxi_isize1 = sxi->sxi_isize2 = -1; /* overwritten below only for SET_SIZES */
    1087             : 
    1088     2595264 :         if (req->whichfork == XFS_ATTR_FORK)
    1089      103414 :                 sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;
    1090             : 
    1091     2595264 :         if (req->whichfork == XFS_DATA_FORK &&
    1092             :             (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
    1093       23398 :                 sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
    1094       23398 :                 sxi->sxi_isize1 = req->ip2->i_disk_size; /* crossed: slot 1 records ip2's size */
    1095       23398 :                 sxi->sxi_isize2 = req->ip1->i_disk_size; /* crossed: slot 2 records ip1's size */
    1096             :         }
    1097             : 
    1098     2595264 :         if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
    1099          92 :                 sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
    1100     2595264 :         if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
    1101      126396 :                 sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;
    1102             : 
    1103     2595264 :         if (req->req_flags & XFS_SWAP_REQ_LOGGED)
    1104     2547817 :                 sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
    1105     2595264 :         if (req->req_flags & XFS_SWAP_REQ_NREXT64)
    1106           0 :                 sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;
    1107             : 
    1108     2595264 :         if (req->whichfork == XFS_DATA_FORK) {
    1109             :                 /*
    1110             :                  * Record the state of each inode's reflink flag before the
    1111             :                  * operation.
    1112             :                  */
    1113     2491849 :                 if (xfs_is_reflink_inode(req->ip1))
    1114     2392643 :                         rs |= XFS_REFLINK_STATE_IP1;
    1115     2491849 :                 if (xfs_is_reflink_inode(req->ip2))
    1116     2392752 :                         rs |= XFS_REFLINK_STATE_IP2;
    1117             : 
    1118             :                 /*
    1119             :                  * Figure out if we're clearing the reflink flags (which
    1120             :                  * effectively swaps them) after the operation.
    1121             :                  */
    1122     2491849 :                 if (sxi_can_exchange_reflink_flags(req, rs)) {
    1123          90 :                         if (rs & XFS_REFLINK_STATE_IP1)
    1124           0 :                                 sxi->sxi_flags |=
    1125             :                                                 XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
    1126          90 :                         if (rs & XFS_REFLINK_STATE_IP2)
    1127          90 :                                 sxi->sxi_flags |=
    1128             :                                                 XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
    1129             :                 }
    1130             :         }
    1131             : 
    1132     2595262 :         if (reflink_state) /* optional out parameter */
    1133     1293120 :                 *reflink_state = rs;
    1134     2595262 :         return sxi;
    1135             : }
    1136             : 
    1137             : /*
    1138             :  * Estimate the number of exchange operations and the number of file blocks
    1139             :  * in each file that will be affected by the exchange operation.
                     :  *
                     :  * The swap is simulated against a scratch intent built from @req; that
                     :  * intent is freed before returning.  Along the way @req is updated:
                     :  * nr_exchanges is incremented once per simulated step, and the block totals
                     :  * of the real extents seen are stored in ip{1,2}_rtbcount when ip1 is a
                     :  * realtime inode and the data fork is being swapped, or in ip{1,2}_bcount
                     :  * otherwise.  The block totals are not stored if the walk fails part way
                     :  * through.
                     :  *
                     :  * Returns the first nonzero error reported by a helper, otherwise whatever
                     :  * xfs_swapext_estimate_overhead() returns.
    1140             :  */
    1141             : int
    1142     1302147 : xfs_swapext_estimate(
    1143             :         struct xfs_swapext_req          *req)
    1144             : {
    1145     1302147 :         struct xfs_swapext_intent       *sxi;
    1146     1302147 :         struct xfs_bmbt_irec            irec1, irec2;
    1147     1302147 :         struct xfs_swapext_adjacent     adj = ADJACENT_INIT;
    1148     1302147 :         xfs_filblks_t                   ip1_blocks = 0, ip2_blocks = 0;
    1149     1302147 :         int64_t                         d_nexts1, d_nexts2;
    1150     1302147 :         int                             bmap_flags;
    1151     1302147 :         int                             error;
    1152             : 
    1153     1302147 :         ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
    1154             : 
    1155     1302147 :         bmap_flags = xfs_bmapi_aflag(req->whichfork);
    1156     1302147 :         sxi = xfs_swapext_init_intent(req, NULL); /* scratch intent, freed at out_free */
    1157             : 
    1158             :         /*
    1159             :          * To guard against the possibility of overflowing the extent counters,
    1160             :          * we have to estimate an upper bound on the potential increase in that
    1161             :          * counter.  We can split the extent at each end of the range, and for
    1162             :          * each step of the swap we can split the extent that we're working on
    1163             :          * if the extents do not align.
    1164             :          */
    1165     1302147 :         d_nexts1 = d_nexts2 = 3;
    1166             : 
    1167     6649568 :         while (sxi_has_more_swap_work(sxi)) {
    1168             :                 /*
    1169             :                  * Walk through the file ranges until we find something to
    1170             :                  * swap.  Because we're simulating the swap, pass in adj to
    1171             :                  * capture skipped mappings for correct estimation of bmbt
    1172             :                  * record merges.
    1173             :                  */
    1174     5804878 :                 error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
    1175     5804866 :                 if (error)
    1176           0 :                         goto out_free;
    1177     5804866 :                 if (!sxi_has_more_swap_work(sxi))
    1178             :                         break; /* presumably the range ran out while skipping mappings - confirm */
    1179             : 
    1180             :                 /* Update accounting. */
    1181     9149559 :                 if (xfs_bmap_is_real_extent(&irec1))
    1182     3802139 :                         ip1_blocks += irec1.br_blockcount;
    1183     9153659 :                 if (xfs_bmap_is_real_extent(&irec2))
    1184     3806239 :                         ip2_blocks += irec2.br_blockcount;
    1185     5347420 :                 req->nr_exchanges++; /* NOTE(review): never reset here; presumably zeroed by the caller */
    1186             : 
    1187             :                 /* Read the next extents from both files. */
    1188     5347420 :                 error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
    1189     5347419 :                 if (error)
    1190           0 :                         goto out_free;
    1191             : 
    1192     5347419 :                 error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
    1193     5347421 :                 if (error)
    1194           0 :                         goto out_free;
    1195             : 
    1196             :                 /* Update extent count deltas. */
    1197     5347421 :                 d_nexts1 += delta_nextents_step(req->ip1->i_mount,
    1198             :                                 &adj.left1, &irec1, &irec2, &adj.right1);
    1199             : 
    1200     5347420 :                 d_nexts2 += delta_nextents_step(req->ip1->i_mount,
    1201             :                                 &adj.left2, &irec2, &irec1, &adj.right2); /* ip1's mount; presumably shared with ip2 */
    1202             : 
    1203             :                 /* Now pretend we swapped the extents. */
    1204     5347421 :                 if (can_merge(&adj.left2, &irec1))
    1205     1125418 :                         adj.left2.br_blockcount += irec1.br_blockcount;
    1206             :                 else
    1207     4222003 :                         memcpy(&adj.left2, &irec1, sizeof(irec1));
    1208             : 
    1209     5347421 :                 if (can_merge(&adj.left1, &irec2))
    1210      707239 :                         adj.left1.br_blockcount += irec2.br_blockcount;
    1211             :                 else
    1212     4640182 :                         memcpy(&adj.left1, &irec2, sizeof(irec2));
    1213             : 
    1214     5347421 :                 sxi_advance(sxi, &irec1);
    1215             :         }
    1216             : 
    1217             :         /* Account for the blocks that are being exchanged. */
    1218     1302133 :         if (XFS_IS_REALTIME_INODE(req->ip1) &&
    1219       15775 :             req->whichfork == XFS_DATA_FORK) {
    1220       15775 :                 req->ip1_rtbcount = ip1_blocks;
    1221       15775 :                 req->ip2_rtbcount = ip2_blocks;
    1222             :         } else {
    1223     1286358 :                 req->ip1_bcount = ip1_blocks;
    1224     1286358 :                 req->ip2_bcount = ip2_blocks;
    1225             :         }
    1226             : 
    1227             :         /*
    1228             :          * Make sure that both forks have enough slack left in their extent
    1229             :          * counters that the swap operation will not overflow.
    1230             :          */
    1231     1302133 :         trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
    1232     1302127 :         if (req->ip1 == req->ip2) { /* same inode: it is checked once against the sum of both deltas */
    1233     1210739 :                 error = ensure_delta_nextents(req, req->ip1,
    1234             :                                 d_nexts1 + d_nexts2);
    1235             :         } else {
    1236       91388 :                 error = ensure_delta_nextents(req, req->ip1, d_nexts1);
    1237       91387 :                 if (error)
    1238           6 :                         goto out_free;
    1239       91381 :                 error = ensure_delta_nextents(req, req->ip2, d_nexts2);
    1240             :         }
    1241     1302127 :         if (error)
    1242           0 :                 goto out_free;
    1243             : 
    1244     1302127 :         trace_xfs_swapext_initial_estimate(req);
    1245     1302125 :         error = xfs_swapext_estimate_overhead(req);
    1246     1302136 : out_free:
    1247     1302136 :         kmem_cache_free(xfs_swapext_intent_cache, sxi);
    1248     1302143 :         return error;
    1249             : }
    1250             : /*
                     :  * Set XFS_DIFLAG2_REFLINK in @ip's i_diflags2 and log the inode core in
                     :  * @tp.  The xfs_reflink_set_inode_flag tracepoint is emitted first.  The
                     :  * flag is set unconditionally; callers check whether it is needed.
                     :  */
    1251             : static inline void
    1252          55 : xfs_swapext_set_reflink(
    1253             :         struct xfs_trans        *tp,
    1254             :         struct xfs_inode        *ip)
    1255             : {
    1256          55 :         trace_xfs_reflink_set_inode_flag(ip);
    1257             : 
    1258          55 :         ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
    1259          55 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
    1260          55 : }
    1261             : 
    1262             : /*
    1263             :  * If either file has shared blocks and we're swapping data forks, we must flag
    1264             :  * the other file as having shared blocks so that we get the shared-block rmap
    1265             :  * functions if we need to fix up the rmaps.
                     :  *
                     :  * @reflink_state carries XFS_REFLINK_STATE_* bits.  For each bit that is
                     :  * set, the opposite inode is tested with xfs_is_reflink_inode() and, if it
                     :  * is not already flagged, xfs_swapext_set_reflink() sets the flag and logs
                     :  * the inode in @tp.  Nothing is done when @reflink_state is zero.
    1266             :  */
    1267             : void
    1268     1293123 : xfs_swapext_ensure_reflink(
    1269             :         struct xfs_trans                *tp,
    1270             :         const struct xfs_swapext_intent *sxi,
    1271             :         unsigned int                    reflink_state)
    1272             : {
    1273     1293123 :         if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
    1274     1196308 :             !xfs_is_reflink_inode(sxi->sxi_ip2))
    1275           0 :                 xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
    1276             : 
    1277     1293123 :         if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
    1278     1196365 :             !xfs_is_reflink_inode(sxi->sxi_ip1))
    1279          55 :                 xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
    1280     1293123 : }
    1281             : 
    1282             : /*
                     :  * Widen the extent counts of both inodes if necessary.
                     :  *
                     :  * Does nothing unless XFS_SWAP_EXT_OP_NREXT64 is set in sxi->sxi_op_flags.
                     :  * Otherwise XFS_DIFLAG2_NREXT64 is set on both inodes and each inode core
                     :  * is logged in @tp, without checking whether the flag was already set.
                     :  */
    1283             : static inline void
    1284     1293105 : xfs_swapext_upgrade_extent_counts(
    1285             :         struct xfs_trans                *tp,
    1286             :         const struct xfs_swapext_intent *sxi)
    1287             : {
    1288     1293105 :         if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
    1289             :                 return;
    1290             : 
    1291           0 :         sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
    1292           0 :         xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
    1293             : 
    1294           0 :         sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
    1295           0 :         xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
    1296             : }
    1297             : 
    1298             : /*
    1299             :  * Schedule a swap of a range of extents from one inode to another.  If the
    1300             :  * atomic swap feature is enabled, then the operation progress can be resumed
    1301             :  * even if the system goes down.  The caller must commit the transaction to
    1302             :  * start the work.
    1303             :  *
    1304             :  * The caller must ensure that the inodes are joined to the transaction and
    1305             :  * ILOCKed; they will still be joined to the transaction at exit.
                     :  *
                     :  * The assertions below require that the COW fork is never requested, that
                     :  * XFS_SWAP_REQ_SET_SIZES is used only with the data fork, and that
                     :  * XFS_SWAP_REQ_CVT_INO2_SF is used only with the attr fork or with the
                     :  * data fork of a directory or symlink ip2.
                     :  *
                     :  * Returns without doing anything if req->blockcount is zero.  Otherwise a
                     :  * new intent is built from @req and passed to xfs_swapext_schedule(), then
                     :  * the reflink flags and extent counter width of the inodes are adjusted.
    1306             :  */
    1307             : void
    1308     1293105 : xfs_swapext(
    1309             :         struct xfs_trans                *tp,
    1310             :         const struct xfs_swapext_req    *req)
    1311             : {
    1312     1293105 :         struct xfs_swapext_intent       *sxi;
    1313     1293105 :         unsigned int                    reflink_state;
    1314             : 
    1315     1293105 :         ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
    1316     1293087 :         ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
    1317     1293092 :         ASSERT(req->whichfork != XFS_COW_FORK);
    1318     1293092 :         ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
    1319     1293092 :         if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
    1320       13830 :                 ASSERT(req->whichfork == XFS_DATA_FORK);
    1321     1293092 :         if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
    1322       66730 :                 ASSERT(req->whichfork == XFS_ATTR_FORK ||
    1323             :                        (req->whichfork == XFS_DATA_FORK &&
    1324             :                         (S_ISDIR(VFS_I(req->ip2)->i_mode) ||
    1325             :                          S_ISLNK(VFS_I(req->ip2)->i_mode))));
    1326             : 
    1327     1293092 :         if (req->blockcount == 0)
    1328           0 :                 return;
    1329             : 
    1330     1293092 :         sxi = xfs_swapext_init_intent(req, &reflink_state);
    1331     1293100 :         xfs_swapext_schedule(tp, sxi); /* sxi is not freed here; presumably owned by the scheduled work - confirm */
    1332     1293106 :         xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
    1333     1293104 :         xfs_swapext_upgrade_extent_counts(tp, sxi);
    1334             : }

Generated by: LCOV version 1.14