Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_bmap.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_quota.h"
19 : #include "xfs_swapext.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_bmap_btree.h"
22 : #include "xfs_trans_space.h"
23 : #include "xfs_error.h"
24 : #include "xfs_errortag.h"
25 : #include "xfs_health.h"
26 : #include "xfs_da_format.h"
27 : #include "xfs_da_btree.h"
28 : #include "xfs_attr_leaf.h"
29 : #include "xfs_attr.h"
30 : #include "xfs_dir2_priv.h"
31 : #include "xfs_dir2.h"
32 : #include "xfs_symlink_remote.h"
33 :
/* Slab cache for deferred extent-swap intent items (see init_cache below). */
struct kmem_cache *xfs_swapext_intent_cache;
35 :
/* bmbt mappings adjacent to a pair of records. */
struct xfs_swapext_adjacent {
	struct xfs_bmbt_irec		left1;
	struct xfs_bmbt_irec		right1;
	struct xfs_bmbt_irec		left2;
	struct xfs_bmbt_irec		right2;
};

/* Start with all four neighbors marked as holes, i.e. not yet looked up. */
#define ADJACENT_INIT { \
	.left1 = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2 = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}

/* Information to help us reset reflink flag / CoW fork state after a swap. */

/* Previous state of the two inodes' reflink flags. */
#define XFS_REFLINK_STATE_IP1	(1U << 0)
#define XFS_REFLINK_STATE_IP2	(1U << 1)
56 :
57 : /*
58 : * If the reflink flag is set on either inode, make sure it has an incore CoW
59 : * fork, since all reflink inodes must have them. If there's a CoW fork and it
60 : * has extents in it, make sure the inodes are tagged appropriately so that
61 : * speculative preallocations can be GC'd if we run low of space.
62 : */
63 : static inline void
64 361912 : xfs_swapext_ensure_cowfork(
65 : struct xfs_inode *ip)
66 : {
67 361912 : struct xfs_ifork *cfork;
68 :
69 361912 : if (xfs_is_reflink_inode(ip))
70 314634 : xfs_ifork_init_cow(ip);
71 :
72 361912 : cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
73 361912 : if (!cfork)
74 : return;
75 340559 : if (cfork->if_bytes > 0)
76 49026 : xfs_inode_set_cowblocks_tag(ip);
77 : else
78 291533 : xfs_inode_clear_cowblocks_tag(ip);
79 : }
80 :
81 : /* Schedule an atomic extent swap. */
void
xfs_swapext_schedule(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	trace_xfs_swapext_defer(tp->t_mountp, sxi);
	/* Hand the intent to defer ops, which takes over logging/finishing. */
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
}
90 :
91 : /*
92 : * Adjust the on-disk inode size upwards if needed so that we never map extents
93 : * into the file past EOF. This is crucial so that log recovery won't get
94 : * confused by the sudden appearance of post-eof extents.
95 : */
96 : STATIC void
97 1426786 : xfs_swapext_update_size(
98 : struct xfs_trans *tp,
99 : struct xfs_inode *ip,
100 : struct xfs_bmbt_irec *imap,
101 : xfs_fsize_t new_isize)
102 : {
103 1426786 : struct xfs_mount *mp = tp->t_mountp;
104 1426786 : xfs_fsize_t len;
105 :
106 1426786 : if (new_isize < 0)
107 : return;
108 :
109 10536 : len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
110 : new_isize);
111 :
112 10536 : if (len <= ip->i_disk_size)
113 : return;
114 :
115 52 : trace_xfs_swapext_update_inode_size(ip, len);
116 :
117 52 : ip->i_disk_size = len;
118 52 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
119 : }
120 :
121 : static inline bool
122 : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
123 : {
124 6412300 : return sxi->sxi_blockcount > 0;
125 : }
126 :
127 : static inline bool
128 : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
129 : {
130 311988 : return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
131 : XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
132 : XFS_SWAP_EXT_CVT_INO2_SF);
133 : }
134 :
135 : static inline void
136 : sxi_advance(
137 : struct xfs_swapext_intent *sxi,
138 : const struct xfs_bmbt_irec *irec)
139 : {
140 1818333 : sxi->sxi_startoff1 += irec->br_blockcount;
141 1818333 : sxi->sxi_startoff2 += irec->br_blockcount;
142 1818333 : sxi->sxi_blockcount -= irec->br_blockcount;
143 1060976 : }
144 :
145 : #ifdef DEBUG
146 : static inline bool
147 176827 : xfs_swapext_need_rt_conversion(
148 : const struct xfs_swapext_req *req)
149 : {
150 176827 : struct xfs_inode *ip = req->ip2;
151 176827 : struct xfs_mount *mp = ip->i_mount;
152 :
153 : /* xattrs don't live on the rt device */
154 176827 : if (req->whichfork == XFS_ATTR_FORK)
155 : return false;
156 :
157 : /*
158 : * Caller got permission to use logged swapext, so log recovery will
159 : * finish the swap and not leave us with partially swapped rt extents
160 : * exposed to userspace.
161 : */
162 176827 : if (req->req_flags & XFS_SWAP_REQ_LOGGED)
163 : return false;
164 :
165 : /*
166 : * If we can't use log intent items at all, the only supported
167 : * operation is full fork swaps.
168 : */
169 6465 : if (!xfs_swapext_supported(mp))
170 : return false;
171 :
172 : /* Conversion is only needed for realtime files with big rt extents */
173 6461 : return xfs_inode_has_bigrtextents(ip);
174 : }
175 :
176 : static inline int
177 176827 : xfs_swapext_check_rt_extents(
178 : struct xfs_mount *mp,
179 : const struct xfs_swapext_req *req)
180 : {
181 176827 : struct xfs_bmbt_irec irec1, irec2;
182 176827 : xfs_fileoff_t startoff1 = req->startoff1;
183 176827 : xfs_fileoff_t startoff2 = req->startoff2;
184 176827 : xfs_filblks_t blockcount = req->blockcount;
185 176827 : uint32_t mod;
186 176827 : int nimaps;
187 176827 : int error;
188 :
189 176827 : if (!xfs_swapext_need_rt_conversion(req))
190 : return 0;
191 :
192 0 : while (blockcount > 0) {
193 : /* Read extent from the first file */
194 0 : nimaps = 1;
195 0 : error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
196 : &irec1, &nimaps, 0);
197 0 : if (error)
198 0 : return error;
199 0 : ASSERT(nimaps == 1);
200 :
201 : /* Read extent from the second file */
202 0 : nimaps = 1;
203 0 : error = xfs_bmapi_read(req->ip2, startoff2,
204 : irec1.br_blockcount, &irec2, &nimaps,
205 : 0);
206 0 : if (error)
207 0 : return error;
208 0 : ASSERT(nimaps == 1);
209 :
210 : /*
211 : * We can only swap as many blocks as the smaller of the two
212 : * extent maps.
213 : */
214 0 : irec1.br_blockcount = min(irec1.br_blockcount,
215 : irec2.br_blockcount);
216 :
217 : /* Both mappings must be aligned to the realtime extent size. */
218 0 : div_u64_rem(irec1.br_startoff, mp->m_sb.sb_rextsize, &mod);
219 0 : if (mod) {
220 0 : ASSERT(mod == 0);
221 0 : return -EINVAL;
222 : }
223 :
224 0 : div_u64_rem(irec2.br_startoff, mp->m_sb.sb_rextsize, &mod);
225 0 : if (mod) {
226 0 : ASSERT(mod == 0);
227 0 : return -EINVAL;
228 : }
229 :
230 0 : div_u64_rem(irec1.br_blockcount, mp->m_sb.sb_rextsize, &mod);
231 0 : if (mod) {
232 0 : ASSERT(mod == 0);
233 0 : return -EINVAL;
234 : }
235 :
236 0 : startoff1 += irec1.br_blockcount;
237 0 : startoff2 += irec1.br_blockcount;
238 0 : blockcount -= irec1.br_blockcount;
239 : }
240 :
241 : return 0;
242 : }
243 : #else
244 : # define xfs_swapext_check_rt_extents(mp, req) (0)
245 : #endif
246 :
247 : /* Check all extents to make sure we can actually swap them. */
248 : int
249 176827 : xfs_swapext_check_extents(
250 : struct xfs_mount *mp,
251 : const struct xfs_swapext_req *req)
252 : {
253 176827 : struct xfs_ifork *ifp1, *ifp2;
254 :
255 : /* No fork? */
256 176827 : ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
257 176827 : ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
258 176827 : if (!ifp1 || !ifp2)
259 : return -EINVAL;
260 :
261 : /* We don't know how to swap local format forks. */
262 176827 : if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
263 176827 : ifp2->if_format == XFS_DINODE_FMT_LOCAL)
264 : return -EINVAL;
265 :
266 176827 : return xfs_swapext_check_rt_extents(mp, req);
267 : }
268 :
269 : #ifdef CONFIG_XFS_QUOTA
270 : /* Log the actual updates to the quota accounting. */
271 : static inline void
272 757357 : xfs_swapext_update_quota(
273 : struct xfs_trans *tp,
274 : struct xfs_swapext_intent *sxi,
275 : struct xfs_bmbt_irec *irec1,
276 : struct xfs_bmbt_irec *irec2)
277 : {
278 757357 : int64_t ip1_delta = 0, ip2_delta = 0;
279 757357 : unsigned int qflag;
280 :
281 757357 : qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
282 : XFS_TRANS_DQ_BCOUNT;
283 :
284 1326394 : if (xfs_bmap_is_real_extent(irec1)) {
285 569037 : ip1_delta -= irec1->br_blockcount;
286 569037 : ip2_delta += irec1->br_blockcount;
287 : }
288 :
289 1327651 : if (xfs_bmap_is_real_extent(irec2)) {
290 570294 : ip1_delta += irec2->br_blockcount;
291 570294 : ip2_delta -= irec2->br_blockcount;
292 : }
293 :
294 757357 : xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
295 757357 : xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
296 757357 : }
297 : #else
298 : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2) ((void)0)
299 : #endif
300 :
301 : /* Decide if we want to skip this mapping from file1. */
static inline bool
xfs_swapext_can_skip_mapping(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = sxi->sxi_ip1->i_mount;

	/*
	 * NOTE: this predicate may shorten irec->br_blockcount so that the
	 * decision applies up to an rt extent boundary only; callers must use
	 * the possibly-trimmed mapping afterwards.
	 */

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written extents. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align with
	 * rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the mapping
	 *   down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
378 :
379 : /*
380 : * Walk forward through the file ranges in @sxi until we find two different
381 : * mappings to exchange. If there is work to do, return the mappings;
382 : * otherwise we've reached the end of the range and sxi_blockcount will be
383 : * zero.
384 : *
385 : * If the walk skips over a pair of mappings to the same storage, save them as
386 : * the left records in @adj (if provided) so that the simulation phase can
387 : * avoid an extra lookup.
388 : */
static int
xfs_swapext_find_mappings(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_swapext_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));

	/* The loop increment advances the cursor past the mapping in irec1. */
	for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
		/* Read extent from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
				sxi->sxi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != sxi->sxi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/* Note: can_skip_mapping may trim irec1 to an rtx boundary. */
		if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
			trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
			continue;
		}

		/* Read extent from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != sxi->sxi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only swap as many blocks as the smaller of the two
		 * extent maps.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
		trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);

		/* We found something to swap, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two extents mapped to the same physical block must not have
		 * different states; that's filesystem corruption.  Move on to
		 * the next extent if they're both holes or both the same
		 * physical extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(sxi->sxi_ip1,
					xfs_swapext_whichfork(sxi));
			xfs_bmap_mark_sick(sxi->sxi_ip2,
					xfs_swapext_whichfork(sxi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}
488 :
489 : /* Exchange these two mappings. */
static void
xfs_swapext_exchange_mappings(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_swapext_whichfork(sxi);

	/* Account the block movement against both inodes' dquots. */
	xfs_swapext_update_quota(tp, sxi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We swap the file offsets between the two maps
	 * and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information swapped.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);

	/* Make sure we're not mapping extents past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
				sxi->sxi_isize1);
		xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
				sxi->sxi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the SXD item, and if *blockcount is nonzero, it
	 * will log a new SXI item for the remainder and call us back.
	 */
	sxi_advance(sxi, irec1);
}
530 :
531 : /* Convert inode2's leaf attr fork back to shortform, if possible.. */
STATIC int
xfs_swapext_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args		args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_buf			*bp;
	int				forkoff;
	int				error;

	/* Only a single-leaf attr fork can be converted back to shortform. */
	if (!xfs_attr_is_leaf(sxi->sxi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	/* A zero forkoff means the attrs won't fit inline; leave them alone. */
	forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}
562 :
563 : /* Convert inode2's block dir fork back to shortform, if possible.. */
STATIC int
xfs_swapext_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args		args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr		sfh;
	struct xfs_buf			*bp;
	bool				isblock;
	int				size;
	int				error;

	/* Only single-block directories can be converted back to shortform. */
	error = xfs_dir2_isblock(&args, &isblock);
	if (error)
		return error;

	if (!isblock)
		return 0;

	error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
	if (error)
		return error;

	/* Leave it in block form if the sf image won't fit in the inode. */
	size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}
599 :
600 : /* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_swapext_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_inode		*ip = sxi->sxi_ip2;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char				*buf;
	int				error;

	/* Nothing to do if already local, or the target won't fit inline. */
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kmem_free(buf);
	return error;
}
641 :
static inline void
xfs_swapext_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	/* Caller determined this inode no longer needs the reflink iflag. */
	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
652 :
653 : /* Finish whatever work might come after a swap operation. */
static int
xfs_swapext_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
		int			error = 0;

		/*
		 * Try to convert inode2's fork back to shortform.  The
		 * conversion flag is cleared even on failure, so a failed
		 * conversion is not retried.
		 */
		if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
			error = xfs_swapext_attr_to_sf(tp, sxi);
		else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_dir_to_sf(tp, sxi);
		else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_link_to_sf(tp, sxi);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
		if (error)
			return error;
	}

	/* Clear each inode's reflink flag, one intent flag at a time. */
	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
	}

	return 0;
}
685 :
686 : /* Finish one extent swap, possibly log more. */
int
xfs_swapext_finish_one(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (sxi_has_more_swap_work(sxi)) {
		/*
		 * If the operation state says that some range of the files
		 * have not yet been swapped, look for extents in that range to
		 * swap.  If we find some extents, swap them.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (sxi_has_more_swap_work(sxi))
			xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * swap and either we just swapped the last extents in the
		 * range or we didn't find anything to swap, update the ondisk
		 * file sizes.
		 */
		if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
		    !sxi_has_more_swap_work(sxi)) {
			sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
			sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;

			xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
		}
	} else if (sxi_has_postop_work(sxi)) {
		/*
		 * Now that we're finished with the swap operation, complete
		 * the post-op cleanup work.
		 */
		error = xfs_swapext_do_postop_work(tp, sxi);
		if (error)
			return error;
	}

	/* Error injection hook (XFS_ERRTAG_SWAPEXT_FINISH_ONE). */
	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
		trace_xfs_swapext_defer(tp->t_mountp, sxi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the swapping work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
		xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
		xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
	}

	return 0;
}
753 :
754 : /*
755 : * Compute the amount of bmbt blocks we should reserve for each file. In the
756 : * worst case, each exchange will fill a hole with a new mapping, which could
757 : * result in a btree split every time we add a new leaf block.
758 : */
759 : static inline uint64_t
760 : xfs_swapext_bmbt_blocks(
761 : struct xfs_mount *mp,
762 : const struct xfs_swapext_req *req)
763 : {
764 390871 : return howmany_64(req->nr_exchanges,
765 390871 : XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
766 390871 : XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
767 : }
768 :
769 : static inline uint64_t
770 390874 : xfs_swapext_rmapbt_blocks(
771 : struct xfs_mount *mp,
772 : const struct xfs_swapext_req *req)
773 : {
774 390874 : if (!xfs_has_rmapbt(mp))
775 : return 0;
776 390872 : if (XFS_IS_REALTIME_INODE(req->ip1))
777 : return 0;
778 :
779 390872 : return howmany_64(req->nr_exchanges,
780 390872 : XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
781 390872 : XFS_RMAPADD_SPACE_RES(mp);
782 : }
783 :
784 : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
785 : int
786 390871 : xfs_swapext_estimate_overhead(
787 : struct xfs_swapext_req *req)
788 : {
789 390871 : struct xfs_mount *mp = req->ip1->i_mount;
790 390871 : xfs_filblks_t bmbt_blocks;
791 390871 : xfs_filblks_t rmapbt_blocks;
792 390871 : xfs_filblks_t resblks = req->resblks;
793 :
794 : /*
795 : * Compute the number of bmbt and rmapbt blocks we might need to handle
796 : * the estimated number of exchanges.
797 : */
798 390871 : bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
799 390871 : rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
800 :
801 390871 : trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
802 :
803 : /* Make sure the change in file block count doesn't overflow. */
804 390870 : if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
805 : return -EFBIG;
806 390870 : if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
807 : return -EFBIG;
808 :
809 : /*
810 : * Add together the number of blocks we need to handle btree growth,
811 : * then add it to the number of blocks we need to reserve to this
812 : * transaction.
813 : */
814 390870 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
815 : return -ENOSPC;
816 390870 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
817 : return -ENOSPC;
818 390870 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
819 : return -ENOSPC;
820 390870 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
821 : return -ENOSPC;
822 :
823 : /* Can't actually reserve more than UINT_MAX blocks. */
824 390870 : if (req->resblks > UINT_MAX)
825 : return -ENOSPC;
826 :
827 390870 : req->resblks = resblks;
828 390870 : trace_xfs_swapext_final_estimate(req);
829 390870 : return 0;
830 : }
831 :
832 : /* Decide if we can merge two real extents. */
static inline bool
can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Only merge mappings backed by real blocks. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	/*
	 * Mergeable if logically and physically contiguous, in the same
	 * written/unwritten state, and the combined length still fits in a
	 * single bmbt record.
	 */
	if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
855 :
/*
 * Contiguity state for estimating how one exchange changes the extent count.
 * "C" flags describe the current mapping being removed; "N" flags describe
 * the new mapping taking its place.
 */
#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
865 :
866 : /* Estimate the effect of a single swap on extent count. */
867 : static inline int
868 1515142 : delta_nextents_step(
869 : struct xfs_mount *mp,
870 : const struct xfs_bmbt_irec *left,
871 : const struct xfs_bmbt_irec *curr,
872 : const struct xfs_bmbt_irec *new,
873 : const struct xfs_bmbt_irec *right)
874 : {
875 1515142 : bool lhole, rhole, chole, nhole;
876 1515142 : unsigned int state = 0;
877 1515142 : int ret = 0;
878 :
879 1515142 : lhole = left->br_startblock == HOLESTARTBLOCK;
880 1515142 : rhole = right->br_startblock == HOLESTARTBLOCK;
881 1515142 : chole = curr->br_startblock == HOLESTARTBLOCK;
882 1515142 : nhole = new->br_startblock == HOLESTARTBLOCK;
883 :
884 1515142 : if (chole)
885 375577 : state |= CHOLE;
886 1515142 : if (!lhole && !chole && can_merge(left, curr))
887 194 : state |= CLEFT_CONTIG;
888 1515142 : if (!rhole && !chole && can_merge(curr, right))
889 316451 : state |= CRIGHT_CONTIG;
890 1515142 : if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
891 148 : left->br_startblock + curr->br_startblock +
892 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
893 1 : state &= ~CRIGHT_CONTIG;
894 :
895 1515142 : if (nhole)
896 375577 : state |= NHOLE;
897 1515142 : if (!lhole && !nhole && can_merge(left, new))
898 244029 : state |= NLEFT_CONTIG;
899 1515142 : if (!rhole && !nhole && can_merge(new, right))
900 5 : state |= NRIGHT_CONTIG;
901 1515142 : if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
902 4 : left->br_startblock + new->br_startblock +
903 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
904 0 : state &= ~NRIGHT_CONTIG;
905 :
906 1515142 : switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
907 147 : case CLEFT_CONTIG | CRIGHT_CONTIG:
908 : /*
909 : * left/curr/right are the same extent, so deleting curr causes
910 : * 2 new extents to be created.
911 : */
912 147 : ret += 2;
913 147 : break;
914 823068 : case 0:
915 : /*
916 : * curr is not contiguous with any extent, so we remove curr
917 : * completely
918 : */
919 823068 : ret--;
920 823068 : break;
921 : case CHOLE:
922 : /* hole, do nothing */
923 : break;
924 : case CLEFT_CONTIG:
925 : case CRIGHT_CONTIG:
926 : /* trim either left or right, no change */
927 : break;
928 : }
929 :
930 1515142 : switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
931 4 : case NLEFT_CONTIG | NRIGHT_CONTIG:
932 : /*
933 : * left/curr/right will become the same extent, so adding
934 : * curr causes the deletion of right.
935 : */
936 4 : ret--;
937 4 : break;
938 895535 : case 0:
939 : /* new is not contiguous with any extent */
940 895535 : ret++;
941 895535 : break;
942 : case NHOLE:
943 : /* hole, do nothing. */
944 : break;
945 : case NLEFT_CONTIG:
946 : case NRIGHT_CONTIG:
947 : /* new is absorbed into left or right, no change */
948 : break;
949 : }
950 :
951 1515142 : trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
952 : state);
953 1515142 : return ret;
954 : }
955 :
956 : /* Make sure we don't overflow the extent counters. */
static inline int
ensure_delta_nextents(
	struct xfs_swapext_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, req->whichfork);
	xfs_extnum_t		max_extents;
	bool			large_extcount;

	/* A shrinking extent count can never overflow. */
	if (delta < 0)
		return 0;

	/* Error injection: pretend the extent counter maxes out at 10. */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
		if (ifp->if_nextents + delta > 10)
			return -EFBIG;
	}

	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		large_extcount = true;
	else
		large_extcount = xfs_inode_has_large_extent_counts(ip);

	max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
	if (ifp->if_nextents + delta <= max_extents)
		return 0;
	if (large_extcount)
		return -EFBIG;
	if (!xfs_has_large_extent_counts(mp))
		return -EFBIG;

	/*
	 * The small counter would overflow but the filesystem supports large
	 * extent counts; see if upgrading the inode's counter would help.
	 */
	max_extents = xfs_iext_max_nextents(true, req->whichfork);
	if (ifp->if_nextents + delta > max_extents)
		return -EFBIG;

	/* Ask the caller to upgrade the inode to the large counter format. */
	req->req_flags |= XFS_SWAP_REQ_NREXT64;
	return 0;
}
996 :
997 : /* Find the next extent after irec. */
998 : static inline int
999 1515142 : get_next_ext(
1000 : struct xfs_inode *ip,
1001 : int bmap_flags,
1002 : const struct xfs_bmbt_irec *irec,
1003 : struct xfs_bmbt_irec *nrec)
1004 : {
1005 1515142 : xfs_fileoff_t off;
1006 1515142 : xfs_filblks_t blockcount;
1007 1515142 : int nimaps = 1;
1008 1515142 : int error;
1009 :
1010 1515142 : off = irec->br_startoff + irec->br_blockcount;
1011 1515142 : blockcount = XFS_MAX_FILEOFF - off;
1012 1515142 : error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
1013 1515142 : if (error)
1014 : return error;
1015 1515142 : if (nrec->br_startblock == DELAYSTARTBLOCK ||
1016 1513064 : nrec->br_startoff != off) {
1017 : /*
1018 : * If we don't get the extent we want, return a zero-length
1019 : * mapping, which our estimator function will pretend is a hole.
1020 : * We shouldn't get delalloc reservations.
1021 : */
1022 2078 : nrec->br_startblock = HOLESTARTBLOCK;
1023 : }
1024 :
1025 : return 0;
1026 : }
1027 :
1028 : int __init
1029 12 : xfs_swapext_intent_init_cache(void)
1030 : {
1031 12 : xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
1032 : sizeof(struct xfs_swapext_intent),
1033 : 0, 0, NULL);
1034 :
1035 12 : return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
1036 : }
1037 :
/* Tear down the swapext intent cache set up by the init function above. */
void
xfs_swapext_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_swapext_intent_cache);
	/* Clear the pointer so stale accesses after teardown fault loudly. */
	xfs_swapext_intent_cache = NULL;
}
1044 :
1045 : /*
1046 : * Decide if we will swap the reflink flags between the two files after the
1047 : * swap. The only time we want to do this is if we're exchanging all extents
1048 : * under EOF and the inode reflink flags have different states.
1049 : */
1050 : static inline bool
1051 360677 : sxi_can_exchange_reflink_flags(
1052 : const struct xfs_swapext_req *req,
1053 : unsigned int reflink_state)
1054 : {
1055 360677 : struct xfs_mount *mp = req->ip1->i_mount;
1056 :
1057 721354 : if (hweight32(reflink_state) != 1)
1058 : return false;
1059 40 : if (req->startoff1 != 0 || req->startoff2 != 0)
1060 : return false;
1061 32 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
1062 : return false;
1063 32 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
1064 0 : return false;
1065 : return true;
1066 : }
1067 :
1068 :
/*
 * Allocate and initialize a new incore intent item from a request.
 *
 * Translates the caller-facing request flags into the on-intent sxi_flags /
 * sxi_op_flags encoding.  If @reflink_state is non-NULL, the pre-operation
 * reflink flag state of both inodes is returned through it so the caller can
 * fix up the flags after the swap (see xfs_swapext_ensure_reflink).
 *
 * Never returns NULL: the allocation uses __GFP_NOFAIL.
 */
struct xfs_swapext_intent *
xfs_swapext_init_intent(
	const struct xfs_swapext_req	*req,
	unsigned int			*reflink_state)
{
	struct xfs_swapext_intent	*sxi;
	unsigned int			rs = 0;

	sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&sxi->sxi_list);
	sxi->sxi_ip1 = req->ip1;
	sxi->sxi_ip2 = req->ip2;
	sxi->sxi_startoff1 = req->startoff1;
	sxi->sxi_startoff2 = req->startoff2;
	sxi->sxi_blockcount = req->blockcount;
	/* -1 is the sentinel for "do not update the on-disk file sizes". */
	sxi->sxi_isize1 = sxi->sxi_isize2 = -1;

	if (req->whichfork == XFS_ATTR_FORK)
		sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;

	/*
	 * Size exchanging only applies to the data fork; capture each file's
	 * current disk size as the *other* file's post-swap size.
	 */
	if (req->whichfork == XFS_DATA_FORK &&
	    (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
		sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
		sxi->sxi_isize1 = req->ip2->i_disk_size;
		sxi->sxi_isize2 = req->ip1->i_disk_size;
	}

	if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
		sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
	if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
		sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;

	if (req->req_flags & XFS_SWAP_REQ_LOGGED)
		sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;

	if (req->whichfork == XFS_DATA_FORK) {
		/*
		 * Record the state of each inode's reflink flag before the
		 * operation.
		 */
		if (xfs_is_reflink_inode(req->ip1))
			rs |= XFS_REFLINK_STATE_IP1;
		if (xfs_is_reflink_inode(req->ip2))
			rs |= XFS_REFLINK_STATE_IP2;

		/*
		 * Figure out if we're clearing the reflink flags (which
		 * effectively swaps them) after the operation.
		 */
		if (sxi_can_exchange_reflink_flags(req, rs)) {
			if (rs & XFS_REFLINK_STATE_IP1)
				sxi->sxi_flags |=
					XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
			if (rs & XFS_REFLINK_STATE_IP2)
				sxi->sxi_flags |=
					XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
		}
	}

	if (reflink_state)
		*reflink_state = rs;
	return sxi;
}
1136 :
1137 : /*
1138 : * Estimate the number of exchange operations and the number of file blocks
1139 : * in each file that will be affected by the exchange operation.
1140 : */
1141 : int
1142 219771 : xfs_swapext_estimate(
1143 : struct xfs_swapext_req *req)
1144 : {
1145 219771 : struct xfs_swapext_intent *sxi;
1146 219771 : struct xfs_bmbt_irec irec1, irec2;
1147 219771 : struct xfs_swapext_adjacent adj = ADJACENT_INIT;
1148 219771 : xfs_filblks_t ip1_blocks = 0, ip2_blocks = 0;
1149 219771 : int64_t d_nexts1, d_nexts2;
1150 219771 : int bmap_flags;
1151 219771 : int error;
1152 :
1153 219771 : ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
1154 :
1155 219771 : bmap_flags = xfs_bmapi_aflag(req->whichfork);
1156 219771 : sxi = xfs_swapext_init_intent(req, NULL);
1157 :
1158 : /*
1159 : * To guard against the possibility of overflowing the extent counters,
1160 : * we have to estimate an upper bound on the potential increase in that
1161 : * counter. We can split the extent at each end of the range, and for
1162 : * each step of the swap we can split the extent that we're working on
1163 : * if the extents do not align.
1164 : */
1165 219771 : d_nexts1 = d_nexts2 = 3;
1166 :
1167 977342 : while (sxi_has_more_swap_work(sxi)) {
1168 : /*
1169 : * Walk through the file ranges until we find something to
1170 : * swap. Because we're simulating the swap, pass in adj to
1171 : * capture skipped mappings for correct estimation of bmbt
1172 : * record merges.
1173 : */
1174 837348 : error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
1175 837348 : if (error)
1176 0 : goto out_free;
1177 837348 : if (!sxi_has_more_swap_work(sxi))
1178 : break;
1179 :
1180 : /* Update accounting. */
1181 1326628 : if (xfs_bmap_is_real_extent(&irec1))
1182 569057 : ip1_blocks += irec1.br_blockcount;
1183 1328079 : if (xfs_bmap_is_real_extent(&irec2))
1184 570508 : ip2_blocks += irec2.br_blockcount;
1185 757571 : req->nr_exchanges++;
1186 :
1187 : /* Read the next extents from both files. */
1188 757571 : error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
1189 757571 : if (error)
1190 0 : goto out_free;
1191 :
1192 757571 : error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
1193 757571 : if (error)
1194 0 : goto out_free;
1195 :
1196 : /* Update extent count deltas. */
1197 757571 : d_nexts1 += delta_nextents_step(req->ip1->i_mount,
1198 : &adj.left1, &irec1, &irec2, &adj.right1);
1199 :
1200 757571 : d_nexts2 += delta_nextents_step(req->ip1->i_mount,
1201 : &adj.left2, &irec2, &irec1, &adj.right2);
1202 :
1203 : /* Now pretend we swapped the extents. */
1204 757571 : if (can_merge(&adj.left2, &irec1))
1205 166692 : adj.left2.br_blockcount += irec1.br_blockcount;
1206 : else
1207 590879 : memcpy(&adj.left2, &irec1, sizeof(irec1));
1208 :
1209 757571 : if (can_merge(&adj.left1, &irec2))
1210 77337 : adj.left1.br_blockcount += irec2.br_blockcount;
1211 : else
1212 680234 : memcpy(&adj.left1, &irec2, sizeof(irec2));
1213 :
1214 757571 : sxi_advance(sxi, &irec1);
1215 : }
1216 :
1217 : /* Account for the blocks that are being exchanged. */
1218 219771 : if (XFS_IS_REALTIME_INODE(req->ip1) &&
1219 2 : req->whichfork == XFS_DATA_FORK) {
1220 2 : req->ip1_rtbcount = ip1_blocks;
1221 2 : req->ip2_rtbcount = ip2_blocks;
1222 : } else {
1223 219769 : req->ip1_bcount = ip1_blocks;
1224 219769 : req->ip2_bcount = ip2_blocks;
1225 : }
1226 :
1227 : /*
1228 : * Make sure that both forks have enough slack left in their extent
1229 : * counters that the swap operation will not overflow.
1230 : */
1231 219771 : trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
1232 219771 : if (req->ip1 == req->ip2) {
1233 170274 : error = ensure_delta_nextents(req, req->ip1,
1234 : d_nexts1 + d_nexts2);
1235 : } else {
1236 49497 : error = ensure_delta_nextents(req, req->ip1, d_nexts1);
1237 49497 : if (error)
1238 2 : goto out_free;
1239 49495 : error = ensure_delta_nextents(req, req->ip2, d_nexts2);
1240 : }
1241 219769 : if (error)
1242 0 : goto out_free;
1243 :
1244 219769 : trace_xfs_swapext_initial_estimate(req);
1245 219769 : error = xfs_swapext_estimate_overhead(req);
1246 219771 : out_free:
1247 219771 : kmem_cache_free(xfs_swapext_intent_cache, sxi);
1248 219771 : return error;
1249 : }
1250 :
/* Set the reflink flag on @ip and log the inode core to the transaction. */
static inline void
xfs_swapext_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
1261 :
1262 : /*
1263 : * If either file has shared blocks and we're swapping data forks, we must flag
1264 : * the other file as having shared blocks so that we get the shared-block rmap
1265 : * functions if we need to fix up the rmaps.
1266 : */
1267 : void
1268 221802 : xfs_swapext_ensure_reflink(
1269 : struct xfs_trans *tp,
1270 : const struct xfs_swapext_intent *sxi,
1271 : unsigned int reflink_state)
1272 : {
1273 221802 : if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
1274 157308 : !xfs_is_reflink_inode(sxi->sxi_ip2))
1275 0 : xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
1276 :
1277 221802 : if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
1278 157328 : !xfs_is_reflink_inode(sxi->sxi_ip1))
1279 20 : xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
1280 221802 : }
1281 :
1282 : /* Widen the extent counts of both inodes if necessary. */
1283 : static inline void
1284 221796 : xfs_swapext_upgrade_extent_counts(
1285 : struct xfs_trans *tp,
1286 : const struct xfs_swapext_intent *sxi)
1287 : {
1288 221796 : if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
1289 : return;
1290 :
1291 0 : sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1292 0 : xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
1293 :
1294 0 : sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1295 0 : xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
1296 : }
1297 :
1298 : /*
1299 : * Schedule a swap a range of extents from one inode to another. If the atomic
1300 : * swap feature is enabled, then the operation progress can be resumed even if
1301 : * the system goes down. The caller must commit the transaction to start the
1302 : * work.
1303 : *
1304 : * The caller must ensure the inodes must be joined to the transaction and
1305 : * ILOCKd; they will still be joined to the transaction at exit.
1306 : */
1307 : void
1308 221796 : xfs_swapext(
1309 : struct xfs_trans *tp,
1310 : const struct xfs_swapext_req *req)
1311 : {
1312 221796 : struct xfs_swapext_intent *sxi;
1313 221796 : unsigned int reflink_state;
1314 :
1315 221796 : ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
1316 221796 : ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
1317 221796 : ASSERT(req->whichfork != XFS_COW_FORK);
1318 221796 : ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
1319 221796 : if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
1320 4323 : ASSERT(req->whichfork == XFS_DATA_FORK);
1321 221796 : if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
1322 45089 : ASSERT(req->whichfork == XFS_ATTR_FORK ||
1323 : (req->whichfork == XFS_DATA_FORK &&
1324 : (S_ISDIR(VFS_I(req->ip2)->i_mode) ||
1325 : S_ISLNK(VFS_I(req->ip2)->i_mode))));
1326 :
1327 221796 : if (req->blockcount == 0)
1328 0 : return;
1329 :
1330 221796 : sxi = xfs_swapext_init_intent(req, &reflink_state);
1331 221796 : xfs_swapext_schedule(tp, sxi);
1332 221796 : xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
1333 221796 : xfs_swapext_upgrade_extent_counts(tp, sxi);
1334 : }
|