LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_swapext.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 454 519 87.5 %
Date: 2023-07-31 20:08:34 Functions: 32 32 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_defer.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_bmap.h"
      17             : #include "xfs_icache.h"
      18             : #include "xfs_quota.h"
      19             : #include "xfs_swapext.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_bmap_btree.h"
      22             : #include "xfs_trans_space.h"
      23             : #include "xfs_error.h"
      24             : #include "xfs_errortag.h"
      25             : #include "xfs_health.h"
      26             : #include "xfs_da_format.h"
      27             : #include "xfs_da_btree.h"
      28             : #include "xfs_attr_leaf.h"
      29             : #include "xfs_attr.h"
      30             : #include "xfs_dir2_priv.h"
      31             : #include "xfs_dir2.h"
      32             : #include "xfs_symlink_remote.h"
      33             : #include "xfs_rtbitmap.h"
      34             : 
      35             : struct kmem_cache       *xfs_swapext_intent_cache;
      36             : 
      37             : /* bmbt mappings adjacent to a pair of records. */
      38             : struct xfs_swapext_adjacent {
      39             :         struct xfs_bmbt_irec            left1;
      40             :         struct xfs_bmbt_irec            right1;
      41             :         struct xfs_bmbt_irec            left2;
      42             :         struct xfs_bmbt_irec            right2;
      43             : };
      44             : 
      45             : #define ADJACENT_INIT { \
      46             :         .left1  = { .br_startblock = HOLESTARTBLOCK }, \
      47             :         .right1 = { .br_startblock = HOLESTARTBLOCK }, \
      48             :         .left2  = { .br_startblock = HOLESTARTBLOCK }, \
      49             :         .right2 = { .br_startblock = HOLESTARTBLOCK }, \
      50             : }
      51             : 
      52             : /* Information to help us reset reflink flag / CoW fork state after a swap. */
      53             : 
      54             : /* Previous state of the two inodes' reflink flags. */
      55             : #define XFS_REFLINK_STATE_IP1           (1U << 0)
      56             : #define XFS_REFLINK_STATE_IP2           (1U << 1)
      57             : 
      58             : /*
      59             :  * If the reflink flag is set on either inode, make sure it has an incore CoW
      60             :  * fork, since all reflink inodes must have them.  If there's a CoW fork and it
      61             :  * has extents in it, make sure the inodes are tagged appropriately so that
      62             :  * speculative preallocations can be GC'd if we run low on space.
      63             :  */
      64             : static inline void
      65     3937868 : xfs_swapext_ensure_cowfork(
      66             :         struct xfs_inode        *ip)
      67             : {
      68     3937868 :         struct xfs_ifork        *cfork;
      69             : 
      70     3937868 :         if (xfs_is_reflink_inode(ip))
      71     3618322 :                 xfs_ifork_init_cow(ip);
      72             : 
      73     3937858 :         cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
      74     3937858 :         if (!cfork)
      75             :                 return;
      76     3662492 :         if (cfork->if_bytes > 0)
      77     1883951 :                 xfs_inode_set_cowblocks_tag(ip);
      78             :         else
      79     1778541 :                 xfs_inode_clear_cowblocks_tag(ip);
      80             : }
      81             : 
      82             : /* Schedule an atomic extent swap. */
      83             : void
      84     2048644 : xfs_swapext_schedule(
      85             :         struct xfs_trans                *tp,
      86             :         struct xfs_swapext_intent       *sxi)
      87             : {
      88     2048644 :         trace_xfs_swapext_defer(tp->t_mountp, sxi);
      89     2048618 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
      90     2048644 : }
      91             : 
      92             : /*
      93             :  * Adjust the on-disk inode size upwards if needed so that we never map extents
      94             :  * into the file past EOF.  This is crucial so that log recovery won't get
      95             :  * confused by the sudden appearance of post-eof extents.
      96             :  */
      97             : STATIC void
      98    13939926 : xfs_swapext_update_size(
      99             :         struct xfs_trans        *tp,
     100             :         struct xfs_inode        *ip,
     101             :         struct xfs_bmbt_irec    *imap,
     102             :         xfs_fsize_t             new_isize)
     103             : {
     104    13939926 :         struct xfs_mount        *mp = tp->t_mountp;
     105    13939926 :         xfs_fsize_t             len;
     106             : 
     107    13939926 :         if (new_isize < 0)
     108             :                 return;
     109             : 
     110      399368 :         len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
     111             :                   new_isize);
     112             : 
     113      399368 :         if (len <= ip->i_disk_size)
     114             :                 return;
     115             : 
     116         411 :         trace_xfs_swapext_update_inode_size(ip, len);
     117             : 
     118         411 :         ip->i_disk_size = len;
     119         411 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     120             : }
     121             : 
     122             : static inline bool
     123             : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
     124             : {
     125    59449795 :         return sxi->sxi_blockcount > 0;
     126             : }
     127             : 
     128             : static inline bool
     129             : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
     130             : {
     131     2266455 :         return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
     132             :                                  XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
     133             :                                  XFS_SWAP_EXT_CVT_INO2_SF);
     134             : }
     135             : 
     136             : static inline void
     137             : sxi_advance(
     138             :         struct xfs_swapext_intent       *sxi,
     139             :         const struct xfs_bmbt_irec      *irec)
     140             : {
     141    17342391 :         sxi->sxi_startoff1 += irec->br_blockcount;
     142    17342391 :         sxi->sxi_startoff2 += irec->br_blockcount;
     143    17342391 :         sxi->sxi_blockcount -= irec->br_blockcount;
     144    10280989 : }
     145             : 
     146             : #ifdef DEBUG
     147             : static inline bool
     148     1861796 : xfs_swapext_need_rt_conversion(
     149             :         const struct xfs_swapext_req    *req)
     150             : {
     151     1861796 :         struct xfs_inode                *ip = req->ip2;
     152     1861796 :         struct xfs_mount                *mp = ip->i_mount;
     153             : 
     154             :         /* xattrs don't live on the rt device */
     155     1861796 :         if (req->whichfork == XFS_ATTR_FORK)
     156             :                 return false;
     157             : 
     158             :         /*
     159             :          * Caller got permission to use logged swapext, so log recovery will
     160             :          * finish the swap and not leave us with partially swapped rt extents
     161             :          * exposed to userspace.
     162             :          */
     163     1861796 :         if (req->req_flags & XFS_SWAP_REQ_LOGGED)
     164             :                 return false;
     165             : 
     166             :         /*
     167             :          * If we can't use log intent items at all, the only supported
     168             :          * operation is full fork swaps.
     169             :          */
     170       35251 :         if (!xfs_swapext_supported(mp))
     171             :                 return false;
     172             : 
     173             :         /* Conversion is only needed for realtime files with big rt extents */
     174       28227 :         return xfs_inode_has_bigrtextents(ip);
     175             : }
     176             : 
     177             : static inline int
     178     1861802 : xfs_swapext_check_rt_extents(
     179             :         struct xfs_mount                *mp,
     180             :         const struct xfs_swapext_req    *req)
     181             : {
     182     1861802 :         struct xfs_bmbt_irec            irec1, irec2;
     183     1861802 :         xfs_fileoff_t                   startoff1 = req->startoff1;
     184     1861802 :         xfs_fileoff_t                   startoff2 = req->startoff2;
     185     1861802 :         xfs_filblks_t                   blockcount = req->blockcount;
     186     1861802 :         uint32_t                        mod;
     187     1861802 :         int                             nimaps;
     188     1861802 :         int                             error;
     189             : 
     190     1861802 :         if (!xfs_swapext_need_rt_conversion(req))
     191             :                 return 0;
     192             : 
     193      331324 :         while (blockcount > 0) {
     194             :                 /* Read extent from the first file */
     195      326061 :                 nimaps = 1;
     196      326061 :                 error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
     197             :                                 &irec1, &nimaps, 0);
     198      326051 :                 if (error)
     199           0 :                         return error;
     200      326051 :                 ASSERT(nimaps == 1);
     201             : 
     202             :                 /* Read extent from the second file */
     203      326051 :                 nimaps = 1;
     204      326051 :                 error = xfs_bmapi_read(req->ip2, startoff2,
     205             :                                 irec1.br_blockcount, &irec2, &nimaps,
     206             :                                 0);
     207      326051 :                 if (error)
     208           0 :                         return error;
     209      326051 :                 ASSERT(nimaps == 1);
     210             : 
     211             :                 /*
     212             :                  * We can only swap as many blocks as the smaller of the two
     213             :                  * extent maps.
     214             :                  */
     215      326051 :                 irec1.br_blockcount = min(irec1.br_blockcount,
     216             :                                           irec2.br_blockcount);
     217             : 
     218             :                 /* Both mappings must be aligned to the realtime extent size. */
     219      326051 :                 xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
     220      326051 :                 if (mod) {
     221           0 :                         ASSERT(mod == 0);
     222           0 :                         return -EINVAL;
     223             :                 }
     224             : 
     225      326051 :                 xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
     226      326051 :                 if (mod) {
     227           0 :                         ASSERT(mod == 0);
     228           0 :                         return -EINVAL;
     229             :                 }
     230             : 
     231      326051 :                 xfs_rtb_to_rtx(mp, irec1.br_blockcount, &mod);
     232      326051 :                 if (mod) {
     233           0 :                         ASSERT(mod == 0);
     234           0 :                         return -EINVAL;
     235             :                 }
     236             : 
     237      326051 :                 startoff1 += irec1.br_blockcount;
     238      326051 :                 startoff2 += irec1.br_blockcount;
     239      326051 :                 blockcount -= irec1.br_blockcount;
     240             :         }
     241             : 
     242             :         return 0;
     243             : }
     244             : #else
     245             : # define xfs_swapext_check_rt_extents(mp, req)          (0)
     246             : #endif
     247             : 
     248             : /* Check all extents to make sure we can actually swap them. */
     249             : int
     250     1861808 : xfs_swapext_check_extents(
     251             :         struct xfs_mount                *mp,
     252             :         const struct xfs_swapext_req    *req)
     253             : {
     254     1861808 :         struct xfs_ifork                *ifp1, *ifp2;
     255             : 
     256             :         /* No fork? */
     257     1861808 :         ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
     258     1861802 :         ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
     259     1861802 :         if (!ifp1 || !ifp2)
     260             :                 return -EINVAL;
     261             : 
     262             :         /* We don't know how to swap local format forks. */
     263     1861802 :         if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
     264     1861802 :             ifp2->if_format == XFS_DINODE_FMT_LOCAL)
     265             :                 return -EINVAL;
     266             : 
     267     1861802 :         return xfs_swapext_check_rt_extents(mp, req);
     268             : }
     269             : 
     270             : #ifdef CONFIG_XFS_QUOTA
     271             : /* Log the actual updates to the quota accounting. */
     272             : static inline void
     273     7061395 : xfs_swapext_update_quota(
     274             :         struct xfs_trans                *tp,
     275             :         struct xfs_swapext_intent       *sxi,
     276             :         struct xfs_bmbt_irec            *irec1,
     277             :         struct xfs_bmbt_irec            *irec2)
     278             : {
     279     7061395 :         int64_t                         ip1_delta = 0, ip2_delta = 0;
     280     7061395 :         unsigned int                    qflag;
     281             : 
     282     7061395 :         qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
     283             :                                                       XFS_TRANS_DQ_BCOUNT;
     284             : 
     285    12205441 :         if (xfs_bmap_is_real_extent(irec1)) {
     286     5144049 :                 ip1_delta -= irec1->br_blockcount;
     287     5144049 :                 ip2_delta += irec1->br_blockcount;
     288             :         }
     289             : 
     290    12206220 :         if (xfs_bmap_is_real_extent(irec2)) {
     291     5144825 :                 ip1_delta += irec2->br_blockcount;
     292     5144825 :                 ip2_delta -= irec2->br_blockcount;
     293             :         }
     294             : 
     295     7061395 :         xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
     296     7061401 :         xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
     297     7061401 : }
     298             : #else
     299             : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2)        ((void)0)
     300             : #endif
     301             : 
     302             : /* Decide if we want to skip this mapping from file1. */
     303             : static inline bool
     304    17342404 : xfs_swapext_can_skip_mapping(
     305             :         struct xfs_swapext_intent       *sxi,
     306             :         struct xfs_bmbt_irec            *irec)
     307             : {
     308    17342404 :         struct xfs_mount                *mp = sxi->sxi_ip1->i_mount;
     309             : 
     310             :         /* Do not skip this mapping if the caller did not tell us to. */
     311    17342404 :         if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
     312             :                 return false;
     313             : 
     314             :         /* Do not skip mapped, written extents. */
     315         574 :         if (xfs_bmap_is_written_extent(irec))
     316             :                 return false;
     317             : 
     318             :         /*
     319             :          * The mapping is unwritten or a hole.  It cannot be a delalloc
     320             :          * reservation because we already excluded those.  It cannot be an
     321             :          * unwritten extent with dirty page cache because we flushed the page
     322             :          * cache.  For files where the allocation unit is 1FSB (files on the
     323             :          * data dev, rt files if the extent size is 1FSB), we can safely
     324             :          * skip this mapping.
     325             :          */
     326         376 :         if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
     327             :                 return true;
     328             : 
     329             :         /*
     330             :          * For a realtime file with a multi-fsb allocation unit, the decision
     331             :          * is trickier because we can only swap full allocation units.
     332             :          * Unwritten mappings can appear in the middle of an rtx if the rtx is
     333             :          * partially written, but they can also appear for preallocations.
     334             :          *
     335             :          * If the mapping is a hole, skip it entirely.  Holes should align with
     336             :          * rtx boundaries.
     337             :          */
     338         104 :         if (!xfs_bmap_is_real_extent(irec))
     339             :                 return true;
     340             : 
     341             :         /*
     342             :          * All mappings below this point are unwritten.
     343             :          *
     344             :          * - If the beginning is not aligned to an rtx, trim the end of the
     345             :          *   mapping so that it does not cross an rtx boundary, and swap it.
     346             :          *
     347             :          * - If both ends are aligned to an rtx, skip the entire mapping.
     348             :          */
     349           8 :         if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
     350           0 :                 xfs_fileoff_t   new_end;
     351             : 
     352           0 :                 new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
     353           0 :                 irec->br_blockcount = new_end - irec->br_startoff;
     354           0 :                 return false;
     355             :         }
     356           8 :         if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
     357             :                 return true;
     358             : 
     359             :         /*
     360             :          * All mappings below this point are unwritten, start on an rtx
     361             :          * boundary, and do not end on an rtx boundary.
     362             :          *
     363             :          * - If the mapping is longer than one rtx, trim the end of the mapping
     364             :          *   down to an rtx boundary and skip it.
     365             :          *
     366             :          * - The mapping is shorter than one rtx.  Swap it.
     367             :          */
     368           0 :         if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
     369           0 :                 xfs_fileoff_t   new_end;
     370             : 
     371           0 :                 new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
     372             :                                 mp->m_sb.sb_rextsize);
     373           0 :                 irec->br_blockcount = new_end - irec->br_startoff;
     374           0 :                 return true;
     375             :         }
     376             : 
     377             :         return false;
     378             : }
     379             : 
     380             : /*
     381             :  * Walk forward through the file ranges in @sxi until we find two different
     382             :  * mappings to exchange.  If there is work to do, return the mappings;
     383             :  * otherwise we've reached the end of the range and sxi_blockcount will be
     384             :  * zero.
     385             :  *
     386             :  * If the walk skips over a pair of mappings to the same storage, save them as
     387             :  * the left records in @adj (if provided) so that the simulation phase can
     388             :  * avoid an extra lookup.
     389             :  */
     390             : static int
     391    15620233 : xfs_swapext_find_mappings(
     392             :         struct xfs_swapext_intent       *sxi,
     393             :         struct xfs_bmbt_irec            *irec1,
     394             :         struct xfs_bmbt_irec            *irec2,
     395             :         struct xfs_swapext_adjacent     *adj)
     396             : {
     397    15620233 :         int                             nimaps;
     398    15620233 :         int                             bmap_flags;
     399    15620233 :         int                             error;
     400             : 
     401    15620233 :         bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));
     402             : 
     403    18648443 :         for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
     404             :                 /* Read extent from the first file */
     405    17342366 :                 nimaps = 1;
     406    17342366 :                 error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
     407             :                                 sxi->sxi_blockcount, irec1, &nimaps,
     408             :                                 bmap_flags);
     409    17342406 :                 if (error)
     410           0 :                         return error;
     411    17342406 :                 if (nimaps != 1 ||
     412    17342406 :                     irec1->br_startblock == DELAYSTARTBLOCK ||
     413    17342406 :                     irec1->br_startoff != sxi->sxi_startoff1) {
     414             :                         /*
     415             :                          * We should never get no mapping or a delalloc extent
     416             :                          * or something that doesn't match what we asked for,
     417             :                          * since the caller flushed both inodes and we hold the
     418             :                          * ILOCKs for both inodes.
     419             :                          */
     420           0 :                         ASSERT(0);
     421           0 :                         return -EINVAL;
     422             :                 }
     423             : 
     424    17342406 :                 if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
     425         376 :                         trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
     426         376 :                         continue;
     427             :                 }
     428             : 
     429             :                 /* Read extent from the second file */
     430    17342022 :                 nimaps = 1;
     431    17342022 :                 error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
     432             :                                 irec1->br_blockcount, irec2, &nimaps,
     433             :                                 bmap_flags);
     434    17342030 :                 if (error)
     435           0 :                         return error;
     436    17342030 :                 if (nimaps != 1 ||
     437    17342030 :                     irec2->br_startblock == DELAYSTARTBLOCK ||
     438    17342030 :                     irec2->br_startoff != sxi->sxi_startoff2) {
     439             :                         /*
     440             :                          * We should never get no mapping or a delalloc extent
     441             :                          * or something that doesn't match what we asked for,
     442             :                          * since the caller flushed both inodes and we hold the
     443             :                          * ILOCKs for both inodes.
     444             :                          */
     445           0 :                         ASSERT(0);
     446           0 :                         return -EINVAL;
     447             :                 }
     448             : 
     449             :                 /*
     450             :                  * We can only swap as many blocks as the smaller of the two
     451             :                  * extent maps.
     452             :                  */
     453    17342030 :                 irec1->br_blockcount = min(irec1->br_blockcount,
     454             :                                            irec2->br_blockcount);
     455             : 
     456    17342030 :                 trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
     457    17342021 :                 trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);
     458             : 
     459             :                 /* We found something to swap, so return it. */
     460    17342013 :                 if (irec1->br_startblock != irec2->br_startblock)
     461             :                         return 0;
     462             : 
     463             :                 /*
     464             :                  * Two extents mapped to the same physical block must not have
     465             :                  * different states; that's filesystem corruption.  Move on to
     466             :                  * the next extent if they're both holes or both the same
     467             :                  * physical extent.
     468             :                  */
     469     3027834 :                 if (irec1->br_state != irec2->br_state) {
     470           0 :                         xfs_bmap_mark_sick(sxi->sxi_ip1,
     471             :                                         xfs_swapext_whichfork(sxi));
     472           0 :                         xfs_bmap_mark_sick(sxi->sxi_ip2,
     473             :                                         xfs_swapext_whichfork(sxi));
     474           0 :                         return -EFSCORRUPTED;
     475             :                 }
     476             : 
     477             :                 /*
     478             :                  * Save the mappings if we're estimating work and skipping
     479             :                  * these identical mappings.
     480             :                  */
     481     3027834 :                 if (adj) {
     482     3035890 :                         memcpy(&adj->left1, irec1, sizeof(*irec1));
     483     3035890 :                         memcpy(&adj->left2, irec2, sizeof(*irec2));
     484             :                 }
     485             :         }
     486             : 
     487             :         return 0;
     488             : }
     489             : 
     490             : /* Exchange these two mappings. */
     491             : static void
     492     7061399 : xfs_swapext_exchange_mappings(
     493             :         struct xfs_trans                *tp,
     494             :         struct xfs_swapext_intent       *sxi,
     495             :         struct xfs_bmbt_irec            *irec1,
     496             :         struct xfs_bmbt_irec            *irec2)
     497             : {
     498     7061399 :         int                             whichfork = xfs_swapext_whichfork(sxi);
     499             : 
     500     7061399 :         xfs_swapext_update_quota(tp, sxi, irec1, irec2);
     501             : 
     502             :         /* Remove both mappings. */
     503     7061401 :         xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
     504     7061399 :         xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);
     505             : 
     506             :         /*
     507             :          * Re-add both mappings.  We swap the file offsets between the two maps
     508             :          * and add the opposite map, which has the effect of filling the
     509             :          * logical offsets we just unmapped, but with the physical mapping
     510             :          * information swapped.
     511             :          */
     512     7061402 :         swap(irec1->br_startoff, irec2->br_startoff);
     513     7061402 :         xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
     514     7061402 :         xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);
     515             : 
     516             :         /* Make sure we're not mapping extents past EOF. */
     517     7061402 :         if (whichfork == XFS_DATA_FORK) {
     518     6969963 :                 xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
     519             :                                 sxi->sxi_isize1);
     520     6969963 :                 xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
     521             :                                 sxi->sxi_isize2);
     522             :         }
     523             : 
     524             :         /*
     525             :          * Advance our cursor and exit.  The caller (either defer ops or log
     526             :          * recovery) will log the SXD item, and if sxi_blockcount is nonzero,
     527             :          * it will log a new SXI item for the remainder and call us back.
     528             :          */
     529     7061402 :         sxi_advance(sxi, irec1);
     530     7061402 : }
     531             : 
     532             : /* Convert inode2's leaf attr fork back to shortform, if possible. */
     533             : STATIC int
     534       79666 : xfs_swapext_attr_to_sf(
     535             :         struct xfs_trans                *tp,
     536             :         struct xfs_swapext_intent       *sxi)
     537             : {
     538       79666 :         struct xfs_da_args      args = {
     539       79666 :                 .dp             = sxi->sxi_ip2,
     540       79666 :                 .geo            = tp->t_mountp->m_attr_geo,
     541             :                 .whichfork      = XFS_ATTR_FORK,
     542             :                 .trans          = tp,
     543       79666 :                 .owner          = sxi->sxi_ip2->i_ino,
     544             :         };
     545       79666 :         struct xfs_buf          *bp;
     546       79666 :         int                     forkoff;
     547       79666 :         int                     error;
     548             : 
     549       79666 :         if (!xfs_attr_is_leaf(sxi->sxi_ip2))
     550             :                 return 0;
     551             : 
     552       73730 :         error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
     553             :                         &bp);
     554       73731 :         if (error)
     555             :                 return error;
     556             : 
     557       73730 :         forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
     558       73731 :         if (forkoff == 0)
     559             :                 return 0;
     560             : 
     561         205 :         return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
     562             : }
     563             : 
     564             : /* Convert inode2's block dir fork back to shortform, if possible. */
     565             : STATIC int
     566       10638 : xfs_swapext_dir_to_sf(
     567             :         struct xfs_trans                *tp,
     568             :         struct xfs_swapext_intent       *sxi)
     569             : {
     570       10638 :         struct xfs_da_args      args = {
     571       10638 :                 .dp             = sxi->sxi_ip2,
     572       10638 :                 .geo            = tp->t_mountp->m_dir_geo,
     573             :                 .whichfork      = XFS_DATA_FORK,
     574             :                 .trans          = tp,
     575       10638 :                 .owner          = sxi->sxi_ip2->i_ino,
     576             :         };
     577       10638 :         struct xfs_dir2_sf_hdr  sfh;
     578       10638 :         struct xfs_buf          *bp;
     579       10638 :         bool                    isblock;
     580       10638 :         int                     size;
     581       10638 :         int                     error;
     582             : 
     583       10638 :         error = xfs_dir2_isblock(&args, &isblock);
     584       10638 :         if (error)
     585             :                 return error;
     586             : 
     587       10638 :         if (!isblock)
     588             :                 return 0;
     589             : 
     590        8997 :         error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
     591        8997 :         if (error)
     592             :                 return error;
     593             : 
     594        8997 :         size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
     595        8997 :         if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
     596             :                 return 0;
     597             : 
     598         104 :         return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
     599             : }
     600             : 
     601             : /* Convert inode2's remote symlink target back to shortform, if possible. */
     602             : STATIC int
     603       18562 : xfs_swapext_link_to_sf(
     604             :         struct xfs_trans                *tp,
     605             :         struct xfs_swapext_intent       *sxi)
     606             : {
     607       18562 :         struct xfs_inode                *ip = sxi->sxi_ip2;
     608       18562 :         struct xfs_ifork                *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
     609       18562 :         char                            *buf;
     610       18562 :         int                             error;
     611             : 
     612       18562 :         if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
     613       18562 :             ip->i_disk_size > xfs_inode_data_fork_size(ip))
     614             :                 return 0;
     615             : 
     616             :         /* Read the current symlink target into a buffer. */
     617           0 :         buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
     618           0 :         if (!buf) {
     619           0 :                 ASSERT(0);
     620           0 :                 return -ENOMEM;
     621             :         }
     622             : 
     623           0 :         error = xfs_symlink_remote_read(ip, buf);
     624           0 :         if (error)
     625           0 :                 goto free;
     626             : 
     627             :         /* Remove the blocks. */
     628           0 :         error = xfs_symlink_remote_truncate(tp, ip);
     629           0 :         if (error)
     630           0 :                 goto free;
     631             : 
     632             :         /* Convert fork to local format and log our changes. */
     633           0 :         xfs_idestroy_fork(ifp);
     634           0 :         ifp->if_bytes = 0;
     635           0 :         ifp->if_format = XFS_DINODE_FMT_LOCAL;
     636           0 :         xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
     637           0 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
     638           0 : free:
     639           0 :         kmem_free(buf);
     640           0 :         return error;
     641             : }
     642             : 
     643             : static inline void
     644          65 : xfs_swapext_clear_reflink(
     645             :         struct xfs_trans        *tp,
     646             :         struct xfs_inode        *ip)
     647             : {
     648          65 :         trace_xfs_reflink_unset_inode_flag(ip);
     649             : 
     650          65 :         ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
     651          65 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     652          65 : }
     653             : 
     654             : /* Finish whatever work might come after a swap operation. */
     655             : static int
     656      108934 : xfs_swapext_do_postop_work(
     657             :         struct xfs_trans                *tp,
     658             :         struct xfs_swapext_intent       *sxi)
     659             : {
     660      108934 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
     661      108868 :                 int                     error = 0;
     662             : 
     663      108868 :                 if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
     664       79668 :                         error = xfs_swapext_attr_to_sf(tp, sxi);
     665       29200 :                 else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
     666       10638 :                         error = xfs_swapext_dir_to_sf(tp, sxi);
     667       18562 :                 else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
     668       18562 :                         error = xfs_swapext_link_to_sf(tp, sxi);
     669      108866 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
     670      108866 :                 if (error)
     671             :                         return error;
     672             :         }
     673             : 
     674      108932 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
     675           0 :                 xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
     676           0 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
     677             :         }
     678             : 
     679      108932 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
     680          65 :                 xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
     681          65 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
     682             :         }
     683             : 
     684             :         return 0;
     685             : }
     686             : 
     687             : /* Finish one extent swap, possibly log more. */
     688             : int
     689     7831011 : xfs_swapext_finish_one(
     690             :         struct xfs_trans                *tp,
     691             :         struct xfs_swapext_intent       *sxi)
     692             : {
     693     7831011 :         struct xfs_bmbt_irec            irec1, irec2;
     694     7831011 :         int                             error;
     695             : 
     696     7831011 :         if (sxi_has_more_swap_work(sxi)) {
     697             :                 /*
     698             :                  * If the operation state says that some range of the files
     699             :                  * have not yet been swapped, look for extents in that range to
     700             :                  * swap.  If we find some extents, swap them.
     701             :                  */
     702     7722077 :                 error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
     703     7722068 :                 if (error)
     704             :                         return error;
     705             : 
     706     7722068 :                 if (sxi_has_more_swap_work(sxi))
     707     7061399 :                         xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);
     708             : 
     709             :                 /*
     710             :                  * If the caller asked us to exchange the file sizes after the
     711             :                  * swap and either we just swapped the last extents in the
     712             :                  * range or we didn't find anything to swap, update the ondisk
     713             :                  * file sizes.
     714             :                  */
     715     7722071 :                 if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
     716             :                     !sxi_has_more_swap_work(sxi)) {
     717      111493 :                         sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
     718      111493 :                         sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;
     719             : 
     720      111493 :                         xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
     721      111493 :                         xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
     722             :                 }
     723      108934 :         } else if (sxi_has_postop_work(sxi)) {
     724             :                 /*
     725             :                  * Now that we're finished with the swap operation, complete
     726             :                  * the post-op cleanup work.
     727             :                  */
     728      108934 :                 error = xfs_swapext_do_postop_work(tp, sxi);
     729      108930 :                 if (error)
     730             :                         return error;
     731             :         }
     732             : 
     733     7831001 :         if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
     734             :                 return -EIO;
     735             : 
     736             :         /* If we still have work to do, ask for a new transaction. */
     737     7830997 :         if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
     738     5782408 :                 trace_xfs_swapext_defer(tp->t_mountp, sxi);
     739     5782408 :                 return -EAGAIN;
     740             :         }
     741             : 
     742             :         /*
     743             :          * If we reach here, we've finished all the swapping work and the post
     744             :          * operation work.  The last thing we need to do before returning to
     745             :          * the caller is to make sure that COW forks are set up correctly.
     746             :          */
     747     2048589 :         if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
     748     1968928 :                 xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
     749     1968939 :                 xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
     750             :         }
     751             : 
     752             :         return 0;
     753             : }
     754             : 
     755             : /*
     756             :  * Compute the amount of bmbt blocks we should reserve for each file.  In the
     757             :  * worst case, each exchange will fill a hole with a new mapping, which could
     758             :  * result in a btree split every time we add a new leaf block.
     759             :  */
     760             : static inline uint64_t
     761     6810709 : xfs_swapext_bmbt_blocks(
     762             :         struct xfs_mount                *mp,
     763             :         const struct xfs_swapext_req    *req)
     764             : {
     765     6810709 :         return howmany_64(req->nr_exchanges,
     766     6810709 :                                         XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
     767     6807334 :                         XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
     768             : }
     769             : 
     770             : static inline uint64_t
     771     6810239 : xfs_swapext_rmapbt_blocks(
     772             :         struct xfs_mount                *mp,
     773             :         const struct xfs_swapext_req    *req)
     774             : {
     775     6810239 :         if (!xfs_has_rmapbt(mp))
     776             :                 return 0;
     777     6806728 :         if (XFS_IS_REALTIME_INODE(req->ip1))
     778      644968 :                 return howmany_64(req->nr_exchanges,
     779      644968 :                                         XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
     780      644968 :                         XFS_RTRMAPADD_SPACE_RES(mp);
     781             : 
     782     6161760 :         return howmany_64(req->nr_exchanges,
     783     6161760 :                                         XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
     784     6161065 :                         XFS_RMAPADD_SPACE_RES(mp);
     785             : }
     786             : 
     787             : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
     788             : int
     789     6814189 : xfs_swapext_estimate_overhead(
     790             :         struct xfs_swapext_req  *req)
     791             : {
     792     6814189 :         struct xfs_mount        *mp = req->ip1->i_mount;
     793     6814189 :         xfs_filblks_t           bmbt_blocks;
     794     6814189 :         xfs_filblks_t           rmapbt_blocks;
     795     6814189 :         xfs_filblks_t           resblks = req->resblks;
     796             : 
     797             :         /*
     798             :          * Compute the number of bmbt and rmapbt blocks we might need to handle
     799             :          * the estimated number of exchanges.
     800             :          */
     801     6814189 :         bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
     802     6808616 :         rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
     803             : 
     804     6809645 :         trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
     805             : 
     806             :         /* Make sure the change in file block count doesn't overflow. */
     807     6809406 :         if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
     808             :                 return -EFBIG;
     809     6809406 :         if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
     810             :                 return -EFBIG;
     811             : 
     812             :         /*
     813             :          * Add together the number of blocks we need to handle btree growth,
     814             :          * then add it to the number of blocks we need to reserve to this
     815             :          * transaction.
     816             :          */
     817     6809406 :         if (check_add_overflow(resblks, bmbt_blocks, &resblks))
     818             :                 return -ENOSPC;
     819     6809406 :         if (check_add_overflow(resblks, bmbt_blocks, &resblks))
     820             :                 return -ENOSPC;
     821     6809406 :         if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
     822             :                 return -ENOSPC;
     823     6809406 :         if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
     824             :                 return -ENOSPC;
     825             : 
     826             :         /* Can't actually reserve more than UINT_MAX blocks. */
     827     6809406 :         if (req->resblks > UINT_MAX)
     828             :                 return -ENOSPC;
     829             : 
     830     6809406 :         req->resblks = resblks;
     831     6809406 :         trace_xfs_swapext_final_estimate(req);
     832     6809406 :         return 0;
     833             : }
     834             : 
     835             : /* Decide if we can merge two real extents. */
     836             : static inline bool
     837    39902099 : can_merge(
     838             :         const struct xfs_bmbt_irec      *b1,
     839             :         const struct xfs_bmbt_irec      *b2)
     840             : {
     841             :         /* Don't merge holes. */
     842    39902099 :         if (b1->br_startblock == HOLESTARTBLOCK ||
     843    32652856 :             b2->br_startblock == HOLESTARTBLOCK)
     844             :                 return false;
     845             : 
     846             :         /* Only real extents can be merged. */
     847    92629452 :         if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
     848             :                 return false;
     849             : 
     850    30876484 :         if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
     851    22913235 :             b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
     852     9799811 :             b1->br_state                       == b2->br_state &&
     853     8997768 :             b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
     854     8997768 :                 return true;
     855             : 
     856             :         return false;
     857             : }
     858             : 
     859             : #define CLEFT_CONTIG    0x01
     860             : #define CRIGHT_CONTIG   0x02
     861             : #define CHOLE           0x04
     862             : #define CBOTH_CONTIG    (CLEFT_CONTIG | CRIGHT_CONTIG)
     863             : 
     864             : #define NLEFT_CONTIG    0x10
     865             : #define NRIGHT_CONTIG   0x20
     866             : #define NHOLE           0x40
     867             : #define NBOTH_CONTIG    (NLEFT_CONTIG | NRIGHT_CONTIG)
     868             : 
     869             : /* Estimate the effect of a single swap on extent count. */
     870             : static inline int
     871    14505558 : delta_nextents_step(
     872             :         struct xfs_mount                *mp,
     873             :         const struct xfs_bmbt_irec      *left,
     874             :         const struct xfs_bmbt_irec      *curr,
     875             :         const struct xfs_bmbt_irec      *new,
     876             :         const struct xfs_bmbt_irec      *right)
     877             : {
     878    14505558 :         bool                            lhole, rhole, chole, nhole;
     879    14505558 :         unsigned int                    state = 0;
     880    14505558 :         int                             ret = 0;
     881             : 
     882    14505558 :         lhole = left->br_startblock == HOLESTARTBLOCK;
     883    14505558 :         rhole = right->br_startblock == HOLESTARTBLOCK;
     884    14505558 :         chole = curr->br_startblock == HOLESTARTBLOCK;
     885    14505558 :         nhole = new->br_startblock == HOLESTARTBLOCK;
     886             : 
     887    14505558 :         if (chole)
     888     3935459 :                 state |= CHOLE;
     889    14505558 :         if (!lhole && !chole && can_merge(left, curr))
     890        1427 :                 state |= CLEFT_CONTIG;
     891    14505558 :         if (!rhole && !chole && can_merge(curr, right))
     892     3801433 :                 state |= CRIGHT_CONTIG;
     893    14505558 :         if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
     894         470 :             left->br_startblock + curr->br_startblock +
     895             :                                         right->br_startblock > XFS_MAX_BMBT_EXTLEN)
     896         133 :                 state &= ~CRIGHT_CONTIG;
     897             : 
     898    14505558 :         if (nhole)
     899     3935459 :                 state |= NHOLE;
     900    14505558 :         if (!lhole && !nhole && can_merge(left, new))
     901     2597404 :                 state |= NLEFT_CONTIG;
     902    14505558 :         if (!rhole && !nhole && can_merge(new, right))
     903         100 :                 state |= NRIGHT_CONTIG;
     904    14505558 :         if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
     905          21 :             left->br_startblock + new->br_startblock +
     906             :                                         right->br_startblock > XFS_MAX_BMBT_EXTLEN)
     907           6 :                 state &= ~NRIGHT_CONTIG;
     908             : 
     909    14505558 :         switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
     910         337 :         case CLEFT_CONTIG | CRIGHT_CONTIG:
     911             :                 /*
     912             :                  * left/curr/right are the same extent, so deleting curr causes
     913             :                  * 2 new extents to be created.
     914             :                  */
     915         337 :                 ret += 2;
     916         337 :                 break;
     917     6767709 :         case 0:
     918             :                 /*
     919             :                  * curr is not contiguous with any extent, so we remove curr
     920             :                  * completely
     921             :                  */
     922     6767709 :                 ret--;
     923     6767709 :                 break;
     924             :         case CHOLE:
     925             :                 /* hole, do nothing */
     926             :                 break;
     927             :         case CLEFT_CONTIG:
     928             :         case CRIGHT_CONTIG:
     929             :                 /* trim either left or right, no change */
     930             :                 break;
     931             :         }
     932             : 
     933    14505558 :         switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
     934          15 :         case NLEFT_CONTIG | NRIGHT_CONTIG:
     935             :                 /*
     936             :                  * left/new/right will become the same extent, so adding
     937             :                  * new causes the deletion of right.
     938             :                  */
     939          15 :                 ret--;
     940          15 :                 break;
     941     7972617 :         case 0:
     942             :                 /* new is not contiguous with any extent */
     943     7972617 :                 ret++;
     944     7972617 :                 break;
     945             :         case NHOLE:
     946             :                 /* hole, do nothing. */
     947             :                 break;
     948             :         case NLEFT_CONTIG:
     949             :         case NRIGHT_CONTIG:
     950             :                 /* new is absorbed into left or right, no change */
     951             :                 break;
     952             :         }
     953             : 
     954    14505558 :         trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
     955             :                         state);
     956    14505557 :         return ret;
     957             : }
     958             : 
     959             : /* Make sure we don't overflow the extent counters. */
     960             : static inline int
     961     2245064 : ensure_delta_nextents(
     962             :         struct xfs_swapext_req  *req,
     963             :         struct xfs_inode        *ip,
     964             :         int64_t                 delta)
     965             : {
     966     2245064 :         struct xfs_mount        *mp = ip->i_mount;
     967     2245064 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, req->whichfork);
     968     2245051 :         xfs_extnum_t            max_extents;
     969     2245051 :         bool                    large_extcount;
     970             : 
     971     2245051 :         if (delta < 0)
     972             :                 return 0;
     973             : 
     974     2221166 :         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
     975          10 :                 if (ifp->if_nextents + delta > 10)
     976             :                         return -EFBIG;
     977             :         }
     978             : 
     979     2221160 :         if (req->req_flags & XFS_SWAP_REQ_NREXT64)
     980             :                 large_extcount = true;
     981             :         else
     982     2221160 :                 large_extcount = xfs_inode_has_large_extent_counts(ip);
     983             : 
     984     2221160 :         max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
     985     2221160 :         if (ifp->if_nextents + delta <= max_extents)
     986             :                 return 0;
     987           0 :         if (large_extcount)
     988             :                 return -EFBIG;
     989           0 :         if (!xfs_has_large_extent_counts(mp))
     990             :                 return -EFBIG;
     991             : 
     992           0 :         max_extents = xfs_iext_max_nextents(true, req->whichfork);
     993           0 :         if (ifp->if_nextents + delta > max_extents)
     994             :                 return -EFBIG;
     995             : 
     996           0 :         req->req_flags |= XFS_SWAP_REQ_NREXT64;
     997           0 :         return 0;
     998             : }
     999             : 
    1000             : /* Find the next extent after irec. */
    1001             : static inline int
    1002    14505558 : get_next_ext(
    1003             :         struct xfs_inode                *ip,
    1004             :         int                             bmap_flags,
    1005             :         const struct xfs_bmbt_irec      *irec,
    1006             :         struct xfs_bmbt_irec            *nrec)
    1007             : {
    1008    14505558 :         xfs_fileoff_t                   off;
    1009    14505558 :         xfs_filblks_t                   blockcount;
    1010    14505558 :         int                             nimaps = 1;
    1011    14505558 :         int                             error;
    1012             : 
    1013    14505558 :         off = irec->br_startoff + irec->br_blockcount;
    1014    14505558 :         blockcount = XFS_MAX_FILEOFF - off;
    1015    14505558 :         error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
    1016    14505558 :         if (error)
    1017             :                 return error;
    1018    14505558 :         if (nrec->br_startblock == DELAYSTARTBLOCK ||
    1019    14498687 :             nrec->br_startoff != off) {
    1020             :                 /*
    1021             :                  * If we don't get the extent we want, mark the mapping as a
    1022             :                  * hole, which is how our estimator function will treat it.
    1023             :                  * We shouldn't get delalloc reservations.
    1024             :                  */
    1025        6871 :                 nrec->br_startblock = HOLESTARTBLOCK;
    1026             :         }
    1027             : 
    1028             :         return 0;
    1029             : }
    1030             : 
    1031             : int __init
    1032          59 : xfs_swapext_intent_init_cache(void)
    1033             : {
    1034          59 :         xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
    1035             :                         sizeof(struct xfs_swapext_intent),
    1036             :                         0, 0, NULL);
    1037             : 
    1038          59 :         return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
    1039             : }
    1040             : 
    1041             : void
    1042          58 : xfs_swapext_intent_destroy_cache(void)
    1043             : {
    1044          58 :         kmem_cache_destroy(xfs_swapext_intent_cache);
    1045          58 :         xfs_swapext_intent_cache = NULL;
    1046          58 : }
    1047             : 
    1048             : /*
    1049             :  * Decide if we will swap the reflink flags between the two files after the
    1050             :  * swap.  The only time we want to do this is if we're exchanging all extents
    1051             :  * under EOF and the inode reflink flags have different states.
    1052             :  */
    1053             : static inline bool
    1054     3931908 : sxi_can_exchange_reflink_flags(
    1055             :         const struct xfs_swapext_req    *req,
    1056             :         unsigned int                    reflink_state)
    1057             : {
    1058     3931908 :         struct xfs_mount                *mp = req->ip1->i_mount;
    1059             : 
    1060     3931908 :         if (hweight32(reflink_state) != 1)
    1061             :                 return false;
    1062         218 :         if (req->startoff1 != 0 || req->startoff2 != 0)
    1063             :                 return false;
    1064         186 :         if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
    1065             :                 return false;
    1066         178 :         if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
    1067           0 :                 return false;
    1068             :         return true;
    1069             : }
    1070             : 
    1071             : 
    1072             : /* Allocate and initialize a new incore intent item from a request. */
    1073             : struct xfs_swapext_intent *
    1074     4086001 : xfs_swapext_init_intent(
    1075             :         const struct xfs_swapext_req    *req,
    1076             :         unsigned int                    *reflink_state)
    1077             : {
    1078     4086001 :         struct xfs_swapext_intent       *sxi;
    1079     4086001 :         unsigned int                    rs = 0;
    1080             : 
    1081     4086001 :         sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
    1082             :                         GFP_NOFS | __GFP_NOFAIL);
    1083     4086002 :         INIT_LIST_HEAD(&sxi->sxi_list);
    1084     4086002 :         sxi->sxi_ip1 = req->ip1;
    1085     4086002 :         sxi->sxi_ip2 = req->ip2;
    1086     4086002 :         sxi->sxi_startoff1 = req->startoff1;
    1087     4086002 :         sxi->sxi_startoff2 = req->startoff2;
    1088     4086002 :         sxi->sxi_blockcount = req->blockcount;
    1089     4086002 :         sxi->sxi_isize1 = sxi->sxi_isize2 = -1;
    1090             : 
    1091     4086002 :         if (req->whichfork == XFS_ATTR_FORK)
    1092      154103 :                 sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;
    1093             : 
    1094     4086002 :         if (req->whichfork == XFS_DATA_FORK &&
    1095             :             (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
    1096      212941 :                 sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
    1097      212941 :                 sxi->sxi_isize1 = req->ip2->i_disk_size;
    1098      212941 :                 sxi->sxi_isize2 = req->ip1->i_disk_size;
    1099             :         }
    1100             : 
    1101     4086002 :         if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
    1102         164 :                 sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
    1103     4086002 :         if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
    1104      202438 :                 sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;
    1105             : 
    1106     4086002 :         if (req->req_flags & XFS_SWAP_REQ_LOGGED)
    1107     4026070 :                 sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
    1108     4086002 :         if (req->req_flags & XFS_SWAP_REQ_NREXT64)
    1109           0 :                 sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;
    1110             : 
    1111     4086002 :         if (req->whichfork == XFS_DATA_FORK) {
    1112             :                 /*
    1113             :                  * Record the state of each inode's reflink flag before the
    1114             :                  * operation.
    1115             :                  */
    1116     3931901 :                 if (xfs_is_reflink_inode(req->ip1))
    1117     3618244 :                         rs |= XFS_REFLINK_STATE_IP1;
    1118     3931901 :                 if (xfs_is_reflink_inode(req->ip2))
    1119     3618461 :                         rs |= XFS_REFLINK_STATE_IP2;
    1120             : 
    1121             :                 /*
    1122             :                  * Figure out if we're clearing the reflink flags (which
    1123             :                  * effectively swaps them) after the operation.
    1124             :                  */
    1125     3931901 :                 if (sxi_can_exchange_reflink_flags(req, rs)) {
    1126         178 :                         if (rs & XFS_REFLINK_STATE_IP1)
    1127           0 :                                 sxi->sxi_flags |=
    1128             :                                                 XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
    1129         178 :                         if (rs & XFS_REFLINK_STATE_IP2)
    1130         178 :                                 sxi->sxi_flags |=
    1131             :                                                 XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
    1132             :                 }
    1133             :         }
    1134             : 
    1135     4086013 :         if (reflink_state)
    1136     2048644 :                 *reflink_state = rs;
    1137     4086013 :         return sxi;
    1138             : }
    1139             : 
    1140             : /*
    1141             :  * Estimate the number of exchange operations and the number of file blocks
    1142             :  * in each file that will be affected by the exchange operation.
    1143             :  */
    1144             : int
    1145     2037371 : xfs_swapext_estimate(
    1146             :         struct xfs_swapext_req          *req)
    1147             : {
    1148     2037371 :         struct xfs_swapext_intent       *sxi;
    1149     2037371 :         struct xfs_bmbt_irec            irec1, irec2;
    1150     2037371 :         struct xfs_swapext_adjacent     adj = ADJACENT_INIT;
    1151     2037371 :         xfs_filblks_t                   ip1_blocks = 0, ip2_blocks = 0;
    1152     2037371 :         int64_t                         d_nexts1, d_nexts2;
    1153     2037371 :         int                             bmap_flags;
    1154     2037371 :         int                             error;
    1155             : 
    1156     2037371 :         ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
    1157             : 
    1158     2037371 :         bmap_flags = xfs_bmapi_aflag(req->whichfork);
    1159     2037371 :         sxi = xfs_swapext_init_intent(req, NULL);
    1160             : 
    1161             :         /*
    1162             :          * To guard against the possibility of overflowing the extent counters,
    1163             :          * we have to estimate an upper bound on the potential increase in that
    1164             :          * counter.  We can split the extent at each end of the range, and for
    1165             :          * each step of the swap we can split the extent that we're working on
    1166             :          * if the extents do not align.
    1167             :          */
    1168     2037371 :         d_nexts1 = d_nexts2 = 3;
    1169             : 
    1170     9290150 :         while (sxi_has_more_swap_work(sxi)) {
    1171             :                 /*
    1172             :                  * Walk through the file ranges until we find something to
    1173             :                  * swap.  Because we're simulating the swap, pass in adj to
    1174             :                  * capture skipped mappings for correct estimation of bmbt
    1175             :                  * record merges.
    1176             :                  */
    1177     7898185 :                 error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
    1178     7898180 :                 if (error)
    1179           0 :                         goto out_free;
    1180     7898180 :                 if (!sxi_has_more_swap_work(sxi))
    1181             :                         break;
    1182             : 
    1183             :                 /* Update accounting. */
    1184    12536673 :                 if (xfs_bmap_is_real_extent(&irec1))
    1185     5283895 :                         ip1_blocks += irec1.br_blockcount;
    1186    12538981 :                 if (xfs_bmap_is_real_extent(&irec2))
    1187     5286203 :                         ip2_blocks += irec2.br_blockcount;
    1188     7252778 :                 req->nr_exchanges++;
    1189             : 
    1190             :                 /* Read the next extents from both files. */
    1191     7252778 :                 error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
    1192     7252779 :                 if (error)
    1193           0 :                         goto out_free;
    1194             : 
    1195     7252779 :                 error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
    1196     7252780 :                 if (error)
    1197           0 :                         goto out_free;
    1198             : 
    1199             :                 /* Update extent count deltas. */
    1200     7252780 :                 d_nexts1 += delta_nextents_step(req->ip1->i_mount,
    1201             :                                 &adj.left1, &irec1, &irec2, &adj.right1);
    1202             : 
    1203     7252779 :                 d_nexts2 += delta_nextents_step(req->ip1->i_mount,
    1204             :                                 &adj.left2, &irec2, &irec1, &adj.right2);
    1205             : 
    1206             :                 /* Now pretend we swapped the extents. */
    1207     7252779 :                 if (can_merge(&adj.left2, &irec1))
    1208     1596573 :                         adj.left2.br_blockcount += irec1.br_blockcount;
    1209             :                 else
    1210     5656206 :                         memcpy(&adj.left2, &irec1, sizeof(irec1));
    1211             : 
    1212     7252779 :                 if (can_merge(&adj.left1, &irec2))
    1213     1000831 :                         adj.left1.br_blockcount += irec2.br_blockcount;
    1214             :                 else
    1215     6251948 :                         memcpy(&adj.left1, &irec2, sizeof(irec2));
    1216             : 
    1217     7252779 :                 sxi_advance(sxi, &irec1);
    1218             :         }
    1219             : 
    1220             :         /* Account for the blocks that are being exchanged. */
    1221     2037363 :         if (XFS_IS_REALTIME_INODE(req->ip1) &&
    1222      648482 :             req->whichfork == XFS_DATA_FORK) {
    1223      648483 :                 req->ip1_rtbcount = ip1_blocks;
    1224      648483 :                 req->ip2_rtbcount = ip2_blocks;
    1225             :         } else {
    1226     1388880 :                 req->ip1_bcount = ip1_blocks;
    1227     1388880 :                 req->ip2_bcount = ip2_blocks;
    1228             :         }
    1229             : 
    1230             :         /*
    1231             :          * Make sure that both forks have enough slack left in their extent
    1232             :          * counters that the swap operation will not overflow.
    1233             :          */
    1234     2037363 :         trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
    1235     2037356 :         if (req->ip1 == req->ip2) {
    1236     1829650 :                 error = ensure_delta_nextents(req, req->ip1,
    1237             :                                 d_nexts1 + d_nexts2);
    1238             :         } else {
    1239      207706 :                 error = ensure_delta_nextents(req, req->ip1, d_nexts1);
    1240      207706 :                 if (error)
    1241          10 :                         goto out_free;
    1242      207696 :                 error = ensure_delta_nextents(req, req->ip2, d_nexts2);
    1243             :         }
    1244     2037353 :         if (error)
    1245           0 :                 goto out_free;
    1246             : 
    1247     2037353 :         trace_xfs_swapext_initial_estimate(req);
    1248     2037353 :         error = xfs_swapext_estimate_overhead(req);
    1249     2037354 : out_free:
    1250     2037354 :         kmem_cache_free(xfs_swapext_intent_cache, sxi);
    1251     2037370 :         return error;
    1252             : }
    1253             : 
    1254             : static inline void
    1255         109 : xfs_swapext_set_reflink(
    1256             :         struct xfs_trans        *tp,
    1257             :         struct xfs_inode        *ip)
    1258             : {
    1259         109 :         trace_xfs_reflink_set_inode_flag(ip);
    1260             : 
    1261         109 :         ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
    1262         109 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
    1263         109 : }
    1264             : 
    1265             : /*
    1266             :  * If either file has shared blocks and we're swapping data forks, we must flag
    1267             :  * the other file as having shared blocks so that we get the shared-block rmap
    1268             :  * functions if we need to fix up the rmaps.
    1269             :  */
    1270             : void
    1271     2048654 : xfs_swapext_ensure_reflink(
    1272             :         struct xfs_trans                *tp,
    1273             :         const struct xfs_swapext_intent *sxi,
    1274             :         unsigned int                    reflink_state)
    1275             : {
    1276     2048654 :         if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
    1277     1809112 :             !xfs_is_reflink_inode(sxi->sxi_ip2))
    1278           0 :                 xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
    1279             : 
    1280     2048654 :         if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
    1281     1809221 :             !xfs_is_reflink_inode(sxi->sxi_ip1))
    1282         109 :                 xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
    1283     2048654 : }
    1284             : 
    1285             : /* Widen the extent counts of both inodes if necessary. */
    1286             : static inline void
    1287     2048617 : xfs_swapext_upgrade_extent_counts(
    1288             :         struct xfs_trans                *tp,
    1289             :         const struct xfs_swapext_intent *sxi)
    1290             : {
    1291     2048617 :         if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
    1292             :                 return;
    1293             : 
    1294           0 :         sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
    1295           0 :         xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
    1296             : 
    1297           0 :         sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
    1298           0 :         xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
    1299             : }
    1300             : 
    1301             : /*
    1302             :  * Schedule a swap a range of extents from one inode to another.  If the atomic
    1303             :  * swap feature is enabled, then the operation progress can be resumed even if
    1304             :  * the system goes down.  The caller must commit the transaction to start the
    1305             :  * work.
    1306             :  *
    1307             :  * The caller must ensure the inodes must be joined to the transaction and
    1308             :  * ILOCKd; they will still be joined to the transaction at exit.
    1309             :  */
    1310             : void
    1311     2048614 : xfs_swapext(
    1312             :         struct xfs_trans                *tp,
    1313             :         const struct xfs_swapext_req    *req)
    1314             : {
    1315     2048614 :         struct xfs_swapext_intent       *sxi;
    1316     2048614 :         unsigned int                    reflink_state;
    1317             : 
    1318     2048614 :         ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
    1319     2048598 :         ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
    1320     2048610 :         ASSERT(req->whichfork != XFS_COW_FORK);
    1321     2048610 :         ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
    1322     2048610 :         if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
    1323      111493 :                 ASSERT(req->whichfork == XFS_DATA_FORK);
    1324     2048610 :         if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
    1325      108867 :                 ASSERT(req->whichfork == XFS_ATTR_FORK ||
    1326             :                        (req->whichfork == XFS_DATA_FORK &&
    1327             :                         (S_ISDIR(VFS_I(req->ip2)->i_mode) ||
    1328             :                          S_ISLNK(VFS_I(req->ip2)->i_mode))));
    1329             : 
    1330     2048610 :         if (req->blockcount == 0)
    1331           0 :                 return;
    1332             : 
    1333     2048610 :         sxi = xfs_swapext_init_intent(req, &reflink_state);
    1334     2048615 :         xfs_swapext_schedule(tp, sxi);
    1335     2048618 :         xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
    1336     2048619 :         xfs_swapext_upgrade_extent_counts(tp, sxi);
    1337             : }

Generated by: LCOV version 1.14