LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_swapext.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 443 516 85.9 %
Date: 2023-07-31 20:08:27 Functions: 31 31 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_defer.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_bmap.h"
      17             : #include "xfs_icache.h"
      18             : #include "xfs_quota.h"
      19             : #include "xfs_swapext.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_bmap_btree.h"
      22             : #include "xfs_trans_space.h"
      23             : #include "xfs_error.h"
      24             : #include "xfs_errortag.h"
      25             : #include "xfs_health.h"
      26             : #include "xfs_da_format.h"
      27             : #include "xfs_da_btree.h"
      28             : #include "xfs_attr_leaf.h"
      29             : #include "xfs_attr.h"
      30             : #include "xfs_dir2_priv.h"
      31             : #include "xfs_dir2.h"
      32             : #include "xfs_symlink_remote.h"
      33             : #include "xfs_rtbitmap.h"
      34             : /*
                     :  * Slab cache handle, presumably for struct xfs_swapext_intent allocations
                     :  * (going by the name -- TODO confirm).  It is only defined here; nothing in
                     :  * this part of the file creates or uses it.
                     :  */
      35             : struct kmem_cache       *xfs_swapext_intent_cache;
      36             : 
      37             : /*
                     :  * bmbt mappings adjacent to a pair of records.  The "1" records belong to
                     :  * the first file and the "2" records to the second:
                     :  * xfs_swapext_find_mappings() stores a skipped file-1 mapping in left1 and
                     :  * the matching file-2 mapping in left2.
                     :  */
      38             : struct xfs_swapext_adjacent {
      39             :         struct xfs_bmbt_irec            left1;
      40             :         struct xfs_bmbt_irec            right1;
      41             :         struct xfs_bmbt_irec            left2;
      42             :         struct xfs_bmbt_irec            right2;
      43             : };
      44             : /*
                     :  * Initializer for struct xfs_swapext_adjacent: every neighbour record
                     :  * starts out with br_startblock set to HOLESTARTBLOCK.  All other fields
                     :  * are left to the implicit zero initialization.
                     :  */
      45             : #define ADJACENT_INIT { \
      46             :         .left1  = { .br_startblock = HOLESTARTBLOCK }, \
      47             :         .right1 = { .br_startblock = HOLESTARTBLOCK }, \
      48             :         .left2  = { .br_startblock = HOLESTARTBLOCK }, \
      49             :         .right2 = { .br_startblock = HOLESTARTBLOCK }, \
      50             : }
      51             : 
      52             : /* Information to help us reset reflink flag / CoW fork state after a swap. */
      53             : 
      54             : /*
                     :  * Previous state of the two inodes' reflink flags.  These are single-bit
                     :  * masks (bit 0 for ip1, bit 1 for ip2) so both can be kept in one
                     :  * unsigned word.  Neither is referenced in this part of the file.
                     :  */
      55             : #define XFS_REFLINK_STATE_IP1           (1U << 0)
      56             : #define XFS_REFLINK_STATE_IP2           (1U << 1)
      57             : 
      58             : /*
      59             :  * If the reflink flag is set on either inode, make sure it has an incore CoW
      60             :  * fork, since all reflink inodes must have them.  If there's a CoW fork and it
      61             :  * has extents in it, make sure the inodes are tagged appropriately so that
      62             :  * speculative preallocations can be GC'd if we run low on space.
                     :  *
                     :  * This helper handles a single inode @ip.  "Has extents" is tested as
                     :  * cfork->if_bytes > 0; an empty CoW fork gets its cowblocks tag cleared.
      63             :  */
      64             : static inline void
      65      652696 : xfs_swapext_ensure_cowfork(
      66             :         struct xfs_inode        *ip)
      67             : {
      68      652696 :         struct xfs_ifork        *cfork;
      69             : 
      70      652696 :         if (xfs_is_reflink_inode(ip))
      71      486819 :                 xfs_ifork_init_cow(ip);
      72             : 
      73      652696 :         cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
      74      652696 :         if (!cfork)
      75             :                 return;         /* no CoW fork, so there is nothing to tag */
      76      517742 :         if (cfork->if_bytes > 0)
      77       81928 :                 xfs_inode_set_cowblocks_tag(ip);
      78             :         else
      79      435814 :                 xfs_inode_clear_cowblocks_tag(ip);
      80             : }
      81             : 
      82             : /*
                     :  * Schedule an atomic extent swap: fire the xfs_swapext_defer tracepoint and
                     :  * queue @sxi (through its sxi_list member) on @tp as deferred work of type
                     :  * XFS_DEFER_OPS_TYPE_SWAPEXT.
                     :  */
      83             : void
      84      366139 : xfs_swapext_schedule(
      85             :         struct xfs_trans                *tp,
      86             :         struct xfs_swapext_intent       *sxi)
      87             : {
      88      366139 :         trace_xfs_swapext_defer(tp->t_mountp, sxi);
      89      366139 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
      90      366139 : }
      91             : 
      92             : /*
      93             :  * Adjust the on-disk inode size upwards if needed so that we never map extents
      94             :  * into the file past EOF.  This is crucial so that log recovery won't get
      95             :  * confused by the sudden appearance of post-eof extents.
                     :  *
                     :  * @new_isize is the size limit for this file; a negative value means no
                     :  * size update is wanted.  Otherwise the on-disk size is raised to the byte
                     :  * offset of the end of @imap, capped at @new_isize, but never lowered.  The
                     :  * inode core is logged in @tp only when the size actually changes.
      96             :  */
      97             : STATIC void
      98     2157642 : xfs_swapext_update_size(
      99             :         struct xfs_trans        *tp,
     100             :         struct xfs_inode        *ip,
     101             :         struct xfs_bmbt_irec    *imap,
     102             :         xfs_fsize_t             new_isize)
     103             : {
     104     2157642 :         struct xfs_mount        *mp = tp->t_mountp;
     105     2157642 :         xfs_fsize_t             len;
     106             : 
     107     2157642 :         if (new_isize < 0)
     108             :                 return;         /* caller did not ask for a size update */
     109             : 
     110      230426 :         len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
     111             :                   new_isize);
     112             : 
     113      230426 :         if (len <= ip->i_disk_size)
     114             :                 return;         /* on-disk size already covers this mapping */
     115             : 
     116          73 :         trace_xfs_swapext_update_inode_size(ip, len);
     117             : 
     118          73 :         ip->i_disk_size = len;
     119          73 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     120             : }
     121             : /* Return true while the intent still has blocks left to swap. */
     122             : static inline bool
     123             : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
     124             : {
     125     9412064 :         return sxi->sxi_blockcount > 0;
     126             : }
     127             : /*
                     :  * Return true if any post-swap cleanup flag is set on the intent: clearing
                     :  * the reflink flag of inode 1 or inode 2, or converting inode 2 back to
                     :  * shortform (meanings taken from the flag names).
                     :  */
     128             : static inline bool
     129             : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
     130             : {
     131      461036 :         return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
     132             :                                  XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
     133             :                                  XFS_SWAP_EXT_CVT_INO2_SF);
     134             : }
     135             : /*
                     :  * Move the swap cursor past @irec: both file offsets advance by
                     :  * irec->br_blockcount and the remaining block count shrinks by the same
                     :  * amount.
                     :  */
     136             : static inline void
     137             : sxi_advance(
     138             :         struct xfs_swapext_intent       *sxi,
     139             :         const struct xfs_bmbt_irec      *irec)
     140             : {
     141     2628363 :         sxi->sxi_startoff1 += irec->br_blockcount;
     142     2628363 :         sxi->sxi_startoff2 += irec->br_blockcount;
     143     2628363 :         sxi->sxi_blockcount -= irec->br_blockcount;
     144     1506321 : }
     145             : 
     146             : #ifdef DEBUG /*
                     :  * DEBUG builds only.  Decide whether @req needs rt conversion.  The only
                     :  * caller in this file, xfs_swapext_check_rt_extents(), uses the answer to
                     :  * decide whether to verify rt extent alignment.  Every early exit answers
                     :  * "no"; otherwise the answer is whether req->ip2 has big rt extents.
                     :  */
     147             : static inline bool
     148      264640 : xfs_swapext_need_rt_conversion(
     149             :         const struct xfs_swapext_req    *req)
     150             : {
     151      264640 :         struct xfs_inode                *ip = req->ip2;
     152      264640 :         struct xfs_mount                *mp = ip->i_mount;
     153             : 
     154             :         /* xattrs don't live on the rt device */
     155      264640 :         if (req->whichfork == XFS_ATTR_FORK)
     156             :                 return false;
     157             : 
     158             :         /*
     159             :          * Caller got permission to use logged swapext, so log recovery will
     160             :          * finish the swap and not leave us with partially swapped rt extents
     161             :          * exposed to userspace.
     162             :          */
     163      264640 :         if (req->req_flags & XFS_SWAP_REQ_LOGGED)
     164             :                 return false;
     165             : 
     166             :         /*
     167             :          * If we can't use log intent items at all, the only supported
     168             :          * operation is full fork swaps.
     169             :          */
     170        6440 :         if (!xfs_swapext_supported(mp))
     171             :                 return false;
     172             : 
     173             :         /* Conversion is only needed for realtime files with big rt extents */
     174        6440 :         return xfs_inode_has_bigrtextents(ip);
     175             : }
     176             : /*
                     :  * DEBUG builds only.  Walk the range described by @req and verify that each
                     :  * pair of mappings can be swapped on a realtime file with big rt extents:
                     :  * the start offset of the mapping in each file and the length being swapped
                     :  * must all be aligned to the realtime extent size.
                     :  *
                     :  * Returns 0 if no check is needed or everything is aligned, -EINVAL on a
                     :  * misaligned mapping (after tripping an ASSERT), or the error returned by
                     :  * xfs_bmapi_read().
                     :  */
     177             : static inline int
     178      264640 : xfs_swapext_check_rt_extents(
     179             :         struct xfs_mount                *mp,
     180             :         const struct xfs_swapext_req    *req)
     181             : {
     182      264640 :         struct xfs_bmbt_irec            irec1, irec2;
     183      264640 :         xfs_fileoff_t                   startoff1 = req->startoff1;
     184      264640 :         xfs_fileoff_t                   startoff2 = req->startoff2;
     185      264640 :         xfs_filblks_t                   blockcount = req->blockcount;
     186      264640 :         uint32_t                        mod;
     187      264640 :         int                             nimaps;
     188      264640 :         int                             error;
     189             : 
     190      264640 :         if (!xfs_swapext_need_rt_conversion(req))
     191             :                 return 0;
     192             : 
     193           0 :         while (blockcount > 0) {
     194             :                 /* Read extent from the first file */
     195           0 :                 nimaps = 1;
     196           0 :                 error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
     197             :                                 &irec1, &nimaps, 0);
     198           0 :                 if (error)
     199           0 :                         return error;
     200           0 :                 ASSERT(nimaps == 1);
     201             : 
     202             :                 /* Read extent from the second file */
     203           0 :                 nimaps = 1;
     204           0 :                 error = xfs_bmapi_read(req->ip2, startoff2,
     205             :                                 irec1.br_blockcount, &irec2, &nimaps,
     206             :                                 0);
     207           0 :                 if (error)
     208           0 :                         return error;
     209           0 :                 ASSERT(nimaps == 1);
     210             : 
     211             :                 /*
     212             :                  * We can only swap as many blocks as the smaller of the two
     213             :                  * extent maps.
     214             :                  */
     215           0 :                 irec1.br_blockcount = min(irec1.br_blockcount,
     216             :                                           irec2.br_blockcount);
     217             : 
     218             :                 /* Both mappings must be aligned to the realtime extent size. */
     219           0 :                 xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
     220           0 :                 if (mod) {
     221           0 :                         ASSERT(mod == 0);
     222           0 :                         return -EINVAL;
     223             :                 }
     224             : 
     225           0 :                 xfs_rtb_to_rtx(mp, irec2.br_startoff, &mod);      /* file 2's start offset, not file 1's again */
     226           0 :                 if (mod) {
     227             :                         ASSERT(mod == 0);
     228             :                         return -EINVAL;
     229             :                 }
     230             : 
     231           0 :                 xfs_rtb_to_rtx(mp, irec1.br_blockcount, &mod);    /* length was clamped to the shorter mapping above */
     232           0 :                 if (mod) {
     233           0 :                         ASSERT(mod == 0);
     234           0 :                         return -EINVAL;
     235             :                 }
     236             : 
     237           0 :                 startoff1 += irec1.br_blockcount;
     238           0 :                 startoff2 += irec1.br_blockcount;
     239           0 :                 blockcount -= irec1.br_blockcount;
     240             :         }
     241             : 
     242             :         return 0;
     243             : }
     244             : #else /* !DEBUG */
     245             : # define xfs_swapext_check_rt_extents(mp, req)          (0) /* no checking; always succeeds */
     246             : #endif /* DEBUG */
     247             : 
     248             : /*
                     :  * Check all extents to make sure we can actually swap them.
                     :  *
                     :  * Returns -EINVAL if either inode lacks the fork selected by
                     :  * req->whichfork or if either fork is in local format.  Otherwise returns
                     :  * whatever xfs_swapext_check_rt_extents() reports, which is always 0 when
                     :  * DEBUG is not defined.
                     :  */
     249             : int
     250      264640 : xfs_swapext_check_extents(
     251             :         struct xfs_mount                *mp,
     252             :         const struct xfs_swapext_req    *req)
     253             : {
     254      264640 :         struct xfs_ifork                *ifp1, *ifp2;
     255             : 
     256             :         /* No fork? */
     257      264640 :         ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
     258      264640 :         ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
     259      264640 :         if (!ifp1 || !ifp2)
     260             :                 return -EINVAL;
     261             : 
     262             :         /* We don't know how to swap local format forks. */
     263      264640 :         if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
     264      264640 :             ifp2->if_format == XFS_DINODE_FMT_LOCAL)
     265             :                 return -EINVAL;
     266             : 
     267      264640 :         return xfs_swapext_check_rt_extents(mp, req);
     268             : }
     269             : 
     270             : #ifdef CONFIG_XFS_QUOTA
     271             : /*
                     :  * Log the actual updates to the quota accounting.  A real extent in @irec1
                     :  * moves from inode 1 to inode 2 and a real extent in @irec2 moves the other
                     :  * way, so each inode's block count changes by the net difference.  The
                     :  * counter type (rt blocks vs. regular blocks) is picked from inode 1 and
                     :  * used for both inodes.
                     :  */
     272             : static inline void
     273     1122042 : xfs_swapext_update_quota(
     274             :         struct xfs_trans                *tp,
     275             :         struct xfs_swapext_intent       *sxi,
     276             :         struct xfs_bmbt_irec            *irec1,
     277             :         struct xfs_bmbt_irec            *irec2)
     278             : {
     279     1122042 :         int64_t                         ip1_delta = 0, ip2_delta = 0;
     280     1122042 :         unsigned int                    qflag;
     281             : 
     282     1122042 :         qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
     283             :                                                       XFS_TRANS_DQ_BCOUNT;
     284             : 
     285     1993904 :         if (xfs_bmap_is_real_extent(irec1)) {
     286      871862 :                 ip1_delta -= irec1->br_blockcount;
     287      871862 :                 ip2_delta += irec1->br_blockcount;
     288             :         }
     289             : 
     290     1996217 :         if (xfs_bmap_is_real_extent(irec2)) {
     291      874175 :                 ip1_delta += irec2->br_blockcount;
     292      874175 :                 ip2_delta -= irec2->br_blockcount;
     293             :         }
     294             : 
     295     1122042 :         xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
     296     1122042 :         xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
     297     1122042 : }
     298             : #else /* !CONFIG_XFS_QUOTA */
     299             : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2)        ((void)0) /* quota compiled out: no-op */
     300             : #endif /* CONFIG_XFS_QUOTA */
     301             : 
     302             : /*
                     :  * Decide if we want to skip this mapping from file1.
                     :  *
                     :  * Returns true if the caller should skip @irec and false if it must be
                     :  * swapped.  On realtime files with big rt extents, irec->br_blockcount may
                     :  * be trimmed so that the part being swapped or skipped ends on an rtx
                     :  * boundary; the caller then works with the trimmed length.
                     :  */
     303             : static inline bool
     304     2628363 : xfs_swapext_can_skip_mapping(
     305             :         struct xfs_swapext_intent       *sxi,
     306             :         struct xfs_bmbt_irec            *irec)
     307             : {
     308     2628363 :         struct xfs_mount                *mp = sxi->sxi_ip1->i_mount;
     309             : 
     310             :         /* Do not skip this mapping if the caller did not tell us to. */
     311     2628363 :         if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
     312             :                 return false;
     313             : 
     314             :         /* Do not skip mapped, written extents. */
     315         108 :         if (xfs_bmap_is_written_extent(irec))
     316             :                 return false;
     317             : 
     318             :         /*
     319             :          * The mapping is unwritten or a hole.  It cannot be a delalloc
     320             :          * reservation because we already excluded those.  It cannot be an
     321             :          * unwritten extent with dirty page cache because we flushed the page
     322             :          * cache.  For files where the allocation unit is 1FSB (files on the
     323             :          * data dev, rt files if the extent size is 1FSB), we can safely
     324             :          * skip this mapping.
     325             :          */
     326          72 :         if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
     327             :                 return true;
     328             : 
     329             :         /*
     330             :          * For a realtime file with a multi-fsb allocation unit, the decision
     331             :          * is trickier because we can only swap full allocation units.
     332             :          * Unwritten mappings can appear in the middle of an rtx if the rtx is
     333             :          * partially written, but they can also appear for preallocations.
     334             :          *
     335             :          * If the mapping is a hole, skip it entirely.  Holes should align with
     336             :          * rtx boundaries.
     337             :          */
     338           0 :         if (!xfs_bmap_is_real_extent(irec))
     339             :                 return true;
     340             : 
     341             :         /*
     342             :          * All mappings below this point are unwritten.
     343             :          *
     344             :          * - If the beginning is not aligned to an rtx, trim the end of the
     345             :          *   mapping so that it does not cross an rtx boundary, and swap it.
     346             :          *
     347             :          * - If both ends are aligned to an rtx, skip the entire mapping.
     348             :          */
     349           0 :         if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
     350           0 :                 xfs_fileoff_t   new_end;
     351             : 
     352           0 :                 new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
     353           0 :                 irec->br_blockcount = new_end - irec->br_startoff;
     354           0 :                 return false;
     355             :         }
     356           0 :         if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
     357             :                 return true;
     358             : 
     359             :         /*
     360             :          * All mappings below this point are unwritten, start on an rtx
     361             :          * boundary, and do not end on an rtx boundary.
     362             :          *
     363             :          * - If the mapping is longer than one rtx, trim the end of the mapping
     364             :          *   down to an rtx boundary and skip it.
     365             :          *
     366             :          * - The mapping is shorter than one rtx.  Swap it.
     367             :          */
     368           0 :         if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
     369           0 :                 xfs_fileoff_t   new_end;
     370             : 
     371           0 :                 new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
     372             :                                 mp->m_sb.sb_rextsize);
     373           0 :                 irec->br_blockcount = new_end - irec->br_startoff;
     374           0 :                 return true;
     375             :         }
     376             : 
     377             :         return false;
     378             : }
     379             : 
     380             : /*
     381             :  * Walk forward through the file ranges in @sxi until we find two different
     382             :  * mappings to exchange.  If there is work to do, return the mappings;
     383             :  * otherwise we've reached the end of the range and sxi_blockcount will be
     384             :  * zero.
     385             :  *
     386             :  * If the walk skips over a pair of mappings to the same storage, save them as
     387             :  * the left records in @adj (if provided) so that the simulation phase can
     388             :  * avoid an extra lookup.
                     :  *
                     :  * The cursor in @sxi is advanced past every mapping that gets skipped; it
                     :  * is not advanced past the pair that is returned.  Returns 0 on success
                     :  * (check sxi_blockcount to see if a pair was found), -EINVAL if a mapping
                     :  * lookup yields something unexpected, -EFSCORRUPTED if two mappings with
                     :  * the same start block disagree about state, or an error from
                     :  * xfs_bmapi_read().
     389             :  */
     390             : static int
     391     2439694 : xfs_swapext_find_mappings(
     392             :         struct xfs_swapext_intent       *sxi,
     393             :         struct xfs_bmbt_irec            *irec1,
     394             :         struct xfs_bmbt_irec            *irec2,
     395             :         struct xfs_swapext_adjacent     *adj)
     396             : {
     397     2439694 :         int                             nimaps;
     398     2439694 :         int                             bmap_flags;
     399     2439694 :         int                             error;
     400             : 
     401     2439694 :         bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));
     402             : 
     403     2826578 :         for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
     404             :                 /* Read extent from the first file */
     405     2628362 :                 nimaps = 1;
     406     2628362 :                 error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
     407             :                                 sxi->sxi_blockcount, irec1, &nimaps,
     408             :                                 bmap_flags);
     409     2628363 :                 if (error)
     410           0 :                         return error;
     411     2628363 :                 if (nimaps != 1 ||
     412     2628363 :                     irec1->br_startblock == DELAYSTARTBLOCK ||
     413     2628363 :                     irec1->br_startoff != sxi->sxi_startoff1) {
     414             :                         /*
     415             :                          * We should never get no mapping or a delalloc extent
     416             :                          * or something that doesn't match what we asked for,
     417             :                          * since the caller flushed both inodes and we hold the
     418             :                          * ILOCKs for both inodes.
     419             :                          */
     420           0 :                         ASSERT(0);
     421           0 :                         return -EINVAL;
     422             :                 }
     423             : 
     424     2628363 :                 if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
     425          72 :                         trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
     426          72 :                         continue;
     427             :                 }
     428             : 
     429             :                 /* Read extent from the second file */
     430     2628291 :                 nimaps = 1;
     431     2628291 :                 error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
     432             :                                 irec1->br_blockcount, irec2, &nimaps,
     433             :                                 bmap_flags);
     434     2628291 :                 if (error)
     435           0 :                         return error;
     436     2628291 :                 if (nimaps != 1 ||
     437     2628291 :                     irec2->br_startblock == DELAYSTARTBLOCK ||
     438     2628291 :                     irec2->br_startoff != sxi->sxi_startoff2) {
     439             :                         /*
     440             :                          * We should never get no mapping or a delalloc extent
     441             :                          * or something that doesn't match what we asked for,
     442             :                          * since the caller flushed both inodes and we hold the
     443             :                          * ILOCKs for both inodes.
     444             :                          */
     445           0 :                         ASSERT(0);
     446           0 :                         return -EINVAL;
     447             :                 }
     448             : 
     449             :                 /*
     450             :                  * We can only swap as many blocks as the smaller of the two
     451             :                  * extent maps.
     452             :                  */
     453     2628291 :                 irec1->br_blockcount = min(irec1->br_blockcount,
     454             :                                            irec2->br_blockcount);
     455             : 
     456     2628291 :                 trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
     457     2628291 :                 trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);
     458             : 
     459             :                 /* We found something to swap, so return it. */
     460     2628291 :                 if (irec1->br_startblock != irec2->br_startblock)
     461             :                         return 0;
     462             : 
     463             :                 /*
     464             :                  * Two extents mapped to the same physical block must not have
     465             :                  * different states; that's filesystem corruption.  Move on to
     466             :                  * the next extent if they're both holes or both the same
     467             :                  * physical extent.
     468             :                  */
     469      386812 :                 if (irec1->br_state != irec2->br_state) {
     470           0 :                         xfs_bmap_mark_sick(sxi->sxi_ip1,
     471             :                                         xfs_swapext_whichfork(sxi));
     472           0 :                         xfs_bmap_mark_sick(sxi->sxi_ip2,
     473             :                                         xfs_swapext_whichfork(sxi));
     474           0 :                         return -EFSCORRUPTED;
     475             :                 }
     476             : 
     477             :                 /*
     478             :                  * Save the mappings if we're estimating work and skipping
     479             :                  * these identical mappings.
     480             :                  */
     481      386812 :                 if (adj) {
     482      383288 :                         memcpy(&adj->left1, irec1, sizeof(*irec1));
     483      383288 :                         memcpy(&adj->left2, irec2, sizeof(*irec2));
     484             :                 }
     485             :         }
     486             : 
     487             :         return 0;
     488             : }
     489             : 
     490             : /*
                     :  * Exchange these two mappings: update quota, unmap @irec1 from inode 1 and
                     :  * @irec2 from inode 2, then map each one into the other inode at the
                     :  * offset the other mapping used to occupy.  Note that the br_startoff
                     :  * fields of @irec1 and @irec2 are swapped in place.
                     :  */
     491             : static void
     492     1122042 : xfs_swapext_exchange_mappings(
     493             :         struct xfs_trans                *tp,
     494             :         struct xfs_swapext_intent       *sxi,
     495             :         struct xfs_bmbt_irec            *irec1,
     496             :         struct xfs_bmbt_irec            *irec2)
     497             : {
     498     1122042 :         int                             whichfork = xfs_swapext_whichfork(sxi);
     499             : 
     500     1122042 :         xfs_swapext_update_quota(tp, sxi, irec1, irec2);
     501             : 
     502             :         /* Remove both mappings. */
     503     1122042 :         xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
     504     1122042 :         xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);
     505             : 
     506             :         /*
     507             :          * Re-add both mappings.  We swap the file offsets between the two maps
     508             :          * and add the opposite map, which has the effect of filling the
     509             :          * logical offsets we just unmapped, but with the physical mapping
     510             :          * information swapped.
     511             :          */
     512     1122042 :         swap(irec1->br_startoff, irec2->br_startoff);
     513     1122042 :         xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
     514     1122042 :         xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);
     515             : 
     516             :         /*
                     :          * Make sure we're not mapping extents past EOF.  After the swap
                     :          * above, @irec2 is the mapping now in inode 1 and @irec1 the one
                     :          * now in inode 2.
                     :          */
     517     1122042 :         if (whichfork == XFS_DATA_FORK) {
     518     1078821 :                 xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
     519             :                                 sxi->sxi_isize1);
     520     1078821 :                 xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
     521             :                                 sxi->sxi_isize2);
     522             :         }
     523             : 
     524             :         /*
     525             :          * Advance our cursor and exit.   The caller (either defer ops or log
     526             :          * recovery) will log the SXD item, and if sxi->sxi_blockcount is
     527             :          * nonzero, it will log a new SXI item for the remainder and call us back.
     528             :          */
     529     1122042 :         sxi_advance(sxi, irec1);
     530     1122042 : }
     531             : 
     532             : /*
                     :  * Convert inode2's leaf attr fork back to shortform, if possible.
                     :  *
                     :  * Returns 0 without converting anything if the attr fork is not in leaf
                     :  * form or if xfs_attr_shortform_allfit() returns 0.  Otherwise returns the
                     :  * error from reading the leaf block or the result of the shortform
                     :  * conversion.
                     :  */
     533             : STATIC int
     534       39780 : xfs_swapext_attr_to_sf(
     535             :         struct xfs_trans                *tp,
     536             :         struct xfs_swapext_intent       *sxi)
     537             : {
     538       39780 :         struct xfs_da_args      args = {
     539       39780 :                 .dp             = sxi->sxi_ip2,
     540       39780 :                 .geo            = tp->t_mountp->m_attr_geo,
     541             :                 .whichfork      = XFS_ATTR_FORK,
     542             :                 .trans          = tp,
     543       39780 :                 .owner          = sxi->sxi_ip2->i_ino,
     544             :         };
     545       39780 :         struct xfs_buf          *bp;
     546       39780 :         int                     forkoff;
     547       39780 :         int                     error;
     548             : 
     549       39780 :         if (!xfs_attr_is_leaf(sxi->sxi_ip2))
     550             :                 return 0;
     551             : 
     552       36840 :         error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
     553             :                         &bp);
     554       36840 :         if (error)
     555             :                 return error;
     556             : 
     557       36840 :         forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
     558       36840 :         if (forkoff == 0)
     559             :                 return 0;       /* NOTE(review): bp is not released here; presumably it stays attached to @tp -- confirm */
     560             : 
     561          46 :         return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
     562             : }
     563             : 
     564             : /* Convert inode2's block dir fork back to shortform, if possible.. */
     565             : STATIC int
     566        1701 : xfs_swapext_dir_to_sf(
     567             :         struct xfs_trans                *tp,
     568             :         struct xfs_swapext_intent       *sxi)
     569             : {
     570        1701 :         struct xfs_da_args      args = {
     571        1701 :                 .dp             = sxi->sxi_ip2,
     572        1701 :                 .geo            = tp->t_mountp->m_dir_geo,
     573             :                 .whichfork      = XFS_DATA_FORK,
     574             :                 .trans          = tp,
     575        1701 :                 .owner          = sxi->sxi_ip2->i_ino,
     576             :         };
     577        1701 :         struct xfs_dir2_sf_hdr  sfh;
     578        1701 :         struct xfs_buf          *bp;
     579        1701 :         bool                    isblock;
     580        1701 :         int                     size;
     581        1701 :         int                     error;
     582             : 
     583        1701 :         error = xfs_dir2_isblock(&args, &isblock);
     584        1701 :         if (error)
     585             :                 return error;
     586             : 
     587        1701 :         if (!isblock)
     588             :                 return 0;
     589             : 
     590        1643 :         error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
     591        1643 :         if (error)
     592             :                 return error;
     593             : 
     594        1643 :         size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
     595        1643 :         if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
     596             :                 return 0;
     597             : 
     598           0 :         return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
     599             : }
     600             : 
     601             : /* Convert inode2's remote symlink target back to shortform, if possible. */
     602             : STATIC int
     603        5964 : xfs_swapext_link_to_sf(
     604             :         struct xfs_trans                *tp,
     605             :         struct xfs_swapext_intent       *sxi)
     606             : {
     607        5964 :         struct xfs_inode                *ip = sxi->sxi_ip2;
     608        5964 :         struct xfs_ifork                *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
     609        5964 :         char                            *buf;
     610        5964 :         int                             error;
     611             : 
     612        5964 :         if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
     613        5964 :             ip->i_disk_size > xfs_inode_data_fork_size(ip))
     614             :                 return 0;
     615             : 
     616             :         /* Read the current symlink target into a buffer. */
     617           1 :         buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
     618           1 :         if (!buf) {
     619           0 :                 ASSERT(0);
     620           0 :                 return -ENOMEM;
     621             :         }
     622             : 
     623           1 :         error = xfs_symlink_remote_read(ip, buf);
     624           1 :         if (error)
     625           0 :                 goto free;
     626             : 
     627             :         /* Remove the blocks. */
     628           1 :         error = xfs_symlink_remote_truncate(tp, ip);
     629           1 :         if (error)
     630           0 :                 goto free;
     631             : 
     632             :         /* Convert fork to local format and log our changes. */
     633           1 :         xfs_idestroy_fork(ifp);
     634           1 :         ifp->if_bytes = 0;
     635           1 :         ifp->if_format = XFS_DINODE_FMT_LOCAL;
     636           1 :         xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
     637           1 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
     638           1 : free:
     639           1 :         kmem_free(buf);
     640           1 :         return error;
     641             : }
     642             : 
     643             : static inline void
     644           9 : xfs_swapext_clear_reflink(
     645             :         struct xfs_trans        *tp,
     646             :         struct xfs_inode        *ip)
     647             : {
     648           9 :         trace_xfs_reflink_unset_inode_flag(ip);
     649             : 
     650           9 :         ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
     651           9 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     652           9 : }
     653             : 
     654             : /* Finish whatever work might come after a swap operation. */
     655             : static int
     656       47454 : xfs_swapext_do_postop_work(
     657             :         struct xfs_trans                *tp,
     658             :         struct xfs_swapext_intent       *sxi)
     659             : {
     660       47454 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
     661       47445 :                 int                     error = 0;
     662             : 
     663       47445 :                 if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
     664       39780 :                         error = xfs_swapext_attr_to_sf(tp, sxi);
     665        7665 :                 else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
     666        1701 :                         error = xfs_swapext_dir_to_sf(tp, sxi);
     667        5964 :                 else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
     668        5964 :                         error = xfs_swapext_link_to_sf(tp, sxi);
     669       47445 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
     670       47445 :                 if (error)
     671             :                         return error;
     672             :         }
     673             : 
     674       47454 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
     675           0 :                 xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
     676           0 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
     677             :         }
     678             : 
     679       47454 :         if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
     680           9 :                 xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
     681           9 :                 sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
     682             :         }
     683             : 
     684             :         return 0;
     685             : }
     686             : 
     687             : /* Finish one extent swap, possibly log more. */
     688             : int
     689     1270556 : xfs_swapext_finish_one(
     690             :         struct xfs_trans                *tp,
     691             :         struct xfs_swapext_intent       *sxi)
     692             : {
     693     1270556 :         struct xfs_bmbt_irec            irec1, irec2;
     694     1270556 :         int                             error;
     695             : 
     696     1270556 :         if (sxi_has_more_swap_work(sxi)) {
     697             :                 /*
     698             :                  * If the operation state says that some range of the files
     699             :                  * have not yet been swapped, look for extents in that range to
     700             :                  * swap.  If we find some extents, swap them.
     701             :                  */
     702     1223102 :                 error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
     703     1223102 :                 if (error)
     704             :                         return error;
     705             : 
     706     1223102 :                 if (sxi_has_more_swap_work(sxi))
     707     1122042 :                         xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);
     708             : 
     709             :                 /*
     710             :                  * If the caller asked us to exchange the file sizes after the
     711             :                  * swap and either we just swapped the last extents in the
     712             :                  * range or we didn't find anything to swap, update the ondisk
     713             :                  * file sizes.
     714             :                  */
     715     1223102 :                 if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
     716             :                     !sxi_has_more_swap_work(sxi)) {
     717       61902 :                         sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
     718       61902 :                         sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;
     719             : 
     720       61902 :                         xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
     721       61902 :                         xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
     722             :                 }
     723       47454 :         } else if (sxi_has_postop_work(sxi)) {
     724             :                 /*
     725             :                  * Now that we're finished with the swap operation, complete
     726             :                  * the post-op cleanup work.
     727             :                  */
     728       47454 :                 error = xfs_swapext_do_postop_work(tp, sxi);
     729       47454 :                 if (error)
     730             :                         return error;
     731             :         }
     732             : 
     733     1270556 :         if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
     734             :                 return -EIO;
     735             : 
     736             :         /* If we still have work to do, ask for a new transaction. */
     737     1270556 :         if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
     738      904428 :                 trace_xfs_swapext_defer(tp->t_mountp, sxi);
     739      904428 :                 return -EAGAIN;
     740             :         }
     741             : 
     742             :         /*
     743             :          * If we reach here, we've finished all the swapping work and the post
     744             :          * operation work.  The last thing we need to do before returning to
     745             :          * the caller is to make sure that COW forks are set up correctly.
     746             :          */
     747      366128 :         if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
     748      326348 :                 xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
     749      326348 :                 xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
     750             :         }
     751             : 
     752             :         return 0;
     753             : }
     754             : 
     755             : /*
     756             :  * Compute the amount of bmbt blocks we should reserve for each file.  In the
     757             :  * worst case, each exchange will fill a hole with a new mapping, which could
     758             :  * result in a btree split every time we add a new leaf block.
     759             :  */
     760             : static inline uint64_t
     761             : xfs_swapext_bmbt_blocks(
     762             :         struct xfs_mount                *mp,
     763             :         const struct xfs_swapext_req    *req)
     764             : {
     765      668155 :         return howmany_64(req->nr_exchanges,
     766      668155 :                                         XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
     767      668155 :                         XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
     768             : }
     769             : 
     770             : static inline uint64_t
     771      668160 : xfs_swapext_rmapbt_blocks(
     772             :         struct xfs_mount                *mp,
     773             :         const struct xfs_swapext_req    *req)
     774             : {
     775      668160 :         if (!xfs_has_rmapbt(mp))
     776             :                 return 0;
     777      668160 :         if (XFS_IS_REALTIME_INODE(req->ip1))
     778       72484 :                 return howmany_64(req->nr_exchanges,
     779       72484 :                                         XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
     780       72484 :                         XFS_RTRMAPADD_SPACE_RES(mp);
     781             : 
     782      595676 :         return howmany_64(req->nr_exchanges,
     783      595676 :                                         XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
     784      595676 :                         XFS_RMAPADD_SPACE_RES(mp);
     785             : }
     786             : 
     787             : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
     788             : int
     789      668155 : xfs_swapext_estimate_overhead(
     790             :         struct xfs_swapext_req  *req)
     791             : {
     792      668155 :         struct xfs_mount        *mp = req->ip1->i_mount;
     793      668155 :         xfs_filblks_t           bmbt_blocks;
     794      668155 :         xfs_filblks_t           rmapbt_blocks;
     795      668155 :         xfs_filblks_t           resblks = req->resblks;
     796             : 
     797             :         /*
     798             :          * Compute the number of bmbt and rmapbt blocks we might need to handle
     799             :          * the estimated number of exchanges.
     800             :          */
     801      668155 :         bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
     802      668155 :         rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
     803             : 
     804      668155 :         trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
     805             : 
     806             :         /* Make sure the change in file block count doesn't overflow. */
     807      668159 :         if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
     808             :                 return -EFBIG;
     809      668159 :         if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
     810             :                 return -EFBIG;
     811             : 
     812             :         /*
     813             :          * Add together the number of blocks we need to handle btree growth,
     814             :          * then add it to the number of blocks we need to reserve to this
     815             :          * transaction.
     816             :          */
     817      668159 :         if (check_add_overflow(resblks, bmbt_blocks, &resblks))
     818             :                 return -ENOSPC;
     819      668159 :         if (check_add_overflow(resblks, bmbt_blocks, &resblks))
     820             :                 return -ENOSPC;
     821      668159 :         if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
     822             :                 return -ENOSPC;
     823      668159 :         if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
     824             :                 return -ENOSPC;
     825             : 
     826             :         /* Can't actually reserve more than UINT_MAX blocks. */
     827      668159 :         if (req->resblks > UINT_MAX)
     828             :                 return -ENOSPC;
     829             : 
     830      668159 :         req->resblks = resblks;
     831      668159 :         trace_xfs_swapext_final_estimate(req);
     832      668159 :         return 0;
     833             : }
     834             : 
     835             : /* Decide if we can merge two real extents. */
     836             : static inline bool
     837     6289399 : can_merge(
     838             :         const struct xfs_bmbt_irec      *b1,
     839             :         const struct xfs_bmbt_irec      *b2)
     840             : {
     841             :         /* Don't merge holes. */
     842     6289399 :         if (b1->br_startblock == HOLESTARTBLOCK ||
     843     5143145 :             b2->br_startblock == HOLESTARTBLOCK)
     844             :                 return false;
     845             : 
     846             :         /* We don't merge holes. */
     847    14597031 :         if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
     848             :                 return false;
     849             : 
     850     4865677 :         if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
     851     3787609 :             b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
     852     1502024 :             b1->br_state                       == b2->br_state &&
     853     1430418 :             b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
     854     1430418 :                 return true;
     855             : 
     856             :         return false;
     857             : }
     858             : 
     859             : #define CLEFT_CONTIG    0x01
     860             : #define CRIGHT_CONTIG   0x02
     861             : #define CHOLE           0x04
     862             : #define CBOTH_CONTIG    (CLEFT_CONTIG | CRIGHT_CONTIG)
     863             : 
     864             : #define NLEFT_CONTIG    0x10
     865             : #define NRIGHT_CONTIG   0x20
     866             : #define NHOLE           0x40
     867             : #define NBOTH_CONTIG    (NLEFT_CONTIG | NRIGHT_CONTIG)
     868             : 
     869             : /* Estimate the effect of a single swap on extent count. */
     870             : static inline int
     871     2238874 : delta_nextents_step(
     872             :         struct xfs_mount                *mp,
     873             :         const struct xfs_bmbt_irec      *left,
     874             :         const struct xfs_bmbt_irec      *curr,
     875             :         const struct xfs_bmbt_irec      *new,
     876             :         const struct xfs_bmbt_irec      *right)
     877             : {
     878     2238874 :         bool                            lhole, rhole, chole, nhole;
     879     2238874 :         unsigned int                    state = 0;
     880     2238874 :         int                             ret = 0;
     881             : 
     882     2238874 :         lhole = left->br_startblock == HOLESTARTBLOCK;
     883     2238874 :         rhole = right->br_startblock == HOLESTARTBLOCK;
     884     2238874 :         chole = curr->br_startblock == HOLESTARTBLOCK;
     885     2238874 :         nhole = new->br_startblock == HOLESTARTBLOCK;
     886             : 
     887     2238874 :         if (chole)
     888      498241 :                 state |= CHOLE;
     889     2238874 :         if (!lhole && !chole && can_merge(left, curr))
     890         227 :                 state |= CLEFT_CONTIG;
     891     2238874 :         if (!rhole && !chole && can_merge(curr, right))
     892      597855 :                 state |= CRIGHT_CONTIG;
     893     2238874 :         if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
     894         162 :             left->br_startblock + curr->br_startblock +
     895             :                                         right->br_startblock > XFS_MAX_BMBT_EXTLEN)
     896          15 :                 state &= ~CRIGHT_CONTIG;
     897             : 
     898     2238874 :         if (nhole)
     899      498241 :                 state |= NHOLE;
     900     2238874 :         if (!lhole && !nhole && can_merge(left, new))
     901      416163 :                 state |= NLEFT_CONTIG;
     902     2238874 :         if (!rhole && !nhole && can_merge(new, right))
     903          10 :                 state |= NRIGHT_CONTIG;
     904     2238874 :         if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
     905           9 :             left->br_startblock + new->br_startblock +
     906             :                                         right->br_startblock > XFS_MAX_BMBT_EXTLEN)
     907           0 :                 state &= ~NRIGHT_CONTIG;
     908             : 
     909     2238874 :         switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
     910         147 :         case CLEFT_CONTIG | CRIGHT_CONTIG:
     911             :                 /*
     912             :                  * left/curr/right are the same extent, so deleting curr causes
     913             :                  * 2 new extents to be created.
     914             :                  */
     915         147 :                 ret += 2;
     916         147 :                 break;
     917     1142713 :         case 0:
     918             :                 /*
     919             :                  * curr is not contiguous with any extent, so we remove curr
     920             :                  * completely
     921             :                  */
     922     1142713 :                 ret--;
     923     1142713 :                 break;
     924             :         case CHOLE:
     925             :                 /* hole, do nothing */
     926             :                 break;
     927             :         case CLEFT_CONTIG:
     928             :         case CRIGHT_CONTIG:
     929             :                 /* trim either left or right, no change */
     930             :                 break;
     931             :         }
     932             : 
     933     2238874 :         switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
     934           9 :         case NLEFT_CONTIG | NRIGHT_CONTIG:
     935             :                 /*
     936             :                  * left/curr/right will become the same extent, so adding
     937             :                  * curr causes the deletion of right.
     938             :                  */
     939           9 :                 ret--;
     940           9 :                 break;
     941     1324469 :         case 0:
     942             :                 /* new is not contiguous with any extent */
     943     1324469 :                 ret++;
     944     1324469 :                 break;
     945             :         case NHOLE:
     946             :                 /* hole, do nothing. */
     947             :                 break;
     948             :         case NLEFT_CONTIG:
     949             :         case NRIGHT_CONTIG:
     950             :                 /* new is absorbed into left or right, no change */
     951             :                 break;
     952             :         }
     953             : 
     954     2238874 :         trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
     955             :                         state);
     956     2238874 :         return ret;
     957             : }
     958             : 
     959             : /* Make sure we don't overflow the extent counters. */
     960             : static inline int
     961      466594 : ensure_delta_nextents(
     962             :         struct xfs_swapext_req  *req,
     963             :         struct xfs_inode        *ip,
     964             :         int64_t                 delta)
     965             : {
     966      466594 :         struct xfs_mount        *mp = ip->i_mount;
     967      466594 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, req->whichfork);
     968      466594 :         xfs_extnum_t            max_extents;
     969      466594 :         bool                    large_extcount;
     970             : 
     971      466594 :         if (delta < 0)
     972             :                 return 0;
     973             : 
     974      461375 :         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
     975           2 :                 if (ifp->if_nextents + delta > 10)
     976             :                         return -EFBIG;
     977             :         }
     978             : 
     979      461373 :         if (req->req_flags & XFS_SWAP_REQ_NREXT64)
     980             :                 large_extcount = true;
     981             :         else
     982      461373 :                 large_extcount = xfs_inode_has_large_extent_counts(ip);
     983             : 
     984      461373 :         max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
     985      461373 :         if (ifp->if_nextents + delta <= max_extents)
     986             :                 return 0;
     987           0 :         if (large_extcount)
     988             :                 return -EFBIG;
     989           0 :         if (!xfs_has_large_extent_counts(mp))
     990             :                 return -EFBIG;
     991             : 
     992           0 :         max_extents = xfs_iext_max_nextents(true, req->whichfork);
     993           0 :         if (ifp->if_nextents + delta > max_extents)
     994             :                 return -EFBIG;
     995             : 
     996           0 :         req->req_flags |= XFS_SWAP_REQ_NREXT64;
     997           0 :         return 0;
     998             : }
     999             : 
    1000             : /* Find the next extent after irec. */
    1001             : static inline int
    1002     2238874 : get_next_ext(
    1003             :         struct xfs_inode                *ip,
    1004             :         int                             bmap_flags,
    1005             :         const struct xfs_bmbt_irec      *irec,
    1006             :         struct xfs_bmbt_irec            *nrec)
    1007             : {
    1008     2238874 :         xfs_fileoff_t                   off;
    1009     2238874 :         xfs_filblks_t                   blockcount;
    1010     2238874 :         int                             nimaps = 1;
    1011     2238874 :         int                             error;
    1012             : 
    1013     2238874 :         off = irec->br_startoff + irec->br_blockcount;
    1014     2238874 :         blockcount = XFS_MAX_FILEOFF - off;
    1015     2238874 :         error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
    1016     2238874 :         if (error)
    1017             :                 return error;
    1018     2238874 :         if (nrec->br_startblock == DELAYSTARTBLOCK ||
    1019     2236541 :             nrec->br_startoff != off) {
    1020             :                 /*
    1021             :                  * If we don't get the extent we want, return a zero-length
    1022             :                  * mapping, which our estimator function will pretend is a hole.
    1023             :                  * We shouldn't get delalloc reservations.
    1024             :                  */
    1025        2333 :                 nrec->br_startblock = HOLESTARTBLOCK;
    1026             :         }
    1027             : 
    1028             :         return 0;
    1029             : }
    1030             : 
    1031             : int __init
    1032          12 : xfs_swapext_intent_init_cache(void)
    1033             : {
    1034          12 :         xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
    1035             :                         sizeof(struct xfs_swapext_intent),
    1036             :                         0, 0, NULL);
    1037             : 
    1038          12 :         return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
    1039             : }
    1040             : 
    1041             : void
    1042          12 : xfs_swapext_intent_destroy_cache(void)
    1043             : {
    1044          12 :         kmem_cache_destroy(xfs_swapext_intent_cache);
    1045          12 :         xfs_swapext_intent_cache = NULL;
    1046          12 : }
    1047             : 
    1048             : /*
    1049             :  * Decide if we will swap the reflink flags between the two files after the
    1050             :  * swap.  The only time we want to do this is if we're exchanging all extents
    1051             :  * under EOF and the inode reflink flags have different states.
    1052             :  */
    1053             : static inline bool
    1054      650179 : sxi_can_exchange_reflink_flags(
    1055             :         const struct xfs_swapext_req    *req,
    1056             :         unsigned int                    reflink_state)
    1057             : {
    1058      650179 :         struct xfs_mount                *mp = req->ip1->i_mount;
    1059             : 
    1060     1300358 :         if (hweight32(reflink_state) != 1)
    1061             :                 return false;
    1062          38 :         if (req->startoff1 != 0 || req->startoff2 != 0)
    1063             :                 return false;
    1064          30 :         if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
    1065             :                 return false;
    1066          30 :         if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
    1067           0 :                 return false;
    1068             :         return true;
    1069             : }
    1070             : 
    1071             : 
    1072             : /* Allocate a zeroed incore intent item and fill it in from @req; if @reflink_state is non-NULL, the reflink state bits sampled from the two inodes are stored there. */
    1073             : struct xfs_swapext_intent *
    1074      728492 : xfs_swapext_init_intent(
    1075             :         const struct xfs_swapext_req    *req,
    1076             :         unsigned int                    *reflink_state)
    1077             : {
    1078      728492 :         struct xfs_swapext_intent       *sxi;
    1079      728492 :         unsigned int                    rs = 0; /* stays 0 unless this is a data fork request */
    1080             : 
    1081      728492 :         sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
    1082             :                         GFP_NOFS | __GFP_NOFAIL); /* result is used below without a NULL check */
    1083      728492 :         INIT_LIST_HEAD(&sxi->sxi_list);
    1084      728492 :         sxi->sxi_ip1 = req->ip1;
    1085      728492 :         sxi->sxi_ip2 = req->ip2;
    1086      728492 :         sxi->sxi_startoff1 = req->startoff1;
    1087      728492 :         sxi->sxi_startoff2 = req->startoff2;
    1088      728492 :         sxi->sxi_blockcount = req->blockcount;
    1089      728492 :         sxi->sxi_isize1 = sxi->sxi_isize2 = -1; /* left at -1 unless XFS_SWAP_REQ_SET_SIZES applies below */
    1090             : 
    1091      728492 :         if (req->whichfork == XFS_ATTR_FORK)
    1092       78313 :                 sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;
    1093             : 
    1094      728492 :         if (req->whichfork == XFS_DATA_FORK &&
    1095             :             (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
    1096      121147 :                 sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
    1097      121147 :                 sxi->sxi_isize1 = req->ip2->i_disk_size; /* crossed over: isize1 comes from ip2, isize2 from ip1 */
    1098      121147 :                 sxi->sxi_isize2 = req->ip1->i_disk_size;
    1099             :         }
    1100             : 
    1101      728492 :         if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
    1102          32 :                 sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
    1103      728492 :         if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
    1104       90982 :                 sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;
    1105             : 
    1106      728492 :         if (req->req_flags & XFS_SWAP_REQ_LOGGED) /* these two request flags land in sxi_op_flags, not sxi_flags */
    1107      715618 :                 sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
    1108      728492 :         if (req->req_flags & XFS_SWAP_REQ_NREXT64)
    1109           0 :                 sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;
    1110             : 
    1111      728492 :         if (req->whichfork == XFS_DATA_FORK) {
    1112             :                 /*
    1113             :                  * Record the state of each inode's reflink flag before the
    1114             :                  * operation.
    1115             :                  */
    1116      650179 :                 if (xfs_is_reflink_inode(req->ip1))
    1117      486816 :                         rs |= XFS_REFLINK_STATE_IP1;
    1118      650179 :                 if (xfs_is_reflink_inode(req->ip2))
    1119      486854 :                         rs |= XFS_REFLINK_STATE_IP2;
    1120             : 
    1121             :                 /*
    1122             :                  * Figure out if we're clearing the reflink flags (which
    1123             :                  * effectively swaps them) after the operation.
    1124             :                  */
    1125      650179 :                 if (sxi_can_exchange_reflink_flags(req, rs)) { /* NOTE(review): helper only partly visible here; it appears to require exactly one reflinked inode and a whole-file swap -- confirm */
    1126          30 :                         if (rs & XFS_REFLINK_STATE_IP1)
    1127           0 :                                 sxi->sxi_flags |=
    1128             :                                                 XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
    1129          30 :                         if (rs & XFS_REFLINK_STATE_IP2)
    1130          30 :                                 sxi->sxi_flags |=
    1131             :                                                 XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
    1132             :                 }
    1133             :         }
    1134             : 
    1135      728492 :         if (reflink_state) /* the out parameter is optional */
    1136      366140 :                 *reflink_state = rs;
    1137      728492 :         return sxi;
    1138             : }
    1139             : 
    1140             : /*
    1141             :  * Estimate the number of exchange operations (req->nr_exchanges) and the number
    1142             :  * of file blocks in each file that the exchange will affect, storing both in @req.
    1143             :  */
    1144             : int
    1145      362352 : xfs_swapext_estimate(
    1146             :         struct xfs_swapext_req          *req)
    1147             : {
    1148      362352 :         struct xfs_swapext_intent       *sxi;
    1149      362352 :         struct xfs_bmbt_irec            irec1, irec2;
    1150      362352 :         struct xfs_swapext_adjacent     adj = ADJACENT_INIT;
    1151      362352 :         xfs_filblks_t                   ip1_blocks = 0, ip2_blocks = 0;
    1152      362352 :         int64_t                         d_nexts1, d_nexts2;
    1153      362352 :         int                             bmap_flags;
    1154      362352 :         int                             error;
    1155             : 
    1156      362352 :         ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
    1157             : 
    1158      362352 :         bmap_flags = xfs_bmapi_aflag(req->whichfork);
    1159      362352 :         sxi = xfs_swapext_init_intent(req, NULL); /* scratch intent for the simulation; freed at out_free */
    1160             : 
    1161             :         /*
    1162             :          * To guard against the possibility of overflowing the extent counters,
    1163             :          * we have to estimate an upper bound on the potential increase in that
    1164             :          * counter.  We can split the extent at each end of the range, and for
    1165             :          * each step of the swap we can split the extent that we're working on
    1166             :          * if the extents do not align.
    1167             :          */
    1168      362352 :         d_nexts1 = d_nexts2 = 3;
    1169             : 
    1170     1481789 :         while (sxi_has_more_swap_work(sxi)) {
    1171             :                 /*
    1172             :                  * Walk through the file ranges until we find something to
    1173             :                  * swap.  Because we're simulating the swap, pass in adj to
    1174             :                  * capture skipped mappings for correct estimation of bmbt
    1175             :                  * record merges.
    1176             :                  */
    1177     1216593 :                 error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
    1178     1216593 :                 if (error)
    1179           0 :                         goto out_free;
    1180     1216593 :                 if (!sxi_has_more_swap_work(sxi)) /* re-checked after the search; presumably the search can advance sxi to the end of the range -- confirm */
    1181             :                         break;
    1182             : 
    1183             :                 /* Update accounting. */
    1184     1988500 :                 if (xfs_bmap_is_real_extent(&irec1))
    1185      869063 :                         ip1_blocks += irec1.br_blockcount;
    1186     1991007 :                 if (xfs_bmap_is_real_extent(&irec2))
    1187      871570 :                         ip2_blocks += irec2.br_blockcount;
    1188     1119437 :                 req->nr_exchanges++; /* adds to whatever the caller left in nr_exchanges; not reset in this function */
    1189             : 
    1190             :                 /* Read the next extents from both files. */
    1191     1119437 :                 error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
    1192     1119437 :                 if (error)
    1193           0 :                         goto out_free;
    1194             : 
    1195     1119437 :                 error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
    1196     1119437 :                 if (error)
    1197           0 :                         goto out_free;
    1198             : 
    1199             :                 /* Update extent count deltas. */
    1200     1119437 :                 d_nexts1 += delta_nextents_step(req->ip1->i_mount,
    1201             :                                 &adj.left1, &irec1, &irec2, &adj.right1);
    1202             : 
    1203     1119437 :                 d_nexts2 += delta_nextents_step(req->ip1->i_mount, /* ip1's mount is passed for the second file as well */
    1204             :                                 &adj.left2, &irec2, &irec1, &adj.right2);
    1205             : 
    1206             :                 /* Now pretend we swapped the extents. */
    1207     1119437 :                 if (can_merge(&adj.left2, &irec1)) /* file 2's left neighbour absorbs or becomes the mapping that came from file 1 */
    1208      277580 :                         adj.left2.br_blockcount += irec1.br_blockcount;
    1209             :                 else
    1210      841857 :                         memcpy(&adj.left2, &irec1, sizeof(irec1));
    1211             : 
    1212     1119437 :                 if (can_merge(&adj.left1, &irec2)) /* and the mirror image for file 1 */
    1213      138583 :                         adj.left1.br_blockcount += irec2.br_blockcount;
    1214             :                 else
    1215      980854 :                         memcpy(&adj.left1, &irec2, sizeof(irec2));
    1216             : 
    1217     1119437 :                 sxi_advance(sxi, &irec1);
    1218             :         }
    1219             : 
    1220             :         /* Report the block totals: realtime counters when ip1 is realtime and the data fork is swapped, regular counters otherwise. */
    1221      362352 :         if (XFS_IS_REALTIME_INODE(req->ip1) &&
    1222       72484 :             req->whichfork == XFS_DATA_FORK) {
    1223       72484 :                 req->ip1_rtbcount = ip1_blocks;
    1224       72484 :                 req->ip2_rtbcount = ip2_blocks;
    1225             :         } else {
    1226      289868 :                 req->ip1_bcount = ip1_blocks;
    1227      289868 :                 req->ip2_bcount = ip2_blocks;
    1228             :         }
    1229             : 
    1230             :         /*
    1231             :          * Make sure that both forks have enough slack left in their extent
    1232             :          * counters that the swap operation will not overflow.
    1233             :          */
    1234      362352 :         trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
    1235      362352 :         if (req->ip1 == req->ip2) { /* same inode on both sides: one check against the combined delta */
    1236      258108 :                 error = ensure_delta_nextents(req, req->ip1,
    1237             :                                 d_nexts1 + d_nexts2);
    1238             :         } else {
    1239      104244 :                 error = ensure_delta_nextents(req, req->ip1, d_nexts1);
    1240      104244 :                 if (error)
    1241           2 :                         goto out_free;
    1242      104242 :                 error = ensure_delta_nextents(req, req->ip2, d_nexts2);
    1243             :         }
    1244      362350 :         if (error)
    1245           0 :                 goto out_free;
    1246             : 
    1247      362350 :         trace_xfs_swapext_initial_estimate(req);
    1248      362350 :         error = xfs_swapext_estimate_overhead(req);
    1249      362352 : out_free: /* also reached on success; the scratch intent is always released */
    1250      362352 :         kmem_cache_free(xfs_swapext_intent_cache, sxi);
    1251      362352 :         return error; /* first helper failure, else the xfs_swapext_estimate_overhead() result */
    1252             : }
    1253             : 
    1254             : static inline void
    1255          19 : xfs_swapext_set_reflink( /* set XFS_DIFLAG2_REFLINK on @ip and log the inode core in @tp */
    1256             :         struct xfs_trans        *tp,
    1257             :         struct xfs_inode        *ip)
    1258             : {
    1259          19 :         trace_xfs_reflink_set_inode_flag(ip);
    1260             : 
    1261          19 :         ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
    1262          19 :         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* log the inode core, which holds the flag just set */
    1263          19 : }
    1264             : 
    1265             : /*
    1266             :  * If either file has shared blocks and we're swapping data forks, we must flag
    1267             :  * the other file as having shared blocks so that we get the shared-block rmap
    1268             :  * functions if we need to fix up the rmaps.
    1269             :  */
    1270             : void
    1271      366140 : xfs_swapext_ensure_reflink(
    1272             :         struct xfs_trans                *tp,
    1273             :         const struct xfs_swapext_intent *sxi,
    1274             :         unsigned int                    reflink_state)
    1275             : {
    1276      366140 :         if ((reflink_state & XFS_REFLINK_STATE_IP1) && /* state bit says ip1 is reflinked, but ip2 does not carry the flag yet */
    1277      243401 :             !xfs_is_reflink_inode(sxi->sxi_ip2))
    1278           0 :                 xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
    1279             : 
    1280      366140 :         if ((reflink_state & XFS_REFLINK_STATE_IP2) && /* and the mirror case: ip2 reflinked, ip1 not yet flagged */
    1281      243420 :             !xfs_is_reflink_inode(sxi->sxi_ip1))
    1282          19 :                 xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
    1283      366140 : }
    1284             : 
    1285             : /* If the intent carries XFS_SWAP_EXT_OP_NREXT64, set XFS_DIFLAG2_NREXT64 on both inodes and log their cores; otherwise do nothing. */
    1286             : static inline void
    1287      366134 : xfs_swapext_upgrade_extent_counts(
    1288             :         struct xfs_trans                *tp,
    1289             :         const struct xfs_swapext_intent *sxi)
    1290             : {
    1291      366134 :         if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
    1292             :                 return;
    1293             : 
    1294           0 :         sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64; /* set and logged without testing whether the flag was already present */
    1295           0 :         xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
    1296             : 
    1297           0 :         sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
    1298           0 :         xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
    1299             : }
    1300             : 
    1301             : /*
    1302             :  * Schedule a swap of a range of extents from one inode to another.  If the
    1303             :  * atomic swap feature is enabled, then the operation progress can be resumed
    1304             :  * even if the system goes down.  The caller must commit the transaction to
    1305             :  * start the work.
    1306             :  *
    1307             :  * The caller must ensure the inodes are joined to the transaction and
    1308             :  * ILOCKd; they will still be joined to the transaction at exit.
    1309             :  */
    1310             : void
    1311      366134 : xfs_swapext(
    1312             :         struct xfs_trans                *tp,
    1313             :         const struct xfs_swapext_req    *req)
    1314             : {
    1315      366134 :         struct xfs_swapext_intent       *sxi;
    1316      366134 :         unsigned int                    reflink_state;
    1317             : 
    1318      366134 :         ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
    1319      366134 :         ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
    1320      366134 :         ASSERT(req->whichfork != XFS_COW_FORK);
    1321      366134 :         ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
    1322      366134 :         if (req->req_flags & XFS_SWAP_REQ_SET_SIZES) /* SET_SIZES is only expected with the data fork */
    1323       61902 :                 ASSERT(req->whichfork == XFS_DATA_FORK);
    1324      366134 :         if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF) /* CVT_INO2_SF: attr fork, or data fork of a directory or symlink */
    1325       47445 :                 ASSERT(req->whichfork == XFS_ATTR_FORK ||
    1326             :                        (req->whichfork == XFS_DATA_FORK &&
    1327             :                         (S_ISDIR(VFS_I(req->ip2)->i_mode) ||
    1328             :                          S_ISLNK(VFS_I(req->ip2)->i_mode))));
    1329             : 
    1330      366134 :         if (req->blockcount == 0) /* zero-length request: no intent is allocated or scheduled */
    1331           0 :                 return;
    1332             : 
    1333      366134 :         sxi = xfs_swapext_init_intent(req, &reflink_state);
    1334      366134 :         xfs_swapext_schedule(tp, sxi); /* sxi is handed off here and is not freed in this function */
    1335      366134 :         xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
    1336      366134 :         xfs_swapext_upgrade_extent_counts(tp, sxi);
    1337             : }

Generated by: LCOV version 1.14