Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_bmap.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_quota.h"
19 : #include "xfs_swapext.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_bmap_btree.h"
22 : #include "xfs_trans_space.h"
23 : #include "xfs_error.h"
24 : #include "xfs_errortag.h"
25 : #include "xfs_health.h"
26 : #include "xfs_da_format.h"
27 : #include "xfs_da_btree.h"
28 : #include "xfs_attr_leaf.h"
29 : #include "xfs_attr.h"
30 : #include "xfs_dir2_priv.h"
31 : #include "xfs_dir2.h"
32 : #include "xfs_symlink_remote.h"
33 :
/* Slab cache for deferred extent-swap intent items (see init_cache below). */
struct kmem_cache *xfs_swapext_intent_cache;
35 :
/* bmbt mappings adjacent to a pair of records. */
struct xfs_swapext_adjacent {
	struct xfs_bmbt_irec		left1;
	struct xfs_bmbt_irec		right1;
	struct xfs_bmbt_irec		left2;
	struct xfs_bmbt_irec		right2;
};

/* Start with all four neighbors marked as holes, i.e. not yet looked up. */
#define ADJACENT_INIT { \
	.left1 = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2 = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}

/* Information to help us reset reflink flag / CoW fork state after a swap. */

/* Previous state of the two inodes' reflink flags. */
#define XFS_REFLINK_STATE_IP1	(1U << 0)
#define XFS_REFLINK_STATE_IP2	(1U << 1)
56 :
57 : /*
58 : * If the reflink flag is set on either inode, make sure it has an incore CoW
59 : * fork, since all reflink inodes must have them. If there's a CoW fork and it
60 : * has extents in it, make sure the inodes are tagged appropriately so that
61 : * speculative preallocations can be GC'd if we run low of space.
62 : */
63 : static inline void
64 361912 : xfs_swapext_ensure_cowfork(
65 : struct xfs_inode *ip)
66 : {
67 361912 : struct xfs_ifork *cfork;
68 :
69 361912 : if (xfs_is_reflink_inode(ip))
70 314634 : xfs_ifork_init_cow(ip);
71 :
72 361912 : cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
73 361912 : if (!cfork)
74 : return;
75 340559 : if (cfork->if_bytes > 0)
76 49026 : xfs_inode_set_cowblocks_tag(ip);
77 : else
78 291533 : xfs_inode_clear_cowblocks_tag(ip);
79 : }
80 :
81 : /* Schedule an atomic extent swap. */
void
xfs_swapext_schedule(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	trace_xfs_swapext_defer(tp->t_mountp, sxi);
	/* Hand the intent to defer ops, which takes over logging/finishing. */
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
}
90 :
91 : /*
92 : * Adjust the on-disk inode size upwards if needed so that we never map extents
93 : * into the file past EOF. This is crucial so that log recovery won't get
94 : * confused by the sudden appearance of post-eof extents.
95 : */
96 : STATIC void
97 1426786 : xfs_swapext_update_size(
98 : struct xfs_trans *tp,
99 : struct xfs_inode *ip,
100 : struct xfs_bmbt_irec *imap,
101 : xfs_fsize_t new_isize)
102 : {
103 1426786 : struct xfs_mount *mp = tp->t_mountp;
104 1426786 : xfs_fsize_t len;
105 :
106 1426786 : if (new_isize < 0)
107 : return;
108 :
109 10536 : len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
110 : new_isize);
111 :
112 10536 : if (len <= ip->i_disk_size)
113 : return;
114 :
115 52 : trace_xfs_swapext_update_inode_size(ip, len);
116 :
117 52 : ip->i_disk_size = len;
118 52 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
119 : }
120 :
121 : static inline bool
122 : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
123 : {
124 6412300 : return sxi->sxi_blockcount > 0;
125 : }
126 :
127 : static inline bool
128 : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
129 : {
130 311988 : return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
131 : XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
132 : XFS_SWAP_EXT_CVT_INO2_SF);
133 : }
134 :
135 : static inline void
136 : sxi_advance(
137 : struct xfs_swapext_intent *sxi,
138 : const struct xfs_bmbt_irec *irec)
139 : {
140 1818333 : sxi->sxi_startoff1 += irec->br_blockcount;
141 1818333 : sxi->sxi_startoff2 += irec->br_blockcount;
142 1818333 : sxi->sxi_blockcount -= irec->br_blockcount;
143 1060976 : }
144 :
145 : #ifdef DEBUG
146 : static inline bool
147 176827 : xfs_swapext_need_rt_conversion(
148 : const struct xfs_swapext_req *req)
149 : {
150 176827 : struct xfs_inode *ip = req->ip2;
151 176827 : struct xfs_mount *mp = ip->i_mount;
152 :
153 : /* xattrs don't live on the rt device */
154 176827 : if (req->whichfork == XFS_ATTR_FORK)
155 : return false;
156 :
157 : /*
158 : * Caller got permission to use logged swapext, so log recovery will
159 : * finish the swap and not leave us with partially swapped rt extents
160 : * exposed to userspace.
161 : */
162 176827 : if (req->req_flags & XFS_SWAP_REQ_LOGGED)
163 : return false;
164 :
165 : /*
166 : * If we can't use log intent items at all, the only supported
167 : * operation is full fork swaps.
168 : */
169 6465 : if (!xfs_swapext_supported(mp))
170 : return false;
171 :
172 : /* Conversion is only needed for realtime files with big rt extents */
173 6461 : return xfs_inode_has_bigrtextents(ip);
174 : }
175 :
176 : static inline int
177 176827 : xfs_swapext_check_rt_extents(
178 : struct xfs_mount *mp,
179 : const struct xfs_swapext_req *req)
180 : {
181 176827 : struct xfs_bmbt_irec irec1, irec2;
182 176827 : xfs_fileoff_t startoff1 = req->startoff1;
183 176827 : xfs_fileoff_t startoff2 = req->startoff2;
184 176827 : xfs_filblks_t blockcount = req->blockcount;
185 176827 : uint32_t mod;
186 176827 : int nimaps;
187 176827 : int error;
188 :
189 176827 : if (!xfs_swapext_need_rt_conversion(req))
190 : return 0;
191 :
192 0 : while (blockcount > 0) {
193 : /* Read extent from the first file */
194 0 : nimaps = 1;
195 0 : error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
196 : &irec1, &nimaps, 0);
197 0 : if (error)
198 0 : return error;
199 0 : ASSERT(nimaps == 1);
200 :
201 : /* Read extent from the second file */
202 0 : nimaps = 1;
203 0 : error = xfs_bmapi_read(req->ip2, startoff2,
204 : irec1.br_blockcount, &irec2, &nimaps,
205 : 0);
206 0 : if (error)
207 0 : return error;
208 0 : ASSERT(nimaps == 1);
209 :
210 : /*
211 : * We can only swap as many blocks as the smaller of the two
212 : * extent maps.
213 : */
214 0 : irec1.br_blockcount = min(irec1.br_blockcount,
215 : irec2.br_blockcount);
216 :
217 : /* Both mappings must be aligned to the realtime extent size. */
218 0 : div_u64_rem(irec1.br_startoff, mp->m_sb.sb_rextsize, &mod);
219 0 : if (mod) {
220 0 : ASSERT(mod == 0);
221 0 : return -EINVAL;
222 : }
223 :
224 0 : div_u64_rem(irec2.br_startoff, mp->m_sb.sb_rextsize, &mod);
225 0 : if (mod) {
226 0 : ASSERT(mod == 0);
227 0 : return -EINVAL;
228 : }
229 :
230 0 : div_u64_rem(irec1.br_blockcount, mp->m_sb.sb_rextsize, &mod);
231 0 : if (mod) {
232 0 : ASSERT(mod == 0);
233 0 : return -EINVAL;
234 : }
235 :
236 0 : startoff1 += irec1.br_blockcount;
237 0 : startoff2 += irec1.br_blockcount;
238 0 : blockcount -= irec1.br_blockcount;
239 : }
240 :
241 : return 0;
242 : }
243 : #else
244 : # define xfs_swapext_check_rt_extents(mp, req) (0)
245 : #endif
246 :
247 : /* Check all extents to make sure we can actually swap them. */
248 : int
249 176827 : xfs_swapext_check_extents(
250 : struct xfs_mount *mp,
251 : const struct xfs_swapext_req *req)
252 : {
253 176827 : struct xfs_ifork *ifp1, *ifp2;
254 :
255 : /* No fork? */
256 176827 : ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
257 176827 : ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
258 176827 : if (!ifp1 || !ifp2)
259 : return -EINVAL;
260 :
261 : /* We don't know how to swap local format forks. */
262 176827 : if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
263 176827 : ifp2->if_format == XFS_DINODE_FMT_LOCAL)
264 : return -EINVAL;
265 :
266 176827 : return xfs_swapext_check_rt_extents(mp, req);
267 : }
268 :
269 : #ifdef CONFIG_XFS_QUOTA
270 : /* Log the actual updates to the quota accounting. */
271 : static inline void
272 757357 : xfs_swapext_update_quota(
273 : struct xfs_trans *tp,
274 : struct xfs_swapext_intent *sxi,
275 : struct xfs_bmbt_irec *irec1,
276 : struct xfs_bmbt_irec *irec2)
277 : {
278 757357 : int64_t ip1_delta = 0, ip2_delta = 0;
279 757357 : unsigned int qflag;
280 :
281 757357 : qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
282 : XFS_TRANS_DQ_BCOUNT;
283 :
284 1326394 : if (xfs_bmap_is_real_extent(irec1)) {
285 569037 : ip1_delta -= irec1->br_blockcount;
286 569037 : ip2_delta += irec1->br_blockcount;
287 : }
288 :
289 1327651 : if (xfs_bmap_is_real_extent(irec2)) {
290 570294 : ip1_delta += irec2->br_blockcount;
291 570294 : ip2_delta -= irec2->br_blockcount;
292 : }
293 :
294 757357 : xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
295 757357 : xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
296 757357 : }
297 : #else
298 : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2) ((void)0)
299 : #endif
300 :
301 : /* Decide if we want to skip this mapping from file1. */
static inline bool
xfs_swapext_can_skip_mapping(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = sxi->sxi_ip1->i_mount;

	/*
	 * NOTE: this predicate may shorten irec->br_blockcount so that the
	 * decision applies up to an rt extent boundary only; callers must use
	 * the possibly-trimmed mapping afterwards.
	 */

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written extents. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align with
	 * rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the mapping
	 *   down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
378 :
379 : /*
380 : * Walk forward through the file ranges in @sxi until we find two different
381 : * mappings to exchange. If there is work to do, return the mappings;
382 : * otherwise we've reached the end of the range and sxi_blockcount will be
383 : * zero.
384 : *
385 : * If the walk skips over a pair of mappings to the same storage, save them as
386 : * the left records in @adj (if provided) so that the simulation phase can
387 : * avoid an extra lookup.
388 : */
static int
xfs_swapext_find_mappings(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_swapext_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));

	/* The loop increment advances the cursor past the mapping in irec1. */
	for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
		/* Read extent from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
				sxi->sxi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != sxi->sxi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/* Note: can_skip_mapping may trim irec1 to an rtx boundary. */
		if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
			trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
			continue;
		}

		/* Read extent from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != sxi->sxi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only swap as many blocks as the smaller of the two
		 * extent maps.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
		trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);

		/* We found something to swap, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two extents mapped to the same physical block must not have
		 * different states; that's filesystem corruption.  Move on to
		 * the next extent if they're both holes or both the same
		 * physical extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(sxi->sxi_ip1,
					xfs_swapext_whichfork(sxi));
			xfs_bmap_mark_sick(sxi->sxi_ip2,
					xfs_swapext_whichfork(sxi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}
488 :
489 : /* Exchange these two mappings. */
static void
xfs_swapext_exchange_mappings(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_swapext_whichfork(sxi);

	/* Account the block movement against both inodes' dquots. */
	xfs_swapext_update_quota(tp, sxi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We swap the file offsets between the two maps
	 * and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information swapped.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);

	/* Make sure we're not mapping extents past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
				sxi->sxi_isize1);
		xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
				sxi->sxi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the SXD item, and if *blockcount is nonzero, it
	 * will log a new SXI item for the remainder and call us back.
	 */
	sxi_advance(sxi, irec1);
}
530 :
531 : /* Convert inode2's leaf attr fork back to shortform, if possible.. */
STATIC int
xfs_swapext_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args		args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_buf			*bp;
	int				forkoff;
	int				error;

	/* Only a single-leaf attr fork can be converted back to shortform. */
	if (!xfs_attr_is_leaf(sxi->sxi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	/* A zero forkoff means the attrs won't fit inline; leave them alone. */
	forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}
562 :
563 : /* Convert inode2's block dir fork back to shortform, if possible.. */
STATIC int
xfs_swapext_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args		args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr		sfh;
	struct xfs_buf			*bp;
	bool				isblock;
	int				size;
	int				error;

	/* Only single-block directories can be converted back to shortform. */
	error = xfs_dir2_isblock(&args, &isblock);
	if (error)
		return error;

	if (!isblock)
		return 0;

	error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
	if (error)
		return error;

	/* Leave it in block form if the sf image won't fit in the inode. */
	size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}
599 :
600 : /* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_swapext_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_inode		*ip = sxi->sxi_ip2;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char				*buf;
	int				error;

	/* Nothing to do if already local, or the target won't fit inline. */
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kmem_free(buf);
	return error;
}
641 :
static inline void
xfs_swapext_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	/* Caller determined this inode no longer needs the reflink iflag. */
	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
652 :
653 : /* Finish whatever work might come after a swap operation. */
static int
xfs_swapext_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
		int			error = 0;

		/*
		 * Try to convert inode2's fork back to shortform.  The
		 * conversion flag is cleared even on failure, so a failed
		 * conversion is not retried.
		 */
		if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
			error = xfs_swapext_attr_to_sf(tp, sxi);
		else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_dir_to_sf(tp, sxi);
		else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_link_to_sf(tp, sxi);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
		if (error)
			return error;
	}

	/* Clear each inode's reflink flag, one intent flag at a time. */
	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
	}

	return 0;
}
685 :
686 : /* Finish one extent swap, possibly log more. */
int
xfs_swapext_finish_one(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (sxi_has_more_swap_work(sxi)) {
		/*
		 * If the operation state says that some range of the files
		 * have not yet been swapped, look for extents in that range to
		 * swap.  If we find some extents, swap them.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (sxi_has_more_swap_work(sxi))
			xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * swap and either we just swapped the last extents in the
		 * range or we didn't find anything to swap, update the ondisk
		 * file sizes.
		 */
		if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
		    !sxi_has_more_swap_work(sxi)) {
			sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
			sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;

			xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
		}
	} else if (sxi_has_postop_work(sxi)) {
		/*
		 * Now that we're finished with the swap operation, complete
		 * the post-op cleanup work.
		 */
		error = xfs_swapext_do_postop_work(tp, sxi);
		if (error)
			return error;
	}

	/* Error injection hook (XFS_ERRTAG_SWAPEXT_FINISH_ONE). */
	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
		trace_xfs_swapext_defer(tp->t_mountp, sxi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the swapping work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
		xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
		xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
	}

	return 0;
}
753 :
754 : /*
755 : * Compute the amount of bmbt blocks we should reserve for each file. In the
756 : * worst case, each exchange will fill a hole with a new mapping, which could
757 : * result in a btree split every time we add a new leaf block.
758 : */
759 : static inline uint64_t
760 : xfs_swapext_bmbt_blocks(
761 : struct xfs_mount *mp,
762 : const struct xfs_swapext_req *req)
763 : {
764 390871 : return howmany_64(req->nr_exchanges,
765 390871 : XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
766 390871 : XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
767 : }
768 :
769 : static inline uint64_t
770 390874 : xfs_swapext_rmapbt_blocks(
771 : struct xfs_mount *mp,
772 : const struct xfs_swapext_req *req)
773 : {
774 390874 : if (!xfs_has_rmapbt(mp))
775 : return 0;
776 390872 : if (XFS_IS_REALTIME_INODE(req->ip1))
777 : return 0;
778 :
779 390872 : return howmany_64(req->nr_exchanges,
780 390872 : XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
781 390872 : XFS_RMAPADD_SPACE_RES(mp);
782 : }
783 :
784 : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
785 : int
786 390871 : xfs_swapext_estimate_overhead(
787 : struct xfs_swapext_req *req)
788 : {
789 390871 : struct xfs_mount *mp = req->ip1->i_mount;
790 390871 : xfs_filblks_t bmbt_blocks;
791 390871 : xfs_filblks_t rmapbt_blocks;
792 390871 : xfs_filblks_t resblks = req->resblks;
793 :
794 : /*
795 : * Compute the number of bmbt and rmapbt blocks we might need to handle
796 : * the estimated number of exchanges.
797 : */
798 390871 : bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
799 390871 : rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
800 :
801 390871 : trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
802 :
803 : /* Make sure the change in file block count doesn't overflow. */
804 390870 : if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
805 : return -EFBIG;
806 390870 : if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
807 : return -EFBIG;
808 :
809 : /*
810 : * Add together the number of blocks we need to handle btree growth,
811 : * then add it to the number of blocks we need to reserve to this
812 : * transaction.
813 : */
814 390870 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
815 : return -ENOSPC;
816 390870 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
817 : return -ENOSPC;
818 390870 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
819 : return -ENOSPC;
820 390870 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
821 : return -ENOSPC;
822 :
823 : /* Can't actually reserve more than UINT_MAX blocks. */
824 390870 : if (req->resblks > UINT_MAX)
825 : return -ENOSPC;
826 :
827 390870 : req->resblks = resblks;
828 390870 : trace_xfs_swapext_final_estimate(req);
829 390870 : return 0;
830 : }
831 :
832 : /* Decide if we can merge two real extents. */
static inline bool
can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Only merge mappings backed by real blocks. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	/*
	 * Mergeable if logically and physically contiguous, in the same
	 * written/unwritten state, and the combined length still fits in a
	 * single bmbt record.
	 */
	if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
855 :
/*
 * Contiguity state for estimating how one exchange changes the extent count.
 * "C" flags describe the current mapping being removed; "N" flags describe
 * the new mapping taking its place.
 */
#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
865 :
866 : /* Estimate the effect of a single swap on extent count. */
867 : static inline int
868 1515142 : delta_nextents_step(
869 : struct xfs_mount *mp,
870 : const struct xfs_bmbt_irec *left,
871 : const struct xfs_bmbt_irec *curr,
872 : const struct xfs_bmbt_irec *new,
873 : const struct xfs_bmbt_irec *right)
874 : {
875 1515142 : bool lhole, rhole, chole, nhole;
876 1515142 : unsigned int state = 0;
877 1515142 : int ret = 0;
878 :
879 1515142 : lhole = left->br_startblock == HOLESTARTBLOCK;
880 1515142 : rhole = right->br_startblock == HOLESTARTBLOCK;
881 1515142 : chole = curr->br_startblock == HOLESTARTBLOCK;
882 1515142 : nhole = new->br_startblock == HOLESTARTBLOCK;
883 :
884 1515142 : if (chole)
885 375577 : state |= CHOLE;
886 1515142 : if (!lhole && !chole && can_merge(left, curr))
887 194 : state |= CLEFT_CONTIG;
888 1515142 : if (!rhole && !chole && can_merge(curr, right))
889 316451 : state |= CRIGHT_CONTIG;
890 1515142 : if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
891 148 : left->br_startblock + curr->br_startblock +
892 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
893 1 : state &= ~CRIGHT_CONTIG;
894 :
895 1515142 : if (nhole)
896 375577 : state |= NHOLE;
897 1515142 : if (!lhole && !nhole && can_merge(left, new))
898 244029 : state |= NLEFT_CONTIG;
899 1515142 : if (!rhole && !nhole && can_merge(new, right))
900 5 : state |= NRIGHT_CONTIG;
901 1515142 : if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
902 4 : left->br_startblock + new->br_startblock +
903 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
904 0 : state &= ~NRIGHT_CONTIG;
905 :
906 1515142 : switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
907 147 : case CLEFT_CONTIG | CRIGHT_CONTIG:
908 : /*
909 : * left/curr/right are the same extent, so deleting curr causes
910 : * 2 new extents to be created.
911 : */
912 147 : ret += 2;
913 147 : break;
914 823068 : case 0:
915 : /*
916 : * curr is not contiguous with any extent, so we remove curr
917 : * completely
918 : */
919 823068 : ret--;
920 823068 : break;
921 : case CHOLE:
922 : /* hole, do nothing */
923 : break;
924 : case CLEFT_CONTIG:
925 : case CRIGHT_CONTIG:
926 : /* trim either left or right, no change */
927 : break;
928 : }
929 :
930 1515142 : switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
931 4 : case NLEFT_CONTIG | NRIGHT_CONTIG:
932 : /*
933 : * left/curr/right will become the same extent, so adding
934 : * curr causes the deletion of right.
935 : */
936 4 : ret--;
937 4 : break;
938 895535 : case 0:
939 : /* new is not contiguous with any extent */
940 895535 : ret++;
941 895535 : break;
942 : case NHOLE:
943 : /* hole, do nothing. */
944 : break;
945 : case NLEFT_CONTIG:
946 : case NRIGHT_CONTIG:
947 : /* new is absorbed into left or right, no change */
948 : break;
949 : }
950 :
951 1515142 : trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
952 : state);
953 1515142 : return ret;
954 : }
955 :
956 : /* Make sure we don't overflow the extent counters. */
static inline int
ensure_delta_nextents(
	struct xfs_swapext_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, req->whichfork);
	xfs_extnum_t		max_extents;
	bool			large_extcount;

	/* A shrinking extent count can never overflow. */
	if (delta < 0)
		return 0;

	/* Error injection: pretend the extent counter maxes out at 10. */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
		if (ifp->if_nextents + delta > 10)
			return -EFBIG;
	}

	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		large_extcount = true;
	else
		large_extcount = xfs_inode_has_large_extent_counts(ip);

	max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
	if (ifp->if_nextents + delta <= max_extents)
		return 0;
	if (large_extcount)
		return -EFBIG;
	if (!xfs_has_large_extent_counts(mp))
		return -EFBIG;

	/*
	 * The small counter would overflow but the filesystem supports large
	 * extent counts; see if upgrading the inode's counter would help.
	 */
	max_extents = xfs_iext_max_nextents(true, req->whichfork);
	if (ifp->if_nextents + delta > max_extents)
		return -EFBIG;

	/* Ask the caller to upgrade the inode to the large counter format. */
	req->req_flags |= XFS_SWAP_REQ_NREXT64;
	return 0;
}
996 :
997 : /* Find the next extent after irec. */
998 : static inline int
999 1515142 : get_next_ext(
1000 : struct xfs_inode *ip,
1001 : int bmap_flags,
1002 : const struct xfs_bmbt_irec *irec,
1003 : struct xfs_bmbt_irec *nrec)
1004 : {
1005 1515142 : xfs_fileoff_t off;
1006 1515142 : xfs_filblks_t blockcount;
1007 1515142 : int nimaps = 1;
1008 1515142 : int error;
1009 :
1010 1515142 : off = irec->br_startoff + irec->br_blockcount;
1011 1515142 : blockcount = XFS_MAX_FILEOFF - off;
1012 1515142 : error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
1013 1515142 : if (error)
1014 : return error;
1015 1515142 : if (nrec->br_startblock == DELAYSTARTBLOCK ||
1016 1513064 : nrec->br_startoff != off) {
1017 : /*
1018 : * If we don't get the extent we want, return a zero-length
1019 : * mapping, which our estimator function will pretend is a hole.
1020 : * We shouldn't get delalloc reservations.
1021 : */
1022 2078 : nrec->br_startblock = HOLESTARTBLOCK;
1023 : }
1024 :
1025 : return 0;
1026 : }
1027 :
1028 : int __init
1029 12 : xfs_swapext_intent_init_cache(void)
1030 : {
1031 12 : xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
1032 : sizeof(struct xfs_swapext_intent),
1033 : 0, 0, NULL);
1034 :
1035 12 : return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
1036 : }
1037 :
/* Tear down the swapext intent cache set up by the init function above. */
void
xfs_swapext_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_swapext_intent_cache);
	/* Clear the pointer so stale accesses after teardown fault loudly. */
	xfs_swapext_intent_cache = NULL;
}
1044 :
1045 : /*
1046 : * Decide if we will swap the reflink flags between the two files after the
1047 : * swap. The only time we want to do this is if we're exchanging all extents
1048 : * under EOF and the inode reflink flags have different states.
1049 : */
1050 : static inline bool
1051 360677 : sxi_can_exchange_reflink_flags(
1052 : const struct xfs_swapext_req *req,
1053 : unsigned int reflink_state)
1054 : {
1055 360677 : struct xfs_mount *mp = req->ip1->i_mount;
1056 :
1057 721354 : if (hweight32(reflink_state) != 1)
1058 : return false;
1059 40 : if (req->startoff1 != 0 || req->startoff2 != 0)
1060 : return false;
1061 32 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
1062 : return false;
1063 32 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
1064 0 : return false;
1065 : return true;
1066 : }
1067 :
1068 :
/*
 * Allocate and initialize a new incore intent item from a request.
 *
 * Translates the caller-facing request flags into the on-intent sxi_flags /
 * sxi_op_flags encoding.  If @reflink_state is non-NULL, the pre-operation
 * reflink flag state of both inodes is returned through it so the caller can
 * fix up the flags after the swap (see xfs_swapext_ensure_reflink).
 *
 * Never returns NULL: the allocation uses __GFP_NOFAIL.
 */
struct xfs_swapext_intent *
xfs_swapext_init_intent(
	const struct xfs_swapext_req	*req,
	unsigned int			*reflink_state)
{
	struct xfs_swapext_intent	*sxi;
	unsigned int			rs = 0;

	sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&sxi->sxi_list);
	sxi->sxi_ip1 = req->ip1;
	sxi->sxi_ip2 = req->ip2;
	sxi->sxi_startoff1 = req->startoff1;
	sxi->sxi_startoff2 = req->startoff2;
	sxi->sxi_blockcount = req->blockcount;
	/* -1 is the sentinel for "do not update the on-disk file sizes". */
	sxi->sxi_isize1 = sxi->sxi_isize2 = -1;

	if (req->whichfork == XFS_ATTR_FORK)
		sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;

	/*
	 * Size exchanging only applies to the data fork; capture each file's
	 * current disk size as the *other* file's post-swap size.
	 */
	if (req->whichfork == XFS_DATA_FORK &&
	    (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
		sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
		sxi->sxi_isize1 = req->ip2->i_disk_size;
		sxi->sxi_isize2 = req->ip1->i_disk_size;
	}

	if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
		sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
	if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
		sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;

	if (req->req_flags & XFS_SWAP_REQ_LOGGED)
		sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;

	if (req->whichfork == XFS_DATA_FORK) {
		/*
		 * Record the state of each inode's reflink flag before the
		 * operation.
		 */
		if (xfs_is_reflink_inode(req->ip1))
			rs |= XFS_REFLINK_STATE_IP1;
		if (xfs_is_reflink_inode(req->ip2))
			rs |= XFS_REFLINK_STATE_IP2;

		/*
		 * Figure out if we're clearing the reflink flags (which
		 * effectively swaps them) after the operation.
		 */
		if (sxi_can_exchange_reflink_flags(req, rs)) {
			if (rs & XFS_REFLINK_STATE_IP1)
				sxi->sxi_flags |=
					XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
			if (rs & XFS_REFLINK_STATE_IP2)
				sxi->sxi_flags |=
					XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
		}
	}

	if (reflink_state)
		*reflink_state = rs;
	return sxi;
}
1136 :
1137 : /*
1138 : * Estimate the number of exchange operations and the number of file blocks
1139 : * in each file that will be affected by the exchange operation.
1140 : */
1141 : int
1142 219771 : xfs_swapext_estimate(
1143 : struct xfs_swapext_req *req)
1144 : {
1145 219771 : struct xfs_swapext_intent *sxi;
1146 219771 : struct xfs_bmbt_irec irec1, irec2;
1147 219771 : struct xfs_swapext_adjacent adj = ADJACENT_INIT;
1148 219771 : xfs_filblks_t ip1_blocks = 0, ip2_blocks = 0;
1149 219771 : int64_t d_nexts1, d_nexts2;
1150 219771 : int bmap_flags;
1151 219771 : int error;
1152 :
1153 219771 : ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
1154 :
1155 219771 : bmap_flags = xfs_bmapi_aflag(req->whichfork);
1156 219771 : sxi = xfs_swapext_init_intent(req, NULL);
1157 :
1158 : /*
1159 : * To guard against the possibility of overflowing the extent counters,
1160 : * we have to estimate an upper bound on the potential increase in that
1161 : * counter. We can split the extent at each end of the range, and for
1162 : * each step of the swap we can split the extent that we're working on
1163 : * if the extents do not align.
1164 : */
1165 219771 : d_nexts1 = d_nexts2 = 3;
1166 :
1167 977342 : while (sxi_has_more_swap_work(sxi)) {
1168 : /*
1169 : * Walk through the file ranges until we find something to
1170 : * swap. Because we're simulating the swap, pass in adj to
1171 : * capture skipped mappings for correct estimation of bmbt
1172 : * record merges.
1173 : */
1174 837348 : error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
1175 837348 : if (error)
1176 0 : goto out_free;
1177 837348 : if (!sxi_has_more_swap_work(sxi))
1178 : break;
1179 :
1180 : /* Update accounting. */
1181 1326628 : if (xfs_bmap_is_real_extent(&irec1))
1182 569057 : ip1_blocks += irec1.br_blockcount;
1183 1328079 : if (xfs_bmap_is_real_extent(&irec2))
1184 570508 : ip2_blocks += irec2.br_blockcount;
1185 757571 : req->nr_exchanges++;
1186 :
1187 : /* Read the next extents from both files. */
1188 757571 : error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
1189 757571 : if (error)
1190 0 : goto out_free;
1191 :
1192 757571 : error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
1193 757571 : if (error)
1194 0 : goto out_free;
1195 :
1196 : /* Update extent count deltas. */
1197 757571 : d_nexts1 += delta_nextents_step(req->ip1->i_mount,
1198 : &adj.left1, &irec1, &irec2, &adj.right1);
1199 :
1200 757571 : d_nexts2 += delta_nextents_step(req->ip1->i_mount,
1201 : &adj.left2, &irec2, &irec1, &adj.right2);
1202 :
1203 : /* Now pretend we swapped the extents. */
1204 757571 : if (can_merge(&adj.left2, &irec1))
1205 166692 : adj.left2.br_blockcount += irec1.br_blockcount;
1206 : else
1207 590879 : memcpy(&adj.left2, &irec1, sizeof(irec1));
1208 :
1209 757571 : if (can_merge(&adj.left1, &irec2))
1210 77337 : adj.left1.br_blockcount += irec2.br_blockcount;
1211 : else
1212 680234 : memcpy(&adj.left1, &irec2, sizeof(irec2));
1213 :
1214 757571 : sxi_advance(sxi, &irec1);
1215 : }
1216 :
1217 : /* Account for the blocks that are being exchanged. */
1218 219771 : if (XFS_IS_REALTIME_INODE(req->ip1) &&
1219 2 : req->whichfork == XFS_DATA_FORK) {
1220 2 : req->ip1_rtbcount = ip1_blocks;
1221 2 : req->ip2_rtbcount = ip2_blocks;
1222 : } else {
1223 219769 : req->ip1_bcount = ip1_blocks;
1224 219769 : req->ip2_bcount = ip2_blocks;
1225 : }
1226 :
1227 : /*
1228 : * Make sure that both forks have enough slack left in their extent
1229 : * counters that the swap operation will not overflow.
1230 : */
1231 219771 : trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
1232 219771 : if (req->ip1 == req->ip2) {
1233 170274 : error = ensure_delta_nextents(req, req->ip1,
1234 : d_nexts1 + d_nexts2);
1235 : } else {
1236 49497 : error = ensure_delta_nextents(req, req->ip1, d_nexts1);
1237 49497 : if (error)
1238 2 : goto out_free;
1239 49495 : error = ensure_delta_nextents(req, req->ip2, d_nexts2);
1240 : }
1241 219769 : if (error)
1242 0 : goto out_free;
1243 :
1244 219769 : trace_xfs_swapext_initial_estimate(req);
1245 219769 : error = xfs_swapext_estimate_overhead(req);
1246 219771 : out_free:
1247 219771 : kmem_cache_free(xfs_swapext_intent_cache, sxi);
1248 219771 : return error;
1249 : }
1250 :
/* Set the reflink flag on @ip and log the inode core to the transaction. */
static inline void
xfs_swapext_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
1261 :
1262 : /*
1263 : * If either file has shared blocks and we're swapping data forks, we must flag
1264 : * the other file as having shared blocks so that we get the shared-block rmap
1265 : * functions if we need to fix up the rmaps.
1266 : */
1267 : void
1268 221802 : xfs_swapext_ensure_reflink(
1269 : struct xfs_trans *tp,
1270 : const struct xfs_swapext_intent *sxi,
1271 : unsigned int reflink_state)
1272 : {
1273 221802 : if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
1274 157308 : !xfs_is_reflink_inode(sxi->sxi_ip2))
1275 0 : xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
1276 :
1277 221802 : if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
1278 157328 : !xfs_is_reflink_inode(sxi->sxi_ip1))
1279 20 : xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
1280 221802 : }
1281 :
1282 : /* Widen the extent counts of both inodes if necessary. */
1283 : static inline void
1284 221796 : xfs_swapext_upgrade_extent_counts(
1285 : struct xfs_trans *tp,
1286 : const struct xfs_swapext_intent *sxi)
1287 : {
1288 221796 : if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
1289 : return;
1290 :
1291 0 : sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1292 0 : xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
1293 :
1294 0 : sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1295 0 : xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
1296 : }
1297 :
1298 : /*
1299 : * Schedule a swap a range of extents from one inode to another. If the atomic
1300 : * swap feature is enabled, then the operation progress can be resumed even if
1301 : * the system goes down. The caller must commit the transaction to start the
1302 : * work.
1303 : *
1304 : * The caller must ensure the inodes must be joined to the transaction and
1305 : * ILOCKd; they will still be joined to the transaction at exit.
1306 : */
1307 : void
1308 221796 : xfs_swapext(
1309 : struct xfs_trans *tp,
1310 : const struct xfs_swapext_req *req)
1311 : {
1312 221796 : struct xfs_swapext_intent *sxi;
1313 221796 : unsigned int reflink_state;
1314 :
1315 221796 : ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
1316 221796 : ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
1317 221796 : ASSERT(req->whichfork != XFS_COW_FORK);
1318 221796 : ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
1319 221796 : if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
1320 4323 : ASSERT(req->whichfork == XFS_DATA_FORK);
1321 221796 : if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
1322 45089 : ASSERT(req->whichfork == XFS_ATTR_FORK ||
1323 : (req->whichfork == XFS_DATA_FORK &&
1324 : (S_ISDIR(VFS_I(req->ip2)->i_mode) ||
1325 : S_ISLNK(VFS_I(req->ip2)->i_mode))));
1326 :
1327 221796 : if (req->blockcount == 0)
1328 0 : return;
1329 :
1330 221796 : sxi = xfs_swapext_init_intent(req, &reflink_state);
1331 221796 : xfs_swapext_schedule(tp, sxi);
1332 221796 : xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
1333 221796 : xfs_swapext_upgrade_extent_counts(tp, sxi);
1334 : }
|