Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_bmap.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_quota.h"
19 : #include "xfs_swapext.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_bmap_btree.h"
22 : #include "xfs_trans_space.h"
23 : #include "xfs_error.h"
24 : #include "xfs_errortag.h"
25 : #include "xfs_health.h"
26 : #include "xfs_da_format.h"
27 : #include "xfs_da_btree.h"
28 : #include "xfs_attr_leaf.h"
29 : #include "xfs_attr.h"
30 : #include "xfs_dir2_priv.h"
31 : #include "xfs_dir2.h"
32 : #include "xfs_symlink_remote.h"
33 : #include "xfs_rtbitmap.h"
34 :
35 : struct kmem_cache *xfs_swapext_intent_cache;
36 :
/*
 * bmbt mappings adjacent to a pair of records, one pair per file.  Used by
 * the swap estimation code to avoid redundant bmap lookups when walking the
 * two files' mappings.
 */
struct xfs_swapext_adjacent {
	struct xfs_bmbt_irec		left1;	/* neighbor before file1's mapping */
	struct xfs_bmbt_irec		right1;	/* neighbor after file1's mapping */
	struct xfs_bmbt_irec		left2;	/* neighbor before file2's mapping */
	struct xfs_bmbt_irec		right2;	/* neighbor after file2's mapping */
};

/* Initialize all four neighbors as holes, meaning "no adjacent mapping". */
#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}
51 :
52 : /* Information to help us reset reflink flag / CoW fork state after a swap. */
53 :
54 : /* Previous state of the two inodes' reflink flags. */
55 : #define XFS_REFLINK_STATE_IP1 (1U << 0)
56 : #define XFS_REFLINK_STATE_IP2 (1U << 1)
57 :
58 : /*
59 : * If the reflink flag is set on either inode, make sure it has an incore CoW
60 : * fork, since all reflink inodes must have them. If there's a CoW fork and it
61 : * has extents in it, make sure the inodes are tagged appropriately so that
62 : * speculative preallocations can be GC'd if we run low of space.
63 : */
64 : static inline void
65 652696 : xfs_swapext_ensure_cowfork(
66 : struct xfs_inode *ip)
67 : {
68 652696 : struct xfs_ifork *cfork;
69 :
70 652696 : if (xfs_is_reflink_inode(ip))
71 486819 : xfs_ifork_init_cow(ip);
72 :
73 652696 : cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
74 652696 : if (!cfork)
75 : return;
76 517742 : if (cfork->if_bytes > 0)
77 81928 : xfs_inode_set_cowblocks_tag(ip);
78 : else
79 435814 : xfs_inode_clear_cowblocks_tag(ip);
80 : }
81 :
82 : /* Schedule an atomic extent swap. */
83 : void
84 366139 : xfs_swapext_schedule(
85 : struct xfs_trans *tp,
86 : struct xfs_swapext_intent *sxi)
87 : {
88 366139 : trace_xfs_swapext_defer(tp->t_mountp, sxi);
89 366139 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
90 366139 : }
91 :
92 : /*
93 : * Adjust the on-disk inode size upwards if needed so that we never map extents
94 : * into the file past EOF. This is crucial so that log recovery won't get
95 : * confused by the sudden appearance of post-eof extents.
96 : */
97 : STATIC void
98 2157642 : xfs_swapext_update_size(
99 : struct xfs_trans *tp,
100 : struct xfs_inode *ip,
101 : struct xfs_bmbt_irec *imap,
102 : xfs_fsize_t new_isize)
103 : {
104 2157642 : struct xfs_mount *mp = tp->t_mountp;
105 2157642 : xfs_fsize_t len;
106 :
107 2157642 : if (new_isize < 0)
108 : return;
109 :
110 230426 : len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
111 : new_isize);
112 :
113 230426 : if (len <= ip->i_disk_size)
114 : return;
115 :
116 73 : trace_xfs_swapext_update_inode_size(ip, len);
117 :
118 73 : ip->i_disk_size = len;
119 73 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
120 : }
121 :
122 : static inline bool
123 : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
124 : {
125 9412064 : return sxi->sxi_blockcount > 0;
126 : }
127 :
128 : static inline bool
129 : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
130 : {
131 461036 : return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
132 : XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
133 : XFS_SWAP_EXT_CVT_INO2_SF);
134 : }
135 :
136 : static inline void
137 : sxi_advance(
138 : struct xfs_swapext_intent *sxi,
139 : const struct xfs_bmbt_irec *irec)
140 : {
141 2628363 : sxi->sxi_startoff1 += irec->br_blockcount;
142 2628363 : sxi->sxi_startoff2 += irec->br_blockcount;
143 2628363 : sxi->sxi_blockcount -= irec->br_blockcount;
144 1506321 : }
145 :
146 : #ifdef DEBUG
/*
 * Decide (for DEBUG checking only) whether the mappings being swapped must
 * be aligned to the realtime extent size.  Returns true only for non-logged
 * swaps of realtime files whose rt extent size is larger than one block.
 */
static inline bool
xfs_swapext_need_rt_conversion(
	const struct xfs_swapext_req	*req)
{
	struct xfs_inode		*ip = req->ip2;
	struct xfs_mount		*mp = ip->i_mount;

	/* xattrs don't live on the rt device */
	if (req->whichfork == XFS_ATTR_FORK)
		return false;

	/*
	 * Caller got permission to use logged swapext, so log recovery will
	 * finish the swap and not leave us with partially swapped rt extents
	 * exposed to userspace.
	 */
	if (req->req_flags & XFS_SWAP_REQ_LOGGED)
		return false;

	/*
	 * If we can't use log intent items at all, the only supported
	 * operation is full fork swaps.
	 */
	if (!xfs_swapext_supported(mp))
		return false;

	/* Conversion is only needed for realtime files with big rt extents */
	return xfs_inode_has_bigrtextents(ip);
}
176 :
177 : static inline int
178 264640 : xfs_swapext_check_rt_extents(
179 : struct xfs_mount *mp,
180 : const struct xfs_swapext_req *req)
181 : {
182 264640 : struct xfs_bmbt_irec irec1, irec2;
183 264640 : xfs_fileoff_t startoff1 = req->startoff1;
184 264640 : xfs_fileoff_t startoff2 = req->startoff2;
185 264640 : xfs_filblks_t blockcount = req->blockcount;
186 264640 : uint32_t mod;
187 264640 : int nimaps;
188 264640 : int error;
189 :
190 264640 : if (!xfs_swapext_need_rt_conversion(req))
191 : return 0;
192 :
193 0 : while (blockcount > 0) {
194 : /* Read extent from the first file */
195 0 : nimaps = 1;
196 0 : error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
197 : &irec1, &nimaps, 0);
198 0 : if (error)
199 0 : return error;
200 0 : ASSERT(nimaps == 1);
201 :
202 : /* Read extent from the second file */
203 0 : nimaps = 1;
204 0 : error = xfs_bmapi_read(req->ip2, startoff2,
205 : irec1.br_blockcount, &irec2, &nimaps,
206 : 0);
207 0 : if (error)
208 0 : return error;
209 0 : ASSERT(nimaps == 1);
210 :
211 : /*
212 : * We can only swap as many blocks as the smaller of the two
213 : * extent maps.
214 : */
215 0 : irec1.br_blockcount = min(irec1.br_blockcount,
216 : irec2.br_blockcount);
217 :
218 : /* Both mappings must be aligned to the realtime extent size. */
219 0 : xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
220 0 : if (mod) {
221 0 : ASSERT(mod == 0);
222 0 : return -EINVAL;
223 : }
224 :
225 0 : xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
226 0 : if (mod) {
227 : ASSERT(mod == 0);
228 : return -EINVAL;
229 : }
230 :
231 0 : xfs_rtb_to_rtx(mp, irec1.br_blockcount, &mod);
232 0 : if (mod) {
233 0 : ASSERT(mod == 0);
234 0 : return -EINVAL;
235 : }
236 :
237 0 : startoff1 += irec1.br_blockcount;
238 0 : startoff2 += irec1.br_blockcount;
239 0 : blockcount -= irec1.br_blockcount;
240 : }
241 :
242 : return 0;
243 : }
244 : #else
245 : # define xfs_swapext_check_rt_extents(mp, req) (0)
246 : #endif
247 :
248 : /* Check all extents to make sure we can actually swap them. */
249 : int
250 264640 : xfs_swapext_check_extents(
251 : struct xfs_mount *mp,
252 : const struct xfs_swapext_req *req)
253 : {
254 264640 : struct xfs_ifork *ifp1, *ifp2;
255 :
256 : /* No fork? */
257 264640 : ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
258 264640 : ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
259 264640 : if (!ifp1 || !ifp2)
260 : return -EINVAL;
261 :
262 : /* We don't know how to swap local format forks. */
263 264640 : if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
264 264640 : ifp2->if_format == XFS_DINODE_FMT_LOCAL)
265 : return -EINVAL;
266 :
267 264640 : return xfs_swapext_check_rt_extents(mp, req);
268 : }
269 :
270 : #ifdef CONFIG_XFS_QUOTA
/*
 * Log the actual updates to the quota accounting: blocks leaving one file's
 * mapping are credited to the other file's owner, and vice versa.
 */
static inline void
xfs_swapext_update_quota(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t				ip1_delta = 0, ip2_delta = 0;
	unsigned int			qflag;

	/* Realtime files account blocks against the rt quota counter. */
	qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	/* irec1 moves from file1 to file2; only real extents count. */
	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	/* irec2 moves from file2 to file1. */
	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
}
298 : #else
299 : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2) ((void)0)
300 : #endif
301 :
/*
 * Decide if we want to skip this mapping from file1.  Returns true if the
 * caller should not swap this mapping; note that as a side effect the
 * mapping may be trimmed so it does not straddle an rtx boundary.
 */
static inline bool
xfs_swapext_can_skip_mapping(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = sxi->sxi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written extents. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align with
	 * rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the mapping
	 *   down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
379 :
/*
 * Walk forward through the file ranges in @sxi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings;
 * otherwise we've reached the end of the range and sxi_blockcount will be
 * zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them as
 * the left records in @adj (if provided) so that the simulation phase can
 * avoid an extra lookup.
 *
 * Returns 0 on success (including "nothing left to swap"), or a negative
 * errno on read failure or fork corruption.
 */
static int
xfs_swapext_find_mappings(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_swapext_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));

	/* Each iteration either returns a pair or advances past a skipped one. */
	for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
		/* Read extent from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
				sxi->sxi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != sxi->sxi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
			trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
			continue;
		}

		/* Read extent from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != sxi->sxi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only swap as many blocks as the smaller of the two
		 * extent maps.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
		trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);

		/* We found something to swap, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two extents mapped to the same physical block must not have
		 * different states; that's filesystem corruption.  Move on to
		 * the next extent if they're both holes or both the same
		 * physical extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(sxi->sxi_ip1,
					xfs_swapext_whichfork(sxi));
			xfs_bmap_mark_sick(sxi->sxi_ip2,
					xfs_swapext_whichfork(sxi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}
489 :
/* Exchange these two mappings and advance the swap cursor past them. */
static void
xfs_swapext_exchange_mappings(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_swapext_whichfork(sxi);

	/* Account the block movement against both inodes' quotas. */
	xfs_swapext_update_quota(tp, sxi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We swap the file offsets between the two maps
	 * and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information swapped.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);

	/* Make sure we're not mapping extents past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
				sxi->sxi_isize1);
		xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
				sxi->sxi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the SXD item, and if *blockcount is nonzero, it
	 * will log a new SXI item for the remainder and call us back.
	 */
	sxi_advance(sxi, irec1);
}
531 :
/* Convert inode2's leaf attr fork back to shortform, if possible. */
STATIC int
xfs_swapext_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args		args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_buf			*bp;
	int				forkoff;
	int				error;

	/* Nothing to do unless the attr fork is in leaf format. */
	if (!xfs_attr_is_leaf(sxi->sxi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	/* A zero forkoff means the leaf contents won't fit in shortform. */
	forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}
563 :
/* Convert inode2's block dir fork back to shortform, if possible. */
STATIC int
xfs_swapext_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args		args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr		sfh;
	struct xfs_buf			*bp;
	bool				isblock;
	int				size;
	int				error;

	/* Only single-block directories can collapse to shortform. */
	error = xfs_dir2_isblock(&args, &isblock);
	if (error)
		return error;

	if (!isblock)
		return 0;

	error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
	if (error)
		return error;

	/* Leave the dir in block format if shortform won't fit inline. */
	size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}
600 :
/* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_swapext_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_inode		*ip = sxi->sxi_ip2;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char				*buf;
	int				error;

	/* Already shortform, or target too big to fit inline?  Nothing to do. */
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	/* buf is freed on both success and error paths. */
	kmem_free(buf);
	return error;
}
642 :
/* Clear the reflink flag on @ip and log the inode core to @tp. */
static inline void
xfs_swapext_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
653 :
/*
 * Finish whatever work might come after a swap operation: shortform
 * conversion of inode2's fork, and clearing stale reflink flags.  Each
 * post-op flag is cleared from sxi_flags as its work is performed so that a
 * continued operation won't repeat it.
 */
static int
xfs_swapext_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
		int			error = 0;

		/* Pick the conversion routine by fork and inode type. */
		if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
			error = xfs_swapext_attr_to_sf(tp, sxi);
		else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_dir_to_sf(tp, sxi);
		else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_link_to_sf(tp, sxi);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
		if (error)
			return error;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
	}

	return 0;
}
686 :
/*
 * Finish one extent swap, possibly log more.  Returns -EAGAIN if there is
 * still swap or post-op work remaining (the caller must relog and call back
 * in a fresh transaction), 0 when the entire operation is complete, or a
 * negative errno on failure.
 */
int
xfs_swapext_finish_one(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (sxi_has_more_swap_work(sxi)) {
		/*
		 * If the operation state says that some range of the files
		 * have not yet been swapped, look for extents in that range to
		 * swap.  If we find some extents, swap them.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (sxi_has_more_swap_work(sxi))
			xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * swap and either we just swapped the last extents in the
		 * range or we didn't find anything to swap, update the ondisk
		 * file sizes.
		 */
		if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
		    !sxi_has_more_swap_work(sxi)) {
			sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
			sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;

			xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
		}
	} else if (sxi_has_postop_work(sxi)) {
		/*
		 * Now that we're finished with the swap operation, complete
		 * the post-op cleanup work.
		 */
		error = xfs_swapext_do_postop_work(tp, sxi);
		if (error)
			return error;
	}

	/* Error injection point for exercising partial-completion recovery. */
	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
		trace_xfs_swapext_defer(tp->t_mountp, sxi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the swapping work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
		xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
		xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
	}

	return 0;
}
754 :
/*
 * Compute the amount of bmbt blocks we should reserve for each file.  In the
 * worst case, each exchange will fill a hole with a new mapping, which could
 * result in a btree split every time we add a new leaf block.
 */
static inline uint64_t
xfs_swapext_bmbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_swapext_req	*req)
{
	/* One extent-add space reservation per leaf block's worth of exchanges. */
	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
			XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
}
769 :
/*
 * Compute the rmap btree blocks we should reserve for each file.  Realtime
 * inodes use the rt rmap reservation; everything else uses the per-AG rmap
 * reservation.  No reservation is needed if the rmapbt feature is disabled.
 */
static inline uint64_t
xfs_swapext_rmapbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_swapext_req	*req)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	if (XFS_IS_REALTIME_INODE(req->ip1))
		return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
			XFS_RTRMAPADD_SPACE_RES(mp);

	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
			XFS_RMAPADD_SPACE_RES(mp);
}
786 :
787 : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
788 : int
789 668155 : xfs_swapext_estimate_overhead(
790 : struct xfs_swapext_req *req)
791 : {
792 668155 : struct xfs_mount *mp = req->ip1->i_mount;
793 668155 : xfs_filblks_t bmbt_blocks;
794 668155 : xfs_filblks_t rmapbt_blocks;
795 668155 : xfs_filblks_t resblks = req->resblks;
796 :
797 : /*
798 : * Compute the number of bmbt and rmapbt blocks we might need to handle
799 : * the estimated number of exchanges.
800 : */
801 668155 : bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
802 668155 : rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
803 :
804 668155 : trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
805 :
806 : /* Make sure the change in file block count doesn't overflow. */
807 668159 : if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
808 : return -EFBIG;
809 668159 : if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
810 : return -EFBIG;
811 :
812 : /*
813 : * Add together the number of blocks we need to handle btree growth,
814 : * then add it to the number of blocks we need to reserve to this
815 : * transaction.
816 : */
817 668159 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
818 : return -ENOSPC;
819 668159 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
820 : return -ENOSPC;
821 668159 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
822 : return -ENOSPC;
823 668159 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
824 : return -ENOSPC;
825 :
826 : /* Can't actually reserve more than UINT_MAX blocks. */
827 668159 : if (req->resblks > UINT_MAX)
828 : return -ENOSPC;
829 :
830 668159 : req->resblks = resblks;
831 668159 : trace_xfs_swapext_final_estimate(req);
832 668159 : return 0;
833 : }
834 :
/* Decide if we can merge two real extents. */
static inline bool
can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Don't merge mappings that aren't real extents (e.g. delalloc). */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	/*
	 * Mergeable mappings must be logically and physically contiguous,
	 * share the same written/unwritten state, and must not exceed the
	 * maximum mapping length when combined.
	 */
	if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
858 :
/*
 * Contiguity classification flags for delta_nextents_step: the C* flags
 * describe the (c)urrent mapping relative to its left/right neighbors, and
 * the N* flags describe the (n)ew mapping that will replace it.
 */
#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
868 :
869 : /* Estimate the effect of a single swap on extent count. */
870 : static inline int
871 2238874 : delta_nextents_step(
872 : struct xfs_mount *mp,
873 : const struct xfs_bmbt_irec *left,
874 : const struct xfs_bmbt_irec *curr,
875 : const struct xfs_bmbt_irec *new,
876 : const struct xfs_bmbt_irec *right)
877 : {
878 2238874 : bool lhole, rhole, chole, nhole;
879 2238874 : unsigned int state = 0;
880 2238874 : int ret = 0;
881 :
882 2238874 : lhole = left->br_startblock == HOLESTARTBLOCK;
883 2238874 : rhole = right->br_startblock == HOLESTARTBLOCK;
884 2238874 : chole = curr->br_startblock == HOLESTARTBLOCK;
885 2238874 : nhole = new->br_startblock == HOLESTARTBLOCK;
886 :
887 2238874 : if (chole)
888 498241 : state |= CHOLE;
889 2238874 : if (!lhole && !chole && can_merge(left, curr))
890 227 : state |= CLEFT_CONTIG;
891 2238874 : if (!rhole && !chole && can_merge(curr, right))
892 597855 : state |= CRIGHT_CONTIG;
893 2238874 : if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
894 162 : left->br_startblock + curr->br_startblock +
895 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
896 15 : state &= ~CRIGHT_CONTIG;
897 :
898 2238874 : if (nhole)
899 498241 : state |= NHOLE;
900 2238874 : if (!lhole && !nhole && can_merge(left, new))
901 416163 : state |= NLEFT_CONTIG;
902 2238874 : if (!rhole && !nhole && can_merge(new, right))
903 10 : state |= NRIGHT_CONTIG;
904 2238874 : if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
905 9 : left->br_startblock + new->br_startblock +
906 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
907 0 : state &= ~NRIGHT_CONTIG;
908 :
909 2238874 : switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
910 147 : case CLEFT_CONTIG | CRIGHT_CONTIG:
911 : /*
912 : * left/curr/right are the same extent, so deleting curr causes
913 : * 2 new extents to be created.
914 : */
915 147 : ret += 2;
916 147 : break;
917 1142713 : case 0:
918 : /*
919 : * curr is not contiguous with any extent, so we remove curr
920 : * completely
921 : */
922 1142713 : ret--;
923 1142713 : break;
924 : case CHOLE:
925 : /* hole, do nothing */
926 : break;
927 : case CLEFT_CONTIG:
928 : case CRIGHT_CONTIG:
929 : /* trim either left or right, no change */
930 : break;
931 : }
932 :
933 2238874 : switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
934 9 : case NLEFT_CONTIG | NRIGHT_CONTIG:
935 : /*
936 : * left/curr/right will become the same extent, so adding
937 : * curr causes the deletion of right.
938 : */
939 9 : ret--;
940 9 : break;
941 1324469 : case 0:
942 : /* new is not contiguous with any extent */
943 1324469 : ret++;
944 1324469 : break;
945 : case NHOLE:
946 : /* hole, do nothing. */
947 : break;
948 : case NLEFT_CONTIG:
949 : case NRIGHT_CONTIG:
950 : /* new is absorbed into left or right, no change */
951 : break;
952 : }
953 :
954 2238874 : trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
955 : state);
956 2238874 : return ret;
957 : }
958 :
/*
 * Make sure we don't overflow the extent counters.  If the post-swap count
 * would exceed the current format's limit, try flagging the request for an
 * upgrade to large (64-bit) extent counters before giving up with -EFBIG.
 */
static inline int
ensure_delta_nextents(
	struct xfs_swapext_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, req->whichfork);
	xfs_extnum_t		max_extents;
	bool			large_extcount;

	/* Shrinking the mapping count can never overflow. */
	if (delta < 0)
		return 0;

	/* Error injection: pretend the limit is only ten extents. */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
		if (ifp->if_nextents + delta > 10)
			return -EFBIG;
	}

	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		large_extcount = true;
	else
		large_extcount = xfs_inode_has_large_extent_counts(ip);

	max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
	if (ifp->if_nextents + delta <= max_extents)
		return 0;
	if (large_extcount)
		return -EFBIG;
	if (!xfs_has_large_extent_counts(mp))
		return -EFBIG;

	/* Re-check against the large extent counter limit. */
	max_extents = xfs_iext_max_nextents(true, req->whichfork);
	if (ifp->if_nextents + delta > max_extents)
		return -EFBIG;

	/* Ask the caller to upgrade the inode to nrext64 format. */
	req->req_flags |= XFS_SWAP_REQ_NREXT64;
	return 0;
}
999 :
1000 : /* Find the next extent after irec. */
1001 : static inline int
1002 2238874 : get_next_ext(
1003 : struct xfs_inode *ip,
1004 : int bmap_flags,
1005 : const struct xfs_bmbt_irec *irec,
1006 : struct xfs_bmbt_irec *nrec)
1007 : {
1008 2238874 : xfs_fileoff_t off;
1009 2238874 : xfs_filblks_t blockcount;
1010 2238874 : int nimaps = 1;
1011 2238874 : int error;
1012 :
1013 2238874 : off = irec->br_startoff + irec->br_blockcount;
1014 2238874 : blockcount = XFS_MAX_FILEOFF - off;
1015 2238874 : error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
1016 2238874 : if (error)
1017 : return error;
1018 2238874 : if (nrec->br_startblock == DELAYSTARTBLOCK ||
1019 2236541 : nrec->br_startoff != off) {
1020 : /*
1021 : * If we don't get the extent we want, return a zero-length
1022 : * mapping, which our estimator function will pretend is a hole.
1023 : * We shouldn't get delalloc reservations.
1024 : */
1025 2333 : nrec->br_startblock = HOLESTARTBLOCK;
1026 : }
1027 :
1028 : return 0;
1029 : }
1030 :
1031 : int __init
1032 12 : xfs_swapext_intent_init_cache(void)
1033 : {
1034 12 : xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
1035 : sizeof(struct xfs_swapext_intent),
1036 : 0, 0, NULL);
1037 :
1038 12 : return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
1039 : }
1040 :
/* Tear down the swapext intent slab cache at module unload time. */
void
xfs_swapext_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_swapext_intent_cache);
	/* Clear the pointer so any stale use is caught immediately. */
	xfs_swapext_intent_cache = NULL;
}
1047 :
1048 : /*
1049 : * Decide if we will swap the reflink flags between the two files after the
1050 : * swap. The only time we want to do this is if we're exchanging all extents
1051 : * under EOF and the inode reflink flags have different states.
1052 : */
1053 : static inline bool
1054 650179 : sxi_can_exchange_reflink_flags(
1055 : const struct xfs_swapext_req *req,
1056 : unsigned int reflink_state)
1057 : {
1058 650179 : struct xfs_mount *mp = req->ip1->i_mount;
1059 :
1060 1300358 : if (hweight32(reflink_state) != 1)
1061 : return false;
1062 38 : if (req->startoff1 != 0 || req->startoff2 != 0)
1063 : return false;
1064 30 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
1065 : return false;
1066 30 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
1067 0 : return false;
1068 : return true;
1069 : }
1070 :
1071 :
1072 : /* Allocate and initialize a new incore intent item from a request. */
1073 : struct xfs_swapext_intent *
1074 728492 : xfs_swapext_init_intent(
1075 : const struct xfs_swapext_req *req,
1076 : unsigned int *reflink_state)
1077 : {
1078 728492 : struct xfs_swapext_intent *sxi;
1079 728492 : unsigned int rs = 0;
1080 :
1081 728492 : sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
1082 : GFP_NOFS | __GFP_NOFAIL);
1083 728492 : INIT_LIST_HEAD(&sxi->sxi_list);
1084 728492 : sxi->sxi_ip1 = req->ip1;
1085 728492 : sxi->sxi_ip2 = req->ip2;
1086 728492 : sxi->sxi_startoff1 = req->startoff1;
1087 728492 : sxi->sxi_startoff2 = req->startoff2;
1088 728492 : sxi->sxi_blockcount = req->blockcount;
1089 728492 : sxi->sxi_isize1 = sxi->sxi_isize2 = -1;
1090 :
1091 728492 : if (req->whichfork == XFS_ATTR_FORK)
1092 78313 : sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;
1093 :
1094 728492 : if (req->whichfork == XFS_DATA_FORK &&
1095 : (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
1096 121147 : sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
1097 121147 : sxi->sxi_isize1 = req->ip2->i_disk_size;
1098 121147 : sxi->sxi_isize2 = req->ip1->i_disk_size;
1099 : }
1100 :
1101 728492 : if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
1102 32 : sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
1103 728492 : if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
1104 90982 : sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;
1105 :
1106 728492 : if (req->req_flags & XFS_SWAP_REQ_LOGGED)
1107 715618 : sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
1108 728492 : if (req->req_flags & XFS_SWAP_REQ_NREXT64)
1109 0 : sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;
1110 :
1111 728492 : if (req->whichfork == XFS_DATA_FORK) {
1112 : /*
1113 : * Record the state of each inode's reflink flag before the
1114 : * operation.
1115 : */
1116 650179 : if (xfs_is_reflink_inode(req->ip1))
1117 486816 : rs |= XFS_REFLINK_STATE_IP1;
1118 650179 : if (xfs_is_reflink_inode(req->ip2))
1119 486854 : rs |= XFS_REFLINK_STATE_IP2;
1120 :
1121 : /*
1122 : * Figure out if we're clearing the reflink flags (which
1123 : * effectively swaps them) after the operation.
1124 : */
1125 650179 : if (sxi_can_exchange_reflink_flags(req, rs)) {
1126 30 : if (rs & XFS_REFLINK_STATE_IP1)
1127 0 : sxi->sxi_flags |=
1128 : XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
1129 30 : if (rs & XFS_REFLINK_STATE_IP2)
1130 30 : sxi->sxi_flags |=
1131 : XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
1132 : }
1133 : }
1134 :
1135 728492 : if (reflink_state)
1136 366140 : *reflink_state = rs;
1137 728492 : return sxi;
1138 : }
1139 :
/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 *
 * Walks both mapping ranges with a throwaway intent, simulating each swap
 * step to count exchanges, blocks moved, and the worst-case growth of each
 * fork's extent count.  On success the estimates are stored in @req; the
 * request may also be upgraded to 64-bit extent counters if needed.
 */
int
xfs_swapext_estimate(
	struct xfs_swapext_req	*req)
{
	struct xfs_swapext_intent	*sxi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_swapext_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));

	bmap_flags = xfs_bmapi_aflag(req->whichfork);
	/* Throwaway intent that drives the same walk the real swap would. */
	sxi = xfs_swapext_init_intent(req, NULL);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the extent at each end of the range, and for
	 * each step of the swap we can split the extent that we're working on
	 * if the extents do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (sxi_has_more_swap_work(sxi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * swap.  Because we're simulating the swap, pass in adj to
		 * capture skipped mappings for correct estimation of bmbt
		 * record merges.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!sxi_has_more_swap_work(sxi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next extents from both files. */
		error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/*
		 * Update extent count deltas.  NOTE(review): ip1->i_mount is
		 * used for both deltas -- presumably both files live on the
		 * same mount; confirm against the callers.
		 */
		d_nexts1 += delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/*
		 * Now pretend we swapped the extents: each file's "left"
		 * neighbor becomes the mapping it just received, merged with
		 * the previous neighbor if the two are contiguous.
		 */
		if (can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		sxi_advance(sxi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    req->whichfork == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the swap operation will not overflow.  When both
	 * ranges are in the same inode, the two deltas land in one fork.
	 */
	trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		error = ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_swapext_initial_estimate(req);
	error = xfs_swapext_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_swapext_intent_cache, sxi);
	return error;
}
1253 :
/* Set the reflink flag on @ip and log the inode core to the transaction. */
static inline void
xfs_swapext_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
1264 :
1265 : /*
1266 : * If either file has shared blocks and we're swapping data forks, we must flag
1267 : * the other file as having shared blocks so that we get the shared-block rmap
1268 : * functions if we need to fix up the rmaps.
1269 : */
1270 : void
1271 366140 : xfs_swapext_ensure_reflink(
1272 : struct xfs_trans *tp,
1273 : const struct xfs_swapext_intent *sxi,
1274 : unsigned int reflink_state)
1275 : {
1276 366140 : if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
1277 243401 : !xfs_is_reflink_inode(sxi->sxi_ip2))
1278 0 : xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
1279 :
1280 366140 : if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
1281 243420 : !xfs_is_reflink_inode(sxi->sxi_ip1))
1282 19 : xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
1283 366140 : }
1284 :
1285 : /* Widen the extent counts of both inodes if necessary. */
1286 : static inline void
1287 366134 : xfs_swapext_upgrade_extent_counts(
1288 : struct xfs_trans *tp,
1289 : const struct xfs_swapext_intent *sxi)
1290 : {
1291 366134 : if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
1292 : return;
1293 :
1294 0 : sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1295 0 : xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
1296 :
1297 0 : sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1298 0 : xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
1299 : }
1300 :
/*
 * Schedule a swap of a range of extents from one inode to another.  If the
 * atomic swap feature is enabled, then the operation progress can be resumed
 * even if the system goes down.  The caller must commit the transaction to
 * start the work.
 *
 * The caller must ensure the inodes are joined to the transaction and ILOCKd;
 * they will still be joined to the transaction at exit.
 */
void
xfs_swapext(
	struct xfs_trans	*tp,
	const struct xfs_swapext_req	*req)
{
	struct xfs_swapext_intent	*sxi;
	unsigned int			reflink_state;

	ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
	ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
	ASSERT(req->whichfork != XFS_COW_FORK);
	ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
	/* File size exchange only makes sense for data forks. */
	if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
		ASSERT(req->whichfork == XFS_DATA_FORK);
	/* Short-form conversion applies to attr forks, dirs, and symlinks. */
	if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
		ASSERT(req->whichfork == XFS_ATTR_FORK ||
		       (req->whichfork == XFS_DATA_FORK &&
			(S_ISDIR(VFS_I(req->ip2)->i_mode) ||
			 S_ISLNK(VFS_I(req->ip2)->i_mode))));

	/* Nothing to exchange; don't log an intent. */
	if (req->blockcount == 0)
		return;

	/* Build the intent, log it, and fix up reflink/extent-count state. */
	sxi = xfs_swapext_init_intent(req, &reflink_state);
	xfs_swapext_schedule(tp, sxi);
	xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
	xfs_swapext_upgrade_extent_counts(tp, sxi);
}
|