Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_bmap.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_quota.h"
19 : #include "xfs_swapext.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_bmap_btree.h"
22 : #include "xfs_trans_space.h"
23 : #include "xfs_error.h"
24 : #include "xfs_errortag.h"
25 : #include "xfs_health.h"
26 : #include "xfs_da_format.h"
27 : #include "xfs_da_btree.h"
28 : #include "xfs_attr_leaf.h"
29 : #include "xfs_attr.h"
30 : #include "xfs_dir2_priv.h"
31 : #include "xfs_dir2.h"
32 : #include "xfs_symlink_remote.h"
33 : #include "xfs_rtbitmap.h"
34 :
35 : struct kmem_cache *xfs_swapext_intent_cache;
36 :
/*
 * bmbt mappings adjacent to a pair of records.  left1/right1 bracket the
 * mapping under consideration in file 1; left2/right2 do the same for
 * file 2.  Used to predict merges when estimating extent-count changes.
 */
struct xfs_swapext_adjacent {
	struct xfs_bmbt_irec		left1;
	struct xfs_bmbt_irec		right1;
	struct xfs_bmbt_irec		left2;
	struct xfs_bmbt_irec		right2;
};

/* Initializer marking all four neighbor mappings as holes (unknown). */
#define ADJACENT_INIT { \
	.left1 = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2 = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}
51 :
52 : /* Information to help us reset reflink flag / CoW fork state after a swap. */
53 :
54 : /* Previous state of the two inodes' reflink flags. */
55 : #define XFS_REFLINK_STATE_IP1 (1U << 0)
56 : #define XFS_REFLINK_STATE_IP2 (1U << 1)
57 :
58 : /*
59 : * If the reflink flag is set on either inode, make sure it has an incore CoW
60 : * fork, since all reflink inodes must have them. If there's a CoW fork and it
61 : * has extents in it, make sure the inodes are tagged appropriately so that
62 : * speculative preallocations can be GC'd if we run low of space.
63 : */
64 : static inline void
65 3937868 : xfs_swapext_ensure_cowfork(
66 : struct xfs_inode *ip)
67 : {
68 3937868 : struct xfs_ifork *cfork;
69 :
70 3937868 : if (xfs_is_reflink_inode(ip))
71 3618322 : xfs_ifork_init_cow(ip);
72 :
73 3937858 : cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
74 3937858 : if (!cfork)
75 : return;
76 3662492 : if (cfork->if_bytes > 0)
77 1883951 : xfs_inode_set_cowblocks_tag(ip);
78 : else
79 1778541 : xfs_inode_clear_cowblocks_tag(ip);
80 : }
81 :
/*
 * Schedule an atomic extent swap: trace the intent, then hand it to the
 * deferred-ops machinery so it is logged and finished (or recovered)
 * transactionally.
 */
void
xfs_swapext_schedule(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	trace_xfs_swapext_defer(tp->t_mountp, sxi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
}
91 :
92 : /*
93 : * Adjust the on-disk inode size upwards if needed so that we never map extents
94 : * into the file past EOF. This is crucial so that log recovery won't get
95 : * confused by the sudden appearance of post-eof extents.
96 : */
97 : STATIC void
98 13939926 : xfs_swapext_update_size(
99 : struct xfs_trans *tp,
100 : struct xfs_inode *ip,
101 : struct xfs_bmbt_irec *imap,
102 : xfs_fsize_t new_isize)
103 : {
104 13939926 : struct xfs_mount *mp = tp->t_mountp;
105 13939926 : xfs_fsize_t len;
106 :
107 13939926 : if (new_isize < 0)
108 : return;
109 :
110 399368 : len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
111 : new_isize);
112 :
113 399368 : if (len <= ip->i_disk_size)
114 : return;
115 :
116 411 : trace_xfs_swapext_update_inode_size(ip, len);
117 :
118 411 : ip->i_disk_size = len;
119 411 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
120 : }
121 :
/* Is there any file range left to exchange in this intent? */
static inline bool
sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
{
	return sxi->sxi_blockcount > 0;
}
127 :
/*
 * Does this intent still carry post-swap cleanup work: clearing either
 * inode's reflink flag, or converting ip2's fork back to shortform?
 */
static inline bool
sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
{
	return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
				 XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
				 XFS_SWAP_EXT_CVT_INO2_SF);
}
135 :
/* Move the swap cursor past @irec: advance both file offsets in lockstep
 * and shrink the remaining block count. */
static inline void
sxi_advance(
	struct xfs_swapext_intent	*sxi,
	const struct xfs_bmbt_irec	*irec)
{
	sxi->sxi_startoff1 += irec->br_blockcount;
	sxi->sxi_startoff2 += irec->br_blockcount;
	sxi->sxi_blockcount -= irec->br_blockcount;
}
145 :
146 : #ifdef DEBUG
/*
 * Decide if a debug-only rt extent alignment check is needed for this swap
 * request.  Returns true only for realtime files with multi-block rt
 * extents that are being swapped without log intent items.
 */
static inline bool
xfs_swapext_need_rt_conversion(
	const struct xfs_swapext_req	*req)
{
	struct xfs_inode	*ip = req->ip2;
	struct xfs_mount	*mp = ip->i_mount;

	/* xattrs don't live on the rt device */
	if (req->whichfork == XFS_ATTR_FORK)
		return false;

	/*
	 * Caller got permission to use logged swapext, so log recovery will
	 * finish the swap and not leave us with partially swapped rt extents
	 * exposed to userspace.
	 */
	if (req->req_flags & XFS_SWAP_REQ_LOGGED)
		return false;

	/*
	 * If we can't use log intent items at all, the only supported
	 * operation is full fork swaps.
	 */
	if (!xfs_swapext_supported(mp))
		return false;

	/* Conversion is only needed for realtime files with big rt extents */
	return xfs_inode_has_bigrtextents(ip);
}
176 :
177 : static inline int
178 1861802 : xfs_swapext_check_rt_extents(
179 : struct xfs_mount *mp,
180 : const struct xfs_swapext_req *req)
181 : {
182 1861802 : struct xfs_bmbt_irec irec1, irec2;
183 1861802 : xfs_fileoff_t startoff1 = req->startoff1;
184 1861802 : xfs_fileoff_t startoff2 = req->startoff2;
185 1861802 : xfs_filblks_t blockcount = req->blockcount;
186 1861802 : uint32_t mod;
187 1861802 : int nimaps;
188 1861802 : int error;
189 :
190 1861802 : if (!xfs_swapext_need_rt_conversion(req))
191 : return 0;
192 :
193 331324 : while (blockcount > 0) {
194 : /* Read extent from the first file */
195 326061 : nimaps = 1;
196 326061 : error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
197 : &irec1, &nimaps, 0);
198 326051 : if (error)
199 0 : return error;
200 326051 : ASSERT(nimaps == 1);
201 :
202 : /* Read extent from the second file */
203 326051 : nimaps = 1;
204 326051 : error = xfs_bmapi_read(req->ip2, startoff2,
205 : irec1.br_blockcount, &irec2, &nimaps,
206 : 0);
207 326051 : if (error)
208 0 : return error;
209 326051 : ASSERT(nimaps == 1);
210 :
211 : /*
212 : * We can only swap as many blocks as the smaller of the two
213 : * extent maps.
214 : */
215 326051 : irec1.br_blockcount = min(irec1.br_blockcount,
216 : irec2.br_blockcount);
217 :
218 : /* Both mappings must be aligned to the realtime extent size. */
219 326051 : xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
220 326051 : if (mod) {
221 0 : ASSERT(mod == 0);
222 0 : return -EINVAL;
223 : }
224 :
225 326051 : xfs_rtb_to_rtx(mp, irec1.br_startoff, &mod);
226 326051 : if (mod) {
227 0 : ASSERT(mod == 0);
228 0 : return -EINVAL;
229 : }
230 :
231 326051 : xfs_rtb_to_rtx(mp, irec1.br_blockcount, &mod);
232 326051 : if (mod) {
233 0 : ASSERT(mod == 0);
234 0 : return -EINVAL;
235 : }
236 :
237 326051 : startoff1 += irec1.br_blockcount;
238 326051 : startoff2 += irec1.br_blockcount;
239 326051 : blockcount -= irec1.br_blockcount;
240 : }
241 :
242 : return 0;
243 : }
244 : #else
245 : # define xfs_swapext_check_rt_extents(mp, req) (0)
246 : #endif
247 :
248 : /* Check all extents to make sure we can actually swap them. */
249 : int
250 1861808 : xfs_swapext_check_extents(
251 : struct xfs_mount *mp,
252 : const struct xfs_swapext_req *req)
253 : {
254 1861808 : struct xfs_ifork *ifp1, *ifp2;
255 :
256 : /* No fork? */
257 1861808 : ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
258 1861802 : ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
259 1861802 : if (!ifp1 || !ifp2)
260 : return -EINVAL;
261 :
262 : /* We don't know how to swap local format forks. */
263 1861802 : if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
264 1861802 : ifp2->if_format == XFS_DINODE_FMT_LOCAL)
265 : return -EINVAL;
266 :
267 1861802 : return xfs_swapext_check_rt_extents(mp, req);
268 : }
269 :
270 : #ifdef CONFIG_XFS_QUOTA
/*
 * Log the actual updates to the quota accounting.  Every real block that
 * moves from one file to the other shifts one unit of (rt)block quota from
 * one inode's dquots to the other's.
 */
static inline void
xfs_swapext_update_quota(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t		ip1_delta = 0, ip2_delta = 0;
	unsigned int	qflag;

	/* rt files account against rt block quota; everything else, blocks. */
	qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	/* irec1's blocks leave ip1 and arrive at ip2... */
	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	/* ...and irec2's blocks move the other way.  Holes move nothing. */
	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
}
298 : #else
299 : # define xfs_swapext_update_quota(tp, sxi, irec1, irec2) ((void)0)
300 : #endif
301 :
/*
 * Decide if we want to skip this mapping from file1.  Returns true if @irec
 * can be skipped entirely; may shorten irec->br_blockcount in place so that
 * the (possibly trimmed) mapping never straddles an rt extent boundary.
 */
static inline bool
xfs_swapext_can_skip_mapping(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount	*mp = sxi->sxi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written extents. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align with
	 * rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		/* Trim to the next rtx boundary; caller swaps the remainder. */
		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the mapping
	 *   down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		/* Trim down to the last whole rtx, then skip that part. */
		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
379 :
/*
 * Walk forward through the file ranges in @sxi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings in
 * @irec1/@irec2; otherwise we've reached the end of the range and
 * sxi_blockcount will be zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them as
 * the left records in @adj (if provided) so that the simulation phase can
 * avoid an extra lookup.
 */
static int
xfs_swapext_find_mappings(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_swapext_adjacent	*adj)
{
	int		nimaps;
	int		bmap_flags;
	int		error;

	bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));

	/* sxi_advance() moves the cursor past irec1 on every iteration. */
	for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
		/* Read extent from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
				sxi->sxi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != sxi->sxi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/* Unwritten/hole mappings may be skippable; see helper. */
		if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
			trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
			continue;
		}

		/* Read extent from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != sxi->sxi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only swap as many blocks as the smaller of the two
		 * extent maps.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
		trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);

		/* We found something to swap, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two extents mapped to the same physical block must not have
		 * different states; that's filesystem corruption.  Move on to
		 * the next extent if they're both holes or both the same
		 * physical extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(sxi->sxi_ip1,
					xfs_swapext_whichfork(sxi));
			xfs_bmap_mark_sick(sxi->sxi_ip2,
					xfs_swapext_whichfork(sxi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}
489 :
/*
 * Exchange these two mappings.  Logs unmap and remap intents for both
 * files, updates quota and ondisk sizes, then advances the swap cursor.
 */
static void
xfs_swapext_exchange_mappings(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int	whichfork = xfs_swapext_whichfork(sxi);

	xfs_swapext_update_quota(tp, sxi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We swap the file offsets between the two maps
	 * and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information swapped.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);

	/* Make sure we're not mapping extents past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
				sxi->sxi_isize1);
		xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
				sxi->sxi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the SXD item, and if *blockcount is nonzero, it
	 * will log a new SXI item for the remainder and call us back.
	 */
	sxi_advance(sxi, irec1);
}
531 :
/*
 * Convert inode2's leaf attr fork back to shortform, if possible.
 * Returns 0 if the fork is not a single leaf block or does not fit in the
 * inode literal area; only then is a conversion attempted.
 */
STATIC int
xfs_swapext_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args	args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		/* after the swap, ip2 owns its attr blocks again */
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_buf		*bp;
	int			forkoff;
	int			error;

	/* Only single-leaf attr forks are candidates for shortform. */
	if (!xfs_attr_is_leaf(sxi->sxi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	/* forkoff == 0 means the entries won't all fit inline; leave as-is. */
	forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}
563 :
/*
 * Convert inode2's block dir fork back to shortform, if possible.
 * A no-op unless the directory is in single-block form and its shortform
 * encoding fits in the inode literal area.
 */
STATIC int
xfs_swapext_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args	args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr	sfh;
	struct xfs_buf		*bp;
	bool			isblock;
	int			size;
	int			error;

	error = xfs_dir2_isblock(&args, &isblock);
	if (error)
		return error;

	/* Only block-form directories can collapse to shortform. */
	if (!isblock)
		return 0;

	error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
	if (error)
		return error;

	/* Compute the shortform size; bail if it won't fit inline. */
	size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}
600 :
/*
 * Convert inode2's remote symlink target back to shortform, if possible.
 * Reads the remote target into memory, frees the remote blocks, and
 * rewrites the data fork in local format.
 */
STATIC int
xfs_swapext_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_inode	*ip = sxi->sxi_ip2;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char			*buf;
	int			error;

	/* Already shortform, or target too big to inline?  Nothing to do. */
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kmem_free(buf);
	return error;
}
642 :
/* Clear the reflink flag on @ip and log the inode core change. */
static inline void
xfs_swapext_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
653 :
/*
 * Finish whatever work might come after a swap operation: converting ip2's
 * fork back to shortform and/or clearing reflink flags.  Each flag is
 * cleared from sxi_flags as its work completes so a restarted operation
 * does not redo it.
 */
static int
xfs_swapext_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
		int	error = 0;

		/* Pick the conversion routine by fork/file type. */
		if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
			error = xfs_swapext_attr_to_sf(tp, sxi);
		else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_dir_to_sf(tp, sxi);
		else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_link_to_sf(tp, sxi);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
		if (error)
			return error;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
	}

	return 0;
}
686 :
/*
 * Finish one extent swap, possibly log more.  Returns -EAGAIN when more
 * transactions are needed to complete the operation, 0 when fully done.
 */
int
xfs_swapext_finish_one(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_bmbt_irec	irec1, irec2;
	int			error;

	if (sxi_has_more_swap_work(sxi)) {
		/*
		 * If the operation state says that some range of the files
		 * have not yet been swapped, look for extents in that range to
		 * swap.  If we find some extents, swap them.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (sxi_has_more_swap_work(sxi))
			xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * swap and either we just swapped the last extents in the
		 * range or we didn't find anything to swap, update the ondisk
		 * file sizes.
		 */
		if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
		    !sxi_has_more_swap_work(sxi)) {
			sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
			sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;

			xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
		}
	} else if (sxi_has_postop_work(sxi)) {
		/*
		 * Now that we're finished with the swap operation, complete
		 * the post-op cleanup work.
		 */
		error = xfs_swapext_do_postop_work(tp, sxi);
		if (error)
			return error;
	}

	/* Error injection point for testing recovery of half-done swaps. */
	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
		trace_xfs_swapext_defer(tp->t_mountp, sxi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the swapping work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
		xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
		xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
	}

	return 0;
}
754 :
/*
 * Compute the amount of bmbt blocks we should reserve for each file.  In the
 * worst case, each exchange will fill a hole with a new mapping, which could
 * result in a btree split every time we add a new leaf block.
 */
static inline uint64_t
xfs_swapext_bmbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_swapext_req	*req)
{
	/* One leaf-block's worth of splits per batch of new mappings. */
	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
			XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
}
769 :
/*
 * Compute the worst-case number of rmap btree blocks needed to record the
 * remappings.  Zero if the filesystem has no rmap btree; otherwise sized
 * for the rt or data rmapbt depending on where ip1's blocks live.
 */
static inline uint64_t
xfs_swapext_rmapbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_swapext_req	*req)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	if (XFS_IS_REALTIME_INODE(req->ip1))
		return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
			XFS_RTRMAPADD_SPACE_RES(mp);

	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
			XFS_RMAPADD_SPACE_RES(mp);
}
786 :
787 : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
788 : int
789 6814189 : xfs_swapext_estimate_overhead(
790 : struct xfs_swapext_req *req)
791 : {
792 6814189 : struct xfs_mount *mp = req->ip1->i_mount;
793 6814189 : xfs_filblks_t bmbt_blocks;
794 6814189 : xfs_filblks_t rmapbt_blocks;
795 6814189 : xfs_filblks_t resblks = req->resblks;
796 :
797 : /*
798 : * Compute the number of bmbt and rmapbt blocks we might need to handle
799 : * the estimated number of exchanges.
800 : */
801 6814189 : bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
802 6808616 : rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
803 :
804 6809645 : trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
805 :
806 : /* Make sure the change in file block count doesn't overflow. */
807 6809406 : if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
808 : return -EFBIG;
809 6809406 : if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
810 : return -EFBIG;
811 :
812 : /*
813 : * Add together the number of blocks we need to handle btree growth,
814 : * then add it to the number of blocks we need to reserve to this
815 : * transaction.
816 : */
817 6809406 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
818 : return -ENOSPC;
819 6809406 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
820 : return -ENOSPC;
821 6809406 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
822 : return -ENOSPC;
823 6809406 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
824 : return -ENOSPC;
825 :
826 : /* Can't actually reserve more than UINT_MAX blocks. */
827 6809406 : if (req->resblks > UINT_MAX)
828 : return -ENOSPC;
829 :
830 6809406 : req->resblks = resblks;
831 6809406 : trace_xfs_swapext_final_estimate(req);
832 6809406 : return 0;
833 : }
834 :
/* Decide if we can merge two real extents. */
static inline bool
can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Reject any remaining non-real mappings (e.g. delalloc). */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	/*
	 * Mergeable iff logically and physically contiguous, same unwritten
	 * state, and the combined length fits in one bmbt record.
	 */
	if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state			  == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
858 :
859 : #define CLEFT_CONTIG 0x01
860 : #define CRIGHT_CONTIG 0x02
861 : #define CHOLE 0x04
862 : #define CBOTH_CONTIG (CLEFT_CONTIG | CRIGHT_CONTIG)
863 :
864 : #define NLEFT_CONTIG 0x10
865 : #define NRIGHT_CONTIG 0x20
866 : #define NHOLE 0x40
867 : #define NBOTH_CONTIG (NLEFT_CONTIG | NRIGHT_CONTIG)
868 :
869 : /* Estimate the effect of a single swap on extent count. */
870 : static inline int
871 14505558 : delta_nextents_step(
872 : struct xfs_mount *mp,
873 : const struct xfs_bmbt_irec *left,
874 : const struct xfs_bmbt_irec *curr,
875 : const struct xfs_bmbt_irec *new,
876 : const struct xfs_bmbt_irec *right)
877 : {
878 14505558 : bool lhole, rhole, chole, nhole;
879 14505558 : unsigned int state = 0;
880 14505558 : int ret = 0;
881 :
882 14505558 : lhole = left->br_startblock == HOLESTARTBLOCK;
883 14505558 : rhole = right->br_startblock == HOLESTARTBLOCK;
884 14505558 : chole = curr->br_startblock == HOLESTARTBLOCK;
885 14505558 : nhole = new->br_startblock == HOLESTARTBLOCK;
886 :
887 14505558 : if (chole)
888 3935459 : state |= CHOLE;
889 14505558 : if (!lhole && !chole && can_merge(left, curr))
890 1427 : state |= CLEFT_CONTIG;
891 14505558 : if (!rhole && !chole && can_merge(curr, right))
892 3801433 : state |= CRIGHT_CONTIG;
893 14505558 : if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
894 470 : left->br_startblock + curr->br_startblock +
895 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
896 133 : state &= ~CRIGHT_CONTIG;
897 :
898 14505558 : if (nhole)
899 3935459 : state |= NHOLE;
900 14505558 : if (!lhole && !nhole && can_merge(left, new))
901 2597404 : state |= NLEFT_CONTIG;
902 14505558 : if (!rhole && !nhole && can_merge(new, right))
903 100 : state |= NRIGHT_CONTIG;
904 14505558 : if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
905 21 : left->br_startblock + new->br_startblock +
906 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
907 6 : state &= ~NRIGHT_CONTIG;
908 :
909 14505558 : switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
910 337 : case CLEFT_CONTIG | CRIGHT_CONTIG:
911 : /*
912 : * left/curr/right are the same extent, so deleting curr causes
913 : * 2 new extents to be created.
914 : */
915 337 : ret += 2;
916 337 : break;
917 6767709 : case 0:
918 : /*
919 : * curr is not contiguous with any extent, so we remove curr
920 : * completely
921 : */
922 6767709 : ret--;
923 6767709 : break;
924 : case CHOLE:
925 : /* hole, do nothing */
926 : break;
927 : case CLEFT_CONTIG:
928 : case CRIGHT_CONTIG:
929 : /* trim either left or right, no change */
930 : break;
931 : }
932 :
933 14505558 : switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
934 15 : case NLEFT_CONTIG | NRIGHT_CONTIG:
935 : /*
936 : * left/curr/right will become the same extent, so adding
937 : * curr causes the deletion of right.
938 : */
939 15 : ret--;
940 15 : break;
941 7972617 : case 0:
942 : /* new is not contiguous with any extent */
943 7972617 : ret++;
944 7972617 : break;
945 : case NHOLE:
946 : /* hole, do nothing. */
947 : break;
948 : case NLEFT_CONTIG:
949 : case NRIGHT_CONTIG:
950 : /* new is absorbed into left or right, no change */
951 : break;
952 : }
953 :
954 14505558 : trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
955 : state);
956 14505557 : return ret;
957 : }
958 :
959 : /* Make sure we don't overflow the extent counters. */
960 : static inline int
961 2245064 : ensure_delta_nextents(
962 : struct xfs_swapext_req *req,
963 : struct xfs_inode *ip,
964 : int64_t delta)
965 : {
966 2245064 : struct xfs_mount *mp = ip->i_mount;
967 2245064 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, req->whichfork);
968 2245051 : xfs_extnum_t max_extents;
969 2245051 : bool large_extcount;
970 :
971 2245051 : if (delta < 0)
972 : return 0;
973 :
974 2221166 : if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
975 10 : if (ifp->if_nextents + delta > 10)
976 : return -EFBIG;
977 : }
978 :
979 2221160 : if (req->req_flags & XFS_SWAP_REQ_NREXT64)
980 : large_extcount = true;
981 : else
982 2221160 : large_extcount = xfs_inode_has_large_extent_counts(ip);
983 :
984 2221160 : max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
985 2221160 : if (ifp->if_nextents + delta <= max_extents)
986 : return 0;
987 0 : if (large_extcount)
988 : return -EFBIG;
989 0 : if (!xfs_has_large_extent_counts(mp))
990 : return -EFBIG;
991 :
992 0 : max_extents = xfs_iext_max_nextents(true, req->whichfork);
993 0 : if (ifp->if_nextents + delta > max_extents)
994 : return -EFBIG;
995 :
996 0 : req->req_flags |= XFS_SWAP_REQ_NREXT64;
997 0 : return 0;
998 : }
999 :
1000 : /* Find the next extent after irec. */
1001 : static inline int
1002 14505558 : get_next_ext(
1003 : struct xfs_inode *ip,
1004 : int bmap_flags,
1005 : const struct xfs_bmbt_irec *irec,
1006 : struct xfs_bmbt_irec *nrec)
1007 : {
1008 14505558 : xfs_fileoff_t off;
1009 14505558 : xfs_filblks_t blockcount;
1010 14505558 : int nimaps = 1;
1011 14505558 : int error;
1012 :
1013 14505558 : off = irec->br_startoff + irec->br_blockcount;
1014 14505558 : blockcount = XFS_MAX_FILEOFF - off;
1015 14505558 : error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
1016 14505558 : if (error)
1017 : return error;
1018 14505558 : if (nrec->br_startblock == DELAYSTARTBLOCK ||
1019 14498687 : nrec->br_startoff != off) {
1020 : /*
1021 : * If we don't get the extent we want, return a zero-length
1022 : * mapping, which our estimator function will pretend is a hole.
1023 : * We shouldn't get delalloc reservations.
1024 : */
1025 6871 : nrec->br_startblock = HOLESTARTBLOCK;
1026 : }
1027 :
1028 : return 0;
1029 : }
1030 :
1031 : int __init
1032 59 : xfs_swapext_intent_init_cache(void)
1033 : {
1034 59 : xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
1035 : sizeof(struct xfs_swapext_intent),
1036 : 0, 0, NULL);
1037 :
1038 59 : return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
1039 : }
1040 :
/* Tear down the swapext intent slab cache at module unload. */
void
xfs_swapext_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_swapext_intent_cache);
	/* Clear the pointer so a stale cache can never be reused. */
	xfs_swapext_intent_cache = NULL;
}
1047 :
1048 : /*
1049 : * Decide if we will swap the reflink flags between the two files after the
1050 : * swap. The only time we want to do this is if we're exchanging all extents
1051 : * under EOF and the inode reflink flags have different states.
1052 : */
1053 : static inline bool
1054 3931908 : sxi_can_exchange_reflink_flags(
1055 : const struct xfs_swapext_req *req,
1056 : unsigned int reflink_state)
1057 : {
1058 3931908 : struct xfs_mount *mp = req->ip1->i_mount;
1059 :
1060 3931908 : if (hweight32(reflink_state) != 1)
1061 : return false;
1062 218 : if (req->startoff1 != 0 || req->startoff2 != 0)
1063 : return false;
1064 186 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
1065 : return false;
1066 178 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
1067 0 : return false;
1068 : return true;
1069 : }
1070 :
1071 :
1072 : /* Allocate and initialize a new incore intent item from a request. */
1073 : struct xfs_swapext_intent *
1074 4086001 : xfs_swapext_init_intent(
1075 : const struct xfs_swapext_req *req,
1076 : unsigned int *reflink_state)
1077 : {
1078 4086001 : struct xfs_swapext_intent *sxi;
1079 4086001 : unsigned int rs = 0;
1080 :
1081 4086001 : sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
1082 : GFP_NOFS | __GFP_NOFAIL);
1083 4086002 : INIT_LIST_HEAD(&sxi->sxi_list);
1084 4086002 : sxi->sxi_ip1 = req->ip1;
1085 4086002 : sxi->sxi_ip2 = req->ip2;
1086 4086002 : sxi->sxi_startoff1 = req->startoff1;
1087 4086002 : sxi->sxi_startoff2 = req->startoff2;
1088 4086002 : sxi->sxi_blockcount = req->blockcount;
1089 4086002 : sxi->sxi_isize1 = sxi->sxi_isize2 = -1;
1090 :
1091 4086002 : if (req->whichfork == XFS_ATTR_FORK)
1092 154103 : sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;
1093 :
1094 4086002 : if (req->whichfork == XFS_DATA_FORK &&
1095 : (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
1096 212941 : sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
1097 212941 : sxi->sxi_isize1 = req->ip2->i_disk_size;
1098 212941 : sxi->sxi_isize2 = req->ip1->i_disk_size;
1099 : }
1100 :
1101 4086002 : if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
1102 164 : sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
1103 4086002 : if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
1104 202438 : sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;
1105 :
1106 4086002 : if (req->req_flags & XFS_SWAP_REQ_LOGGED)
1107 4026070 : sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
1108 4086002 : if (req->req_flags & XFS_SWAP_REQ_NREXT64)
1109 0 : sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;
1110 :
1111 4086002 : if (req->whichfork == XFS_DATA_FORK) {
1112 : /*
1113 : * Record the state of each inode's reflink flag before the
1114 : * operation.
1115 : */
1116 3931901 : if (xfs_is_reflink_inode(req->ip1))
1117 3618244 : rs |= XFS_REFLINK_STATE_IP1;
1118 3931901 : if (xfs_is_reflink_inode(req->ip2))
1119 3618461 : rs |= XFS_REFLINK_STATE_IP2;
1120 :
1121 : /*
1122 : * Figure out if we're clearing the reflink flags (which
1123 : * effectively swaps them) after the operation.
1124 : */
1125 3931901 : if (sxi_can_exchange_reflink_flags(req, rs)) {
1126 178 : if (rs & XFS_REFLINK_STATE_IP1)
1127 0 : sxi->sxi_flags |=
1128 : XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
1129 178 : if (rs & XFS_REFLINK_STATE_IP2)
1130 178 : sxi->sxi_flags |=
1131 : XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
1132 : }
1133 : }
1134 :
1135 4086013 : if (reflink_state)
1136 2048644 : *reflink_state = rs;
1137 4086013 : return sxi;
1138 : }
1139 :
/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 */
int
xfs_swapext_estimate(
	struct xfs_swapext_req		*req)
{
	struct xfs_swapext_intent	*sxi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_swapext_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));

	bmap_flags = xfs_bmapi_aflag(req->whichfork);
	/* Throwaway incore intent used only to drive the dry run below. */
	sxi = xfs_swapext_init_intent(req, NULL);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the extent at each end of the range, and for
	 * each step of the swap we can split the extent that we're working on
	 * if the extents do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (sxi_has_more_swap_work(sxi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * swap.  Because we're simulating the swap, pass in adj to
		 * capture skipped mappings for correct estimation of bmbt
		 * record merges.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!sxi_has_more_swap_work(sxi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next extents from both files. */
		error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/* Update extent count deltas. */
		d_nexts1 += delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/*
		 * Now pretend we swapped the extents: each inode's "left"
		 * neighbor either absorbs the incoming mapping or is
		 * replaced by it, mirroring what the real swap would do.
		 */
		if (can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		sxi_advance(sxi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    req->whichfork == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the swap operation will not overflow.
	 */
	trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		/* Same inode: both deltas land on the one fork. */
		error = ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_swapext_initial_estimate(req);
	error = xfs_swapext_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_swapext_intent_cache, sxi);
	return error;
}
1253 :
/* Set the reflink flag on @ip and log the inode core change in @tp. */
static inline void
xfs_swapext_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	/* Trace before mutating so the event records the transition. */
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
1264 :
1265 : /*
1266 : * If either file has shared blocks and we're swapping data forks, we must flag
1267 : * the other file as having shared blocks so that we get the shared-block rmap
1268 : * functions if we need to fix up the rmaps.
1269 : */
1270 : void
1271 2048654 : xfs_swapext_ensure_reflink(
1272 : struct xfs_trans *tp,
1273 : const struct xfs_swapext_intent *sxi,
1274 : unsigned int reflink_state)
1275 : {
1276 2048654 : if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
1277 1809112 : !xfs_is_reflink_inode(sxi->sxi_ip2))
1278 0 : xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
1279 :
1280 2048654 : if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
1281 1809221 : !xfs_is_reflink_inode(sxi->sxi_ip1))
1282 109 : xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
1283 2048654 : }
1284 :
1285 : /* Widen the extent counts of both inodes if necessary. */
1286 : static inline void
1287 2048617 : xfs_swapext_upgrade_extent_counts(
1288 : struct xfs_trans *tp,
1289 : const struct xfs_swapext_intent *sxi)
1290 : {
1291 2048617 : if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
1292 : return;
1293 :
1294 0 : sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1295 0 : xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
1296 :
1297 0 : sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1298 0 : xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
1299 : }
1300 :
/*
 * Schedule the swap of a range of extents from one inode to another.  If the
 * atomic swap feature is enabled, then the operation progress can be resumed
 * even if the system goes down.  The caller must commit the transaction to
 * start the work.
 *
 * The caller must ensure that the inodes are joined to the transaction and
 * ILOCKd; they will still be joined to the transaction at exit.
 */
void
xfs_swapext(
	struct xfs_trans		*tp,
	const struct xfs_swapext_req	*req)
{
	struct xfs_swapext_intent	*sxi;
	unsigned int			reflink_state;

	ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
	ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
	ASSERT(req->whichfork != XFS_COW_FORK);
	ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
	/* Swapping on-disk sizes only makes sense for data forks. */
	if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
		ASSERT(req->whichfork == XFS_DATA_FORK);
	/* Shortform conversion of ip2 applies to attrs, dirs, and symlinks. */
	if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
		ASSERT(req->whichfork == XFS_ATTR_FORK ||
		       (req->whichfork == XFS_DATA_FORK &&
			(S_ISDIR(VFS_I(req->ip2)->i_mode) ||
			 S_ISLNK(VFS_I(req->ip2)->i_mode))));

	/* An empty range is a no-op. */
	if (req->blockcount == 0)
		return;

	sxi = xfs_swapext_init_intent(req, &reflink_state);
	xfs_swapext_schedule(tp, sxi);
	xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
	xfs_swapext_upgrade_extent_counts(tp, sxi);
}
|