Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_bmap.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_quota.h"
19 : #include "xfs_swapext.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_bmap_btree.h"
22 : #include "xfs_trans_space.h"
23 : #include "xfs_error.h"
24 : #include "xfs_errortag.h"
25 : #include "xfs_health.h"
26 : #include "xfs_da_format.h"
27 : #include "xfs_da_btree.h"
28 : #include "xfs_attr_leaf.h"
29 : #include "xfs_attr.h"
30 : #include "xfs_dir2_priv.h"
31 : #include "xfs_dir2.h"
32 : #include "xfs_symlink_remote.h"
33 :
/* Slab cache for deferred extent-swap intent items. */
struct kmem_cache *xfs_swapext_intent_cache;

/* bmbt mappings adjacent to a pair of records. */
struct xfs_swapext_adjacent {
	struct xfs_bmbt_irec		left1;
	struct xfs_bmbt_irec		right1;
	struct xfs_bmbt_irec		left2;
	struct xfs_bmbt_irec		right2;
};

/* Initializer: all four cached neighbour mappings start out as holes. */
#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}

/* Information to help us reset reflink flag / CoW fork state after a swap. */

/* Previous state of the two inodes' reflink flags. */
#define XFS_REFLINK_STATE_IP1	(1U << 0)
#define XFS_REFLINK_STATE_IP2	(1U << 1)
56 :
57 : /*
58 : * If the reflink flag is set on either inode, make sure it has an incore CoW
59 : * fork, since all reflink inodes must have them. If there's a CoW fork and it
60 : * has extents in it, make sure the inodes are tagged appropriately so that
61 : * speculative preallocations can be GC'd if we run low of space.
62 : */
63 : static inline void
64 2479974 : xfs_swapext_ensure_cowfork(
65 : struct xfs_inode *ip)
66 : {
67 2479974 : struct xfs_ifork *cfork;
68 :
69 2479974 : if (xfs_is_reflink_inode(ip))
70 2392670 : xfs_ifork_init_cow(ip);
71 :
72 2479970 : cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
73 2479970 : if (!cfork)
74 : return;
75 2421129 : if (cfork->if_bytes > 0)
76 1407560 : xfs_inode_set_cowblocks_tag(ip);
77 : else
78 1013569 : xfs_inode_clear_cowblocks_tag(ip);
79 : }
80 :
/*
 * Schedule an atomic extent swap: trace the intent @sxi and attach it to the
 * deferred-work list of transaction @tp so that defer ops will process it.
 */
void
xfs_swapext_schedule(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	trace_xfs_swapext_defer(tp->t_mountp, sxi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_SWAPEXT, &sxi->sxi_list);
}
90 :
91 : /*
92 : * Adjust the on-disk inode size upwards if needed so that we never map extents
93 : * into the file past EOF. This is crucial so that log recovery won't get
94 : * confused by the sudden appearance of post-eof extents.
95 : */
96 : STATIC void
97 9053876 : xfs_swapext_update_size(
98 : struct xfs_trans *tp,
99 : struct xfs_inode *ip,
100 : struct xfs_bmbt_irec *imap,
101 : xfs_fsize_t new_isize)
102 : {
103 9053876 : struct xfs_mount *mp = tp->t_mountp;
104 9053876 : xfs_fsize_t len;
105 :
106 9053876 : if (new_isize < 0)
107 : return;
108 :
109 45474 : len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
110 : new_isize);
111 :
112 45474 : if (len <= ip->i_disk_size)
113 : return;
114 :
115 176 : trace_xfs_swapext_update_inode_size(ip, len);
116 :
117 176 : ip->i_disk_size = len;
118 176 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
119 : }
120 :
121 : static inline bool
122 : sxi_has_more_swap_work(const struct xfs_swapext_intent *sxi)
123 : {
124 40826399 : return sxi->sxi_blockcount > 0;
125 : }
126 :
127 : static inline bool
128 : sxi_has_postop_work(const struct xfs_swapext_intent *sxi)
129 : {
130 1426609 : return sxi->sxi_flags & (XFS_SWAP_EXT_CLEAR_INO1_REFLINK |
131 : XFS_SWAP_EXT_CLEAR_INO2_REFLINK |
132 : XFS_SWAP_EXT_CVT_INO2_SF);
133 : }
134 :
135 : static inline void
136 : sxi_advance(
137 : struct xfs_swapext_intent *sxi,
138 : const struct xfs_bmbt_irec *irec)
139 : {
140 12127161 : sxi->sxi_startoff1 += irec->br_blockcount;
141 12127161 : sxi->sxi_startoff2 += irec->br_blockcount;
142 12127161 : sxi->sxi_blockcount -= irec->br_blockcount;
143 7540665 : }
144 :
#ifdef DEBUG
/*
 * Decide if this request requires checking the realtime extent alignment of
 * the mappings being swapped.  Returns true only for unlogged swaps of
 * realtime data-fork files whose allocation unit spans multiple fsblocks.
 * Debug builds only.
 */
static inline bool
xfs_swapext_need_rt_conversion(
	const struct xfs_swapext_req	*req)
{
	struct xfs_inode	*ip = req->ip2;
	struct xfs_mount	*mp = ip->i_mount;

	/* xattrs don't live on the rt device */
	if (req->whichfork == XFS_ATTR_FORK)
		return false;

	/*
	 * Caller got permission to use logged swapext, so log recovery will
	 * finish the swap and not leave us with partially swapped rt extents
	 * exposed to userspace.
	 */
	if (req->req_flags & XFS_SWAP_REQ_LOGGED)
		return false;

	/*
	 * If we can't use log intent items at all, the only supported
	 * operation is full fork swaps.
	 */
	if (!xfs_swapext_supported(mp))
		return false;

	/* Conversion is only needed for realtime files with big rt extents */
	return xfs_inode_has_bigrtextents(ip);
}
175 :
/*
 * Debug-build sanity check: walk both file ranges of the request and assert
 * that every pair of mappings to be swapped starts and ends on a realtime
 * extent boundary.  Returns -EINVAL (after asserting) on misalignment, or a
 * negative errno if reading the bmaps fails.
 */
static inline int
xfs_swapext_check_rt_extents(
	struct xfs_mount		*mp,
	const struct xfs_swapext_req	*req)
{
	struct xfs_bmbt_irec		irec1, irec2;
	xfs_fileoff_t			startoff1 = req->startoff1;
	xfs_fileoff_t			startoff2 = req->startoff2;
	xfs_filblks_t			blockcount = req->blockcount;
	uint32_t			mod;
	int				nimaps;
	int				error;

	if (!xfs_swapext_need_rt_conversion(req))
		return 0;

	while (blockcount > 0) {
		/* Read extent from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(req->ip1, startoff1, blockcount,
				&irec1, &nimaps, 0);
		if (error)
			return error;
		ASSERT(nimaps == 1);

		/* Read extent from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(req->ip2, startoff2,
				irec1.br_blockcount, &irec2, &nimaps,
				0);
		if (error)
			return error;
		ASSERT(nimaps == 1);

		/*
		 * We can only swap as many blocks as the smaller of the two
		 * extent maps.
		 */
		irec1.br_blockcount = min(irec1.br_blockcount,
					  irec2.br_blockcount);

		/* Both mappings must be aligned to the realtime extent size. */
		div_u64_rem(irec1.br_startoff, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			ASSERT(mod == 0);
			return -EINVAL;
		}

		div_u64_rem(irec2.br_startoff, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			ASSERT(mod == 0);
			return -EINVAL;
		}

		div_u64_rem(irec1.br_blockcount, mp->m_sb.sb_rextsize, &mod);
		if (mod) {
			ASSERT(mod == 0);
			return -EINVAL;
		}

		/* Advance past the pair we just checked. */
		startoff1 += irec1.br_blockcount;
		startoff2 += irec1.br_blockcount;
		blockcount -= irec1.br_blockcount;
	}

	return 0;
}
#else
# define xfs_swapext_check_rt_extents(mp, req)	(0)
#endif
246 :
247 : /* Check all extents to make sure we can actually swap them. */
248 : int
249 1242463 : xfs_swapext_check_extents(
250 : struct xfs_mount *mp,
251 : const struct xfs_swapext_req *req)
252 : {
253 1242463 : struct xfs_ifork *ifp1, *ifp2;
254 :
255 : /* No fork? */
256 1242463 : ifp1 = xfs_ifork_ptr(req->ip1, req->whichfork);
257 1242461 : ifp2 = xfs_ifork_ptr(req->ip2, req->whichfork);
258 1242456 : if (!ifp1 || !ifp2)
259 : return -EINVAL;
260 :
261 : /* We don't know how to swap local format forks. */
262 1242456 : if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
263 1242456 : ifp2->if_format == XFS_DINODE_FMT_LOCAL)
264 : return -EINVAL;
265 :
266 1242456 : return xfs_swapext_check_rt_extents(mp, req);
267 : }
268 :
#ifdef CONFIG_XFS_QUOTA
/*
 * Log the actual updates to the quota accounting.  irec1 moves from ip1 to
 * ip2 and irec2 moves the other way, so each inode's block count changes by
 * the difference of the two mapping lengths.  Holes and delalloc mappings
 * (anything not a real extent) contribute nothing.
 */
static inline void
xfs_swapext_update_quota(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t			ip1_delta = 0, ip2_delta = 0;
	unsigned int		qflag;

	/* Realtime files account against the rt block counter instead. */
	qflag = XFS_IS_REALTIME_INODE(sxi->sxi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, sxi->sxi_ip2, qflag, ip2_delta);
}
#else
# define xfs_swapext_update_quota(tp, sxi, irec1, irec2)	((void)0)
#endif
300 :
/*
 * Decide if we want to skip this mapping from file1.  May trim
 * irec->br_blockcount as a side effect so that a partially-skippable mapping
 * is processed in rtx-aligned pieces.  Returns true if the (possibly
 * trimmed) mapping should be skipped rather than swapped.
 */
static inline bool
xfs_swapext_can_skip_mapping(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = sxi->sxi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written extents. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtextents(sxi->sxi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align
	 * with rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the
	 *   mapping down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
378 :
/*
 * Walk forward through the file ranges in @sxi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings;
 * otherwise we've reached the end of the range and sxi_blockcount will be
 * zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them as
 * the left records in @adj (if provided) so that the simulation phase can
 * avoid an extra lookup.
 */
static int
xfs_swapext_find_mappings(
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_swapext_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_swapext_whichfork(sxi));

	/* Each pass either skips a mapping (advancing) or returns a pair. */
	for (; sxi_has_more_swap_work(sxi); sxi_advance(sxi, irec1)) {
		/* Read extent from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip1, sxi->sxi_startoff1,
				sxi->sxi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != sxi->sxi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/* Note: can trim irec1 to an rtx boundary as a side effect. */
		if (xfs_swapext_can_skip_mapping(sxi, irec1)) {
			trace_xfs_swapext_extent1_skip(sxi->sxi_ip1, irec1);
			continue;
		}

		/* Read extent from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(sxi->sxi_ip2, sxi->sxi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != sxi->sxi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc extent
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only swap as many blocks as the smaller of the two
		 * extent maps.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_swapext_extent1(sxi->sxi_ip1, irec1);
		trace_xfs_swapext_extent2(sxi->sxi_ip2, irec2);

		/* We found something to swap, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two extents mapped to the same physical block must not have
		 * different states; that's filesystem corruption.  Move on to
		 * the next extent if they're both holes or both the same
		 * physical extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(sxi->sxi_ip1,
					xfs_swapext_whichfork(sxi));
			xfs_bmap_mark_sick(sxi->sxi_ip2,
					xfs_swapext_whichfork(sxi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}
488 :
/*
 * Exchange these two mappings: unmap both, then remap each range with the
 * other file's physical extent.  Also updates quota accounting and the
 * ondisk file sizes, then advances the intent cursor past the pair.
 */
static void
xfs_swapext_exchange_mappings(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_swapext_whichfork(sxi);

	xfs_swapext_update_quota(tp, sxi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, sxi->sxi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We swap the file offsets between the two maps
	 * and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information swapped.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, sxi->sxi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, sxi->sxi_ip2, whichfork, irec1);

	/* Make sure we're not mapping extents past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_swapext_update_size(tp, sxi->sxi_ip1, irec2,
				sxi->sxi_isize1);
		xfs_swapext_update_size(tp, sxi->sxi_ip2, irec1,
				sxi->sxi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the SXD item, and if *blockcount is nonzero, it
	 * will log a new SXI item for the remainder and call us back.
	 */
	sxi_advance(sxi, irec1);
}
530 :
/* Convert inode2's leaf attr fork back to shortform, if possible. */
STATIC int
xfs_swapext_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args	args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_buf		*bp;
	int			forkoff;
	int			error;

	/* Only single-leaf attr forks can be converted back to shortform. */
	if (!xfs_attr_is_leaf(sxi->sxi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	/* forkoff == 0 means the leaf contents won't fit inline; leave it. */
	forkoff = xfs_attr_shortform_allfit(bp, sxi->sxi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}
562 :
/* Convert inode2's block dir fork back to shortform, if possible. */
STATIC int
xfs_swapext_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_da_args	args = {
		.dp		= sxi->sxi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= sxi->sxi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr	sfh;
	struct xfs_buf		*bp;
	bool			isblock;
	int			size;
	int			error;

	/* Only single-block directories can be converted back to shortform. */
	error = xfs_dir2_isblock(&args, &isblock);
	if (error)
		return error;

	if (!isblock)
		return 0;

	error = xfs_dir3_block_read(tp, sxi->sxi_ip2, sxi->sxi_ip2->i_ino, &bp);
	if (error)
		return error;

	/* Leave the dir in block format if it won't fit in the inode core. */
	size = xfs_dir2_block_sfsize(sxi->sxi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(sxi->sxi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}
599 :
/* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_swapext_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_inode	*ip = sxi->sxi_ip2;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char			*buf;
	int			error;

	/* Nothing to do if already shortform or the target won't fit. */
	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmem_alloc(ip->i_disk_size + 1, KM_NOFS);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kmem_free(buf);
	return error;
}
641 :
/* Clear the reflink iflag on @ip after a swap and log the inode core. */
static inline void
xfs_swapext_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
652 :
/*
 * Finish whatever work might come after a swap operation: shortform
 * conversion of inode2's fork and/or clearing reflink flags.  Each step's
 * flag is cleared once the step has run so the work is not repeated if the
 * intent is relogged.
 */
static int
xfs_swapext_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	if (sxi->sxi_flags & XFS_SWAP_EXT_CVT_INO2_SF) {
		int			error = 0;

		/* Pick the converter matching the fork/inode type. */
		if (sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)
			error = xfs_swapext_attr_to_sf(tp, sxi);
		else if (S_ISDIR(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_dir_to_sf(tp, sxi);
		else if (S_ISLNK(VFS_I(sxi->sxi_ip2)->i_mode))
			error = xfs_swapext_link_to_sf(tp, sxi);
		/* One-shot: drop the flag even if the conversion failed. */
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CVT_INO2_SF;
		if (error)
			return error;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO1_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip1);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
	}

	if (sxi->sxi_flags & XFS_SWAP_EXT_CLEAR_INO2_REFLINK) {
		xfs_swapext_clear_reflink(tp, sxi->sxi_ip2);
		sxi->sxi_flags &= ~XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
	}

	return 0;
}
685 :
/*
 * Finish one step of an extent swap: exchange at most one pair of mappings
 * and/or perform the post-swap cleanup work.  Returns -EAGAIN when more work
 * remains (the caller relogs the intent and calls back), 0 when the whole
 * operation is done, or a negative errno on failure.
 */
int
xfs_swapext_finish_one(
	struct xfs_trans		*tp,
	struct xfs_swapext_intent	*sxi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (sxi_has_more_swap_work(sxi)) {
		/*
		 * If the operation state says that some range of the files
		 * have not yet been swapped, look for extents in that range to
		 * swap.  If we find some extents, swap them.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (sxi_has_more_swap_work(sxi))
			xfs_swapext_exchange_mappings(tp, sxi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * swap and either we just swapped the last extents in the
		 * range or we didn't find anything to swap, update the ondisk
		 * file sizes.
		 */
		if ((sxi->sxi_flags & XFS_SWAP_EXT_SET_SIZES) &&
		    !sxi_has_more_swap_work(sxi)) {
			sxi->sxi_ip1->i_disk_size = sxi->sxi_isize1;
			sxi->sxi_ip2->i_disk_size = sxi->sxi_isize2;

			xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
		}
	} else if (sxi_has_postop_work(sxi)) {
		/*
		 * Now that we're finished with the swap operation, complete
		 * the post-op cleanup work.
		 */
		error = xfs_swapext_do_postop_work(tp, sxi);
		if (error)
			return error;
	}

	/* Error injection point for testing swapext recovery. */
	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_SWAPEXT_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (sxi_has_more_swap_work(sxi) || sxi_has_postop_work(sxi)) {
		trace_xfs_swapext_defer(tp->t_mountp, sxi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the swapping work and the post
	 * operation work.  The last thing we need to do before returning to
	 * the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(sxi->sxi_flags & XFS_SWAP_EXT_ATTR_FORK)) {
		xfs_swapext_ensure_cowfork(sxi->sxi_ip1);
		xfs_swapext_ensure_cowfork(sxi->sxi_ip2);
	}

	return 0;
}
753 :
754 : /*
755 : * Compute the amount of bmbt blocks we should reserve for each file. In the
756 : * worst case, each exchange will fill a hole with a new mapping, which could
757 : * result in a btree split every time we add a new leaf block.
758 : */
759 : static inline uint64_t
760 5815904 : xfs_swapext_bmbt_blocks(
761 : struct xfs_mount *mp,
762 : const struct xfs_swapext_req *req)
763 : {
764 5815904 : return howmany_64(req->nr_exchanges,
765 5815904 : XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
766 5816025 : XFS_EXTENTADD_SPACE_RES(mp, req->whichfork);
767 : }
768 :
769 : static inline uint64_t
770 5816358 : xfs_swapext_rmapbt_blocks(
771 : struct xfs_mount *mp,
772 : const struct xfs_swapext_req *req)
773 : {
774 5816358 : if (!xfs_has_rmapbt(mp))
775 : return 0;
776 5800583 : if (XFS_IS_REALTIME_INODE(req->ip1))
777 : return 0;
778 :
779 5800583 : return howmany_64(req->nr_exchanges,
780 5800583 : XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
781 5800654 : XFS_RMAPADD_SPACE_RES(mp);
782 : }
783 :
784 : /* Estimate the bmbt and rmapbt overhead required to exchange extents. */
785 : int
786 5816755 : xfs_swapext_estimate_overhead(
787 : struct xfs_swapext_req *req)
788 : {
789 5816755 : struct xfs_mount *mp = req->ip1->i_mount;
790 5816755 : xfs_filblks_t bmbt_blocks;
791 5816755 : xfs_filblks_t rmapbt_blocks;
792 5816755 : xfs_filblks_t resblks = req->resblks;
793 :
794 : /*
795 : * Compute the number of bmbt and rmapbt blocks we might need to handle
796 : * the estimated number of exchanges.
797 : */
798 5816755 : bmbt_blocks = xfs_swapext_bmbt_blocks(mp, req);
799 5816082 : rmapbt_blocks = xfs_swapext_rmapbt_blocks(mp, req);
800 :
801 5816524 : trace_xfs_swapext_overhead(mp, bmbt_blocks, rmapbt_blocks);
802 :
803 : /* Make sure the change in file block count doesn't overflow. */
804 5816458 : if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
805 : return -EFBIG;
806 5816458 : if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
807 : return -EFBIG;
808 :
809 : /*
810 : * Add together the number of blocks we need to handle btree growth,
811 : * then add it to the number of blocks we need to reserve to this
812 : * transaction.
813 : */
814 5816458 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
815 : return -ENOSPC;
816 5816458 : if (check_add_overflow(resblks, bmbt_blocks, &resblks))
817 : return -ENOSPC;
818 5816458 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
819 : return -ENOSPC;
820 5816458 : if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
821 : return -ENOSPC;
822 :
823 : /* Can't actually reserve more than UINT_MAX blocks. */
824 5816458 : if (req->resblks > UINT_MAX)
825 : return -ENOSPC;
826 :
827 5816458 : req->resblks = resblks;
828 5816458 : trace_xfs_swapext_final_estimate(req);
829 5816458 : return 0;
830 : }
831 :
/* Decide if we can merge two real extents. */
static inline bool
can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Only real (allocated) mappings can merge; excludes delalloc. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	/*
	 * Mergeable: logically and physically contiguous, same written/
	 * unwritten state, and the combined length fits in one bmbt record.
	 */
	if (b1->br_startoff + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}
855 :
/*
 * Contiguity state for delta_nextents_step(): C* flags describe how the
 * current mapping relates to its left/right neighbours; N* flags describe
 * how the incoming (new) mapping relates to those same neighbours.
 */
#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
865 :
866 : /* Estimate the effect of a single swap on extent count. */
867 : static inline int
868 10694840 : delta_nextents_step(
869 : struct xfs_mount *mp,
870 : const struct xfs_bmbt_irec *left,
871 : const struct xfs_bmbt_irec *curr,
872 : const struct xfs_bmbt_irec *new,
873 : const struct xfs_bmbt_irec *right)
874 : {
875 10694840 : bool lhole, rhole, chole, nhole;
876 10694840 : unsigned int state = 0;
877 10694840 : int ret = 0;
878 :
879 10694840 : lhole = left->br_startblock == HOLESTARTBLOCK;
880 10694840 : rhole = right->br_startblock == HOLESTARTBLOCK;
881 10694840 : chole = curr->br_startblock == HOLESTARTBLOCK;
882 10694840 : nhole = new->br_startblock == HOLESTARTBLOCK;
883 :
884 10694840 : if (chole)
885 3086461 : state |= CHOLE;
886 10694840 : if (!lhole && !chole && can_merge(left, curr))
887 969 : state |= CLEFT_CONTIG;
888 10694840 : if (!rhole && !chole && can_merge(curr, right))
889 2628102 : state |= CRIGHT_CONTIG;
890 10694840 : if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
891 335 : left->br_startblock + curr->br_startblock +
892 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
893 1 : state &= ~CRIGHT_CONTIG;
894 :
895 10694840 : if (nhole)
896 3086461 : state |= NHOLE;
897 10694840 : if (!lhole && !nhole && can_merge(left, new))
898 1832657 : state |= NLEFT_CONTIG;
899 10694840 : if (!rhole && !nhole && can_merge(new, right))
900 21 : state |= NRIGHT_CONTIG;
901 10694840 : if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
902 12 : left->br_startblock + new->br_startblock +
903 : right->br_startblock > XFS_MAX_BMBT_EXTLEN)
904 1 : state &= ~NRIGHT_CONTIG;
905 :
906 10694840 : switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
907 334 : case CLEFT_CONTIG | CRIGHT_CONTIG:
908 : /*
909 : * left/curr/right are the same extent, so deleting curr causes
910 : * 2 new extents to be created.
911 : */
912 334 : ret += 2;
913 334 : break;
914 4979643 : case 0:
915 : /*
916 : * curr is not contiguous with any extent, so we remove curr
917 : * completely
918 : */
919 4979643 : ret--;
920 4979643 : break;
921 : case CHOLE:
922 : /* hole, do nothing */
923 : break;
924 : case CLEFT_CONTIG:
925 : case CRIGHT_CONTIG:
926 : /* trim either left or right, no change */
927 : break;
928 : }
929 :
930 10694840 : switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
931 11 : case NLEFT_CONTIG | NRIGHT_CONTIG:
932 : /*
933 : * left/curr/right will become the same extent, so adding
934 : * curr causes the deletion of right.
935 : */
936 11 : ret--;
937 11 : break;
938 5775712 : case 0:
939 : /* new is not contiguous with any extent */
940 5775712 : ret++;
941 5775712 : break;
942 : case NHOLE:
943 : /* hole, do nothing. */
944 : break;
945 : case NLEFT_CONTIG:
946 : case NRIGHT_CONTIG:
947 : /* new is absorbed into left or right, no change */
948 : break;
949 : }
950 :
951 10694840 : trace_xfs_swapext_delta_nextents_step(mp, left, curr, new, right, ret,
952 : state);
953 10694839 : return ret;
954 : }
955 :
/*
 * Make sure we don't overflow the extent counters.  If @delta extra records
 * would exceed the fork's current limit but the filesystem supports large
 * extent counters, request the NREXT64 upgrade via req->req_flags instead of
 * failing.  Returns -EFBIG if the count cannot fit at all.
 */
static inline int
ensure_delta_nextents(
	struct xfs_swapext_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, req->whichfork);
	xfs_extnum_t		max_extents;
	bool			large_extcount;

	/* Shrinking the record count can never overflow. */
	if (delta < 0)
		return 0;

	/* Error injection: pretend the fork can only hold 10 records. */
	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) {
		if (ifp->if_nextents + delta > 10)
			return -EFBIG;
	}

	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		large_extcount = true;
	else
		large_extcount = xfs_inode_has_large_extent_counts(ip);

	max_extents = xfs_iext_max_nextents(large_extcount, req->whichfork);
	if (ifp->if_nextents + delta <= max_extents)
		return 0;
	if (large_extcount)
		return -EFBIG;
	if (!xfs_has_large_extent_counts(mp))
		return -EFBIG;

	/* Would the count fit with large extent counters enabled? */
	max_extents = xfs_iext_max_nextents(true, req->whichfork);
	if (ifp->if_nextents + delta > max_extents)
		return -EFBIG;

	/* Ask the caller to upgrade this inode to 64-bit extent counters. */
	req->req_flags |= XFS_SWAP_REQ_NREXT64;
	return 0;
}
996 :
997 : /* Find the next extent after irec. */
998 : static inline int
999 10694838 : get_next_ext(
1000 : struct xfs_inode *ip,
1001 : int bmap_flags,
1002 : const struct xfs_bmbt_irec *irec,
1003 : struct xfs_bmbt_irec *nrec)
1004 : {
1005 10694838 : xfs_fileoff_t off;
1006 10694838 : xfs_filblks_t blockcount;
1007 10694838 : int nimaps = 1;
1008 10694838 : int error;
1009 :
1010 10694838 : off = irec->br_startoff + irec->br_blockcount;
1011 10694838 : blockcount = XFS_MAX_FILEOFF - off;
1012 10694838 : error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
1013 10694840 : if (error)
1014 : return error;
1015 10694840 : if (nrec->br_startblock == DELAYSTARTBLOCK ||
1016 10687907 : nrec->br_startoff != off) {
1017 : /*
1018 : * If we don't get the extent we want, return a zero-length
1019 : * mapping, which our estimator function will pretend is a hole.
1020 : * We shouldn't get delalloc reservations.
1021 : */
1022 6932 : nrec->br_startblock = HOLESTARTBLOCK;
1023 : }
1024 :
1025 : return 0;
1026 : }
1027 :
1028 : int __init
1029 50 : xfs_swapext_intent_init_cache(void)
1030 : {
1031 50 : xfs_swapext_intent_cache = kmem_cache_create("xfs_swapext_intent",
1032 : sizeof(struct xfs_swapext_intent),
1033 : 0, 0, NULL);
1034 :
1035 50 : return xfs_swapext_intent_cache != NULL ? 0 : -ENOMEM;
1036 : }
1037 :
/* Tear down the incore swapext intent cache created at module init time. */
void
xfs_swapext_intent_destroy_cache(void)
{
	/* kmem_cache_destroy tolerates NULL, so no guard is needed here. */
	kmem_cache_destroy(xfs_swapext_intent_cache);
	xfs_swapext_intent_cache = NULL;
}
1044 :
1045 : /*
1046 : * Decide if we will swap the reflink flags between the two files after the
1047 : * swap. The only time we want to do this is if we're exchanging all extents
1048 : * under EOF and the inode reflink flags have different states.
1049 : */
1050 : static inline bool
1051 2491852 : sxi_can_exchange_reflink_flags(
1052 : const struct xfs_swapext_req *req,
1053 : unsigned int reflink_state)
1054 : {
1055 2491852 : struct xfs_mount *mp = req->ip1->i_mount;
1056 :
1057 2491852 : if (hweight32(reflink_state) != 1)
1058 : return false;
1059 110 : if (req->startoff1 != 0 || req->startoff2 != 0)
1060 : return false;
1061 90 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
1062 : return false;
1063 90 : if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
1064 0 : return false;
1065 : return true;
1066 : }
1067 :
1068 :
/* Allocate and initialize a new incore intent item from a request. */
struct xfs_swapext_intent *
xfs_swapext_init_intent(
	const struct xfs_swapext_req	*req,
	unsigned int			*reflink_state)
{
	struct xfs_swapext_intent	*sxi;
	unsigned int			rs = 0;

	/* __GFP_NOFAIL means this allocation never returns NULL. */
	sxi = kmem_cache_zalloc(xfs_swapext_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&sxi->sxi_list);
	sxi->sxi_ip1 = req->ip1;
	sxi->sxi_ip2 = req->ip2;
	sxi->sxi_startoff1 = req->startoff1;
	sxi->sxi_startoff2 = req->startoff2;
	sxi->sxi_blockcount = req->blockcount;
	/* -1 means "do not change the file size"; see SET_SIZES below. */
	sxi->sxi_isize1 = sxi->sxi_isize2 = -1;

	if (req->whichfork == XFS_ATTR_FORK)
		sxi->sxi_flags |= XFS_SWAP_EXT_ATTR_FORK;

	/*
	 * If the caller wants the file sizes exchanged too, record each
	 * inode's new size as the *other* inode's current on-disk size.
	 * Size swapping only applies to the data fork.
	 */
	if (req->whichfork == XFS_DATA_FORK &&
	    (req->req_flags & XFS_SWAP_REQ_SET_SIZES)) {
		sxi->sxi_flags |= XFS_SWAP_EXT_SET_SIZES;
		sxi->sxi_isize1 = req->ip2->i_disk_size;
		sxi->sxi_isize2 = req->ip1->i_disk_size;
	}

	/* Translate the remaining request flags into intent flags. */
	if (req->req_flags & XFS_SWAP_REQ_INO1_WRITTEN)
		sxi->sxi_flags |= XFS_SWAP_EXT_INO1_WRITTEN;
	if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
		sxi->sxi_flags |= XFS_SWAP_EXT_CVT_INO2_SF;

	if (req->req_flags & XFS_SWAP_REQ_LOGGED)
		sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_LOGGED;
	if (req->req_flags & XFS_SWAP_REQ_NREXT64)
		sxi->sxi_op_flags |= XFS_SWAP_EXT_OP_NREXT64;

	if (req->whichfork == XFS_DATA_FORK) {
		/*
		 * Record the state of each inode's reflink flag before the
		 * operation.
		 */
		if (xfs_is_reflink_inode(req->ip1))
			rs |= XFS_REFLINK_STATE_IP1;
		if (xfs_is_reflink_inode(req->ip2))
			rs |= XFS_REFLINK_STATE_IP2;

		/*
		 * Figure out if we're clearing the reflink flags (which
		 * effectively swaps them) after the operation.
		 */
		if (sxi_can_exchange_reflink_flags(req, rs)) {
			if (rs & XFS_REFLINK_STATE_IP1)
				sxi->sxi_flags |=
					XFS_SWAP_EXT_CLEAR_INO1_REFLINK;
			if (rs & XFS_REFLINK_STATE_IP2)
				sxi->sxi_flags |=
					XFS_SWAP_EXT_CLEAR_INO2_REFLINK;
		}
	}

	/* Hand the pre-swap reflink state back to the caller, if wanted. */
	if (reflink_state)
		*reflink_state = rs;
	return sxi;
}
1136 :
/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 */
int
xfs_swapext_estimate(
	struct xfs_swapext_req		*req)
{
	struct xfs_swapext_intent	*sxi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_swapext_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));

	bmap_flags = xfs_bmapi_aflag(req->whichfork);
	/* Throwaway intent used purely to drive the dry-run loop below. */
	sxi = xfs_swapext_init_intent(req, NULL);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the extent at each end of the range, and for
	 * each step of the swap we can split the extent that we're working on
	 * if the extents do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (sxi_has_more_swap_work(sxi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * swap.  Because we're simulating the swap, pass in adj to
		 * capture skipped mappings for correct estimation of bmbt
		 * record merges.
		 */
		error = xfs_swapext_find_mappings(sxi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!sxi_has_more_swap_work(sxi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next extents from both files. */
		error = get_next_ext(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = get_next_ext(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/* Update extent count deltas. */
		d_nexts1 += delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		/*
		 * NOTE(review): ip1's mount is passed for the ip2 delta too;
		 * presumably both inodes live on the same mount and mp is
		 * only used for tracing inside delta_nextents_step — confirm.
		 */
		d_nexts2 += delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/* Now pretend we swapped the extents. */
		if (can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		sxi_advance(sxi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    req->whichfork == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the swap operation will not overflow.
	 */
	trace_xfs_swapext_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		/* Same inode on both sides: the deltas land in one fork. */
		error = ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_swapext_initial_estimate(req);
	error = xfs_swapext_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_swapext_intent_cache, sxi);
	return error;
}
1250 :
/* Set the reflink iflag on @ip and log the inode core to the transaction. */
static inline void
xfs_swapext_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
1261 :
1262 : /*
1263 : * If either file has shared blocks and we're swapping data forks, we must flag
1264 : * the other file as having shared blocks so that we get the shared-block rmap
1265 : * functions if we need to fix up the rmaps.
1266 : */
1267 : void
1268 1293123 : xfs_swapext_ensure_reflink(
1269 : struct xfs_trans *tp,
1270 : const struct xfs_swapext_intent *sxi,
1271 : unsigned int reflink_state)
1272 : {
1273 1293123 : if ((reflink_state & XFS_REFLINK_STATE_IP1) &&
1274 1196308 : !xfs_is_reflink_inode(sxi->sxi_ip2))
1275 0 : xfs_swapext_set_reflink(tp, sxi->sxi_ip2);
1276 :
1277 1293123 : if ((reflink_state & XFS_REFLINK_STATE_IP2) &&
1278 1196365 : !xfs_is_reflink_inode(sxi->sxi_ip1))
1279 55 : xfs_swapext_set_reflink(tp, sxi->sxi_ip1);
1280 1293123 : }
1281 :
1282 : /* Widen the extent counts of both inodes if necessary. */
1283 : static inline void
1284 1293105 : xfs_swapext_upgrade_extent_counts(
1285 : struct xfs_trans *tp,
1286 : const struct xfs_swapext_intent *sxi)
1287 : {
1288 1293105 : if (!(sxi->sxi_op_flags & XFS_SWAP_EXT_OP_NREXT64))
1289 : return;
1290 :
1291 0 : sxi->sxi_ip1->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1292 0 : xfs_trans_log_inode(tp, sxi->sxi_ip1, XFS_ILOG_CORE);
1293 :
1294 0 : sxi->sxi_ip2->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1295 0 : xfs_trans_log_inode(tp, sxi->sxi_ip2, XFS_ILOG_CORE);
1296 : }
1297 :
/*
 * Schedule a swap a range of extents from one inode to another.  If the atomic
 * swap feature is enabled, then the operation progress can be resumed even if
 * the system goes down.  The caller must commit the transaction to start the
 * work.
 *
 * The caller must ensure the inodes must be joined to the transaction and
 * ILOCKd; they will still be joined to the transaction at exit.
 */
void
xfs_swapext(
	struct xfs_trans		*tp,
	const struct xfs_swapext_req	*req)
{
	struct xfs_swapext_intent	*sxi;
	unsigned int			reflink_state;

	/* Debug-build sanity checks on the caller-supplied request. */
	ASSERT(xfs_isilocked(req->ip1, XFS_ILOCK_EXCL));
	ASSERT(xfs_isilocked(req->ip2, XFS_ILOCK_EXCL));
	ASSERT(req->whichfork != XFS_COW_FORK);
	ASSERT(!(req->req_flags & ~XFS_SWAP_REQ_FLAGS));
	/* Size swapping is only meaningful for the data fork. */
	if (req->req_flags & XFS_SWAP_REQ_SET_SIZES)
		ASSERT(req->whichfork == XFS_DATA_FORK);
	/*
	 * Converting ip2 back to shortform only applies to attr forks, or to
	 * data forks of directories and symlinks.
	 */
	if (req->req_flags & XFS_SWAP_REQ_CVT_INO2_SF)
		ASSERT(req->whichfork == XFS_ATTR_FORK ||
		       (req->whichfork == XFS_DATA_FORK &&
			(S_ISDIR(VFS_I(req->ip2)->i_mode) ||
			 S_ISLNK(VFS_I(req->ip2)->i_mode))));

	/* Empty range: nothing to exchange. */
	if (req->blockcount == 0)
		return;

	/* Build the intent, queue the deferred work, and fix up flags. */
	sxi = xfs_swapext_init_intent(req, &reflink_state);
	xfs_swapext_schedule(tp, sxi);
	xfs_swapext_ensure_reflink(tp, sxi, reflink_state);
	xfs_swapext_upgrade_extent_counts(tp, sxi);
}
|