Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_ialloc.h"
16 : #include "xfs_quota.h"
17 : #include "xfs_bmap.h"
18 : #include "xfs_bmap_btree.h"
19 : #include "xfs_trans_space.h"
20 : #include "xfs_dir2.h"
21 : #include "xfs_xchgrange.h"
22 : #include "xfs_swapext.h"
23 : #include "xfs_defer.h"
24 : #include "xfs_symlink_remote.h"
25 : #include "scrub/scrub.h"
26 : #include "scrub/common.h"
27 : #include "scrub/repair.h"
28 : #include "scrub/trace.h"
29 : #include "scrub/tempfile.h"
30 : #include "scrub/tempswap.h"
31 : #include "scrub/xfile.h"
32 :
33 : /*
34 : * Create a temporary file of type @mode for reconstructing metadata, with the
35 : * intention of atomically swapping the temporary file's contents with the file
36 : * that's being repaired. On success, returns 0 with the new root-owned, unlinked inode in sc->tempip and sc->temp_ilock_flags zeroed; otherwise returns an error code.
37 : */
38 : int
39 27773448 : xrep_tempfile_create(
40 : struct xfs_scrub *sc,
41 : uint16_t mode)
42 : {
43 27773448 : struct xfs_mount *mp = sc->mp;
44 27773448 : struct xfs_trans *tp = NULL;
45 27773448 : struct xfs_dquot *udqp = NULL;
46 27773448 : struct xfs_dquot *gdqp = NULL;
47 27773448 : struct xfs_dquot *pdqp = NULL;
48 27773448 : struct xfs_trans_res *tres;
49 27773448 : struct xfs_inode *dp = mp->m_rootip;
50 27773448 : xfs_ino_t ino;
51 27773448 : unsigned int resblks;
52 27773448 : bool is_dir = S_ISDIR(mode);
53 27773448 : int error;
54 :
55 55546896 : if (xfs_is_shutdown(mp))
56 : return -EIO;
57 55546896 : if (xfs_is_readonly(mp))
58 : return -EROFS;
59 :
60 27773448 : ASSERT(sc->tp == NULL);
61 27773448 : ASSERT(sc->tempip == NULL);
62 :
63 : /*
64 : * Make sure that we have allocated dquot(s) on disk. The temporary
65 : * inode should be completely root owned so that we don't fail due to
66 : * quota limits.
67 : */
68 27773448 : error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
69 : XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
70 27762681 : if (error)
71 : return error;
72 :
73 27762681 : if (is_dir) {
74 584906 : resblks = xfs_mkdir_space_res(mp, 0);
75 584906 : tres = &M_RES(mp)->tr_mkdir;
76 : } else {
77 27177775 : resblks = XFS_IALLOC_SPACE_RES(mp);
78 27177775 : tres = &M_RES(mp)->tr_create_tmpfile;
79 : }
80 :
81 27762681 : error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
82 : &tp);
83 27555061 : if (error)
84 102295 : goto out_release_dquots;
85 :
86 : /* Allocate an inode, then initialize it and stash it in sc->tempip. */
87 27452766 : error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
88 27709109 : if (error)
89 7175912 : goto out_trans_cancel;
90 20533197 : error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
91 : 0, false, &sc->tempip);
92 20532347 : if (error)
93 0 : goto out_trans_cancel;
94 :
95 : /* Change the ownership of the inode to root and clear the realtime flags. */
96 20532347 : VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
97 20532347 : VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
98 20532347 : sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 20532347 : xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
100 :
101 : /*
102 : * Mark our temporary file as private so that LSMs and the ACL code
103 : * don't try to add their own metadata or reason about these files.
104 : * The file should never be exposed to userspace. S_PRIVATE with IOP_XATTR cleared is also the combination that xrep_is_tempfile() tests for.
105 : */
106 20533215 : VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
107 20533215 : VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
108 :
109 20533215 : if (is_dir) {
110 584335 : error = xfs_dir_init(tp, sc->tempip, dp);
111 584222 : if (error)
112 0 : goto out_trans_cancel;
113 19948880 : } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
114 1417606 : error = xfs_symlink_write_target(tp, sc->tempip, ".", 1, 0, 0);
115 1417606 : if (error)
116 0 : goto out_trans_cancel;
117 : }
118 :
119 : /*
120 : * Attach the dquot(s) to the inodes and modify them incore.
121 : * These ids of the inode couldn't have changed since the new
122 : * inode has been locked ever since it was created.
123 : */
124 20533102 : xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
125 :
126 : /*
127 : * Put our temp file on the unlinked list so it's purged automatically.
128 : * Anything being reconstructed using this file must be atomically
129 : * swapped with the original file because the contents here will be
130 : * purged when the inode is dropped or log recovery cleans out the
131 : * unlinked list.
132 : */
133 20532822 : error = xfs_iunlink(tp, sc->tempip);
134 20533164 : if (error)
135 0 : goto out_trans_cancel;
136 :
137 20533164 : error = xfs_trans_commit(tp);
138 20533159 : if (error)
139 0 : goto out_release_inode;
140 :
141 20533159 : trace_xrep_tempfile_create(sc);
142 :
143 20533029 : xfs_qm_dqrele(udqp);
144 20532988 : xfs_qm_dqrele(gdqp);
145 20533031 : xfs_qm_dqrele(pdqp);
146 :
147 : /* Finish setting up the incore / vfs context; the tempfile is returned with ILOCK_EXCL dropped. */
148 20533043 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
149 20532998 : xfs_setup_iops(sc->tempip);
150 20532762 : xfs_finish_inode_setup(sc->tempip);
151 :
152 20533061 : sc->temp_ilock_flags = 0;
153 20533061 : return error;
154 :
155 7175912 : out_trans_cancel:
156 7175912 : xfs_trans_cancel(tp);
157 6913820 : out_release_inode:
158 : /*
159 : * Wait until after the current transaction is aborted to finish the
160 : * setup of the inode and release the inode. This prevents recursive
161 : * transactions and deadlocks from xfs_inactive. NOTE(review): sc->tempip is not reset to NULL after xchk_irele() below; confirm that no later xrep_tempfile_rele() call can release it a second time.
162 : */
163 6913820 : if (sc->tempip) {
164 0 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
165 0 : xfs_finish_inode_setup(sc->tempip);
166 0 : xchk_irele(sc, sc->tempip);
167 : }
168 6913820 : out_release_dquots:
169 7016115 : xfs_qm_dqrele(udqp);
170 7083010 : xfs_qm_dqrele(gdqp);
171 7137092 : xfs_qm_dqrele(pdqp);
172 :
173 7137092 : return error;
174 : }
175 :
176 : /* Trylock IOLOCK_EXCL on the temporary file. Returns true and records the lock in sc->temp_ilock_flags on success, false otherwise. */
177 : bool
178 6765097 : xrep_tempfile_iolock_nowait(
179 : struct xfs_scrub *sc)
180 : {
181 6765097 : if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
182 6765059 : sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
183 6765059 : return true;
184 : }
185 :
186 : return false;
187 : }
188 :
189 : /*
190 : * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
191 : * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
192 : * to avoid deadlocks and lockdep complaints. Retries after delay(1) until the lock is taken (returns 0) or xchk_should_terminate() reports an error, which is returned.
193 : */
194 : int
195 6765090 : xrep_tempfile_iolock_polled(
196 : struct xfs_scrub *sc)
197 : {
198 6765090 : int error = 0;
199 :
200 6765090 : while (!xrep_tempfile_iolock_nowait(sc)) {
201 0 : if (xchk_should_terminate(sc, &error))
202 0 : return error;
203 0 : delay(1);
204 : }
205 :
206 : return 0;
207 : }
208 :
209 : /* Release IOLOCK_EXCL on the temporary file and clear it from sc->temp_ilock_flags. */
210 : void
211 2244119 : xrep_tempfile_iounlock(
212 : struct xfs_scrub *sc)
213 : {
214 2244119 : xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
215 2244097 : sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
216 2244097 : }
217 :
218 : /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. The flag is recorded in sc->temp_ilock_flags before the lock call is made. */
219 : void
220 2306185 : xrep_tempfile_ilock(
221 : struct xfs_scrub *sc)
222 : {
223 2306185 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
224 2306185 : xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
225 2306182 : }
226 :
227 : /* Try to grab ILOCK_EXCL on the temporary file. Returns true and records the lock in sc->temp_ilock_flags on success, false otherwise. */
228 : bool
229 0 : xrep_tempfile_ilock_nowait(
230 : struct xfs_scrub *sc)
231 : {
232 0 : if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
233 0 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
234 0 : return true;
235 : }
236 :
237 : return false;
238 : }
239 :
240 : /* Unlock ILOCK_EXCL on the temporary file after an update and clear it from sc->temp_ilock_flags. */
241 : void
242 4451249 : xrep_tempfile_iunlock(
243 : struct xfs_scrub *sc)
244 : {
245 4451249 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
246 4450863 : sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
247 4450863 : }
248 :
249 : /*
250 : * Begin the process of making changes to both the file being scrubbed and
251 : * the temporary file by taking ILOCK_EXCL on both with xfs_lock_two_inodes(), then recording the flag in sc->ilock_flags and sc->temp_ilock_flags.
252 : */
253 : void
254 0 : xrep_tempfile_ilock_both(
255 : struct xfs_scrub *sc)
256 : {
257 0 : xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
258 0 : sc->ilock_flags |= XFS_ILOCK_EXCL;
259 0 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
260 0 : }
261 :
262 : /* Unlock ILOCK_EXCL on both files: the temporary file first, then the file being scrubbed via xchk_iunlock(). */
263 : void
264 0 : xrep_tempfile_iunlock_both(
265 : struct xfs_scrub *sc)
266 : {
267 0 : xrep_tempfile_iunlock(sc);
268 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
269 0 : }
270 :
271 : /* Release the temporary file, if there is one: drop whatever locks sc->temp_ilock_flags records, release the inode, and clear sc->tempip. */
272 : void
273 1105648183 : xrep_tempfile_rele(
274 : struct xfs_scrub *sc)
275 : {
276 1105648183 : if (!sc->tempip)
277 : return;
278 :
279 20530639 : if (sc->temp_ilock_flags) {
280 4573988 : xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
281 4574544 : sc->temp_ilock_flags = 0;
282 : }
283 :
284 20531195 : xchk_irele(sc, sc->tempip);
285 20520519 : sc->tempip = NULL;
286 : }
287 :
288 : /*
289 : * Make sure that the given range of the data fork of the temporary file is
290 : * mapped to written blocks. The caller must ensure that both inodes are
291 : * joined to the transaction. Returns 0 on success, -EFSCORRUPTED if no mapping comes back or a delalloc reservation is found, or the error from the bmap / defer helpers.
292 : */
293 : int
294 0 : xrep_tempfile_prealloc(
295 : struct xfs_scrub *sc,
296 : xfs_fileoff_t off,
297 : xfs_filblks_t len)
298 : {
299 0 : struct xfs_bmbt_irec map;
300 0 : xfs_fileoff_t end = off + len;
301 0 : int error;
302 :
303 0 : ASSERT(sc->tempip != NULL);
304 0 : ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
305 :
306 0 : for (; off < end; off = map.br_startoff + map.br_blockcount) {
307 0 : int nmaps = 1;
308 :
309 : /*
310 : * If we have a written extent mapping this block then we're
311 : * in ok shape; skip ahead to the end of that mapping.
312 : */
313 0 : error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
314 : XFS_DATA_FORK);
315 0 : if (error)
316 0 : return error;
317 0 : if (nmaps == 0) {
318 0 : ASSERT(nmaps != 0);
319 0 : return -EFSCORRUPTED;
320 : }
321 :
322 0 : if (xfs_bmap_is_written_extent(&map))
323 0 : continue;
324 :
325 : /*
326 : * If we find a delalloc reservation then something is very
327 : * very wrong. Bail out.
328 : */
329 0 : if (map.br_startblock == DELAYSTARTBLOCK)
330 : return -EFSCORRUPTED;
331 :
332 : /*
333 : * Make sure this block has a real zeroed extent allocated to
334 : * it. The resulting mapping in map also drives the loop's next offset.
335 : */
336 0 : nmaps = 1;
337 0 : error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
338 : XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
339 : &nmaps);
340 0 : if (error)
341 0 : return error;
342 :
343 0 : trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
344 :
345 : /* Commit new extent and all deferred work. */
346 0 : error = xfs_defer_finish(&sc->tp);
347 0 : if (error)
348 0 : return error;
349 : }
350 :
351 : return 0;
352 : }
353 :
354 : /*
355 : * Write data to each block of a file. The given range of the tempfile's data
356 : * fork must already be populated with written extents. For each block, @prep_fn is called with the buffer and @data to fill it in before the buffer is queued for delayed write. Returns 0 on success; on error the queued buffers are cancelled and the error is returned.
357 : */
358 : int
359 0 : xrep_tempfile_copyin(
360 : struct xfs_scrub *sc,
361 : xfs_fileoff_t off,
362 : xfs_filblks_t len,
363 : xrep_tempfile_copyin_fn prep_fn,
364 : void *data)
365 : {
366 0 : LIST_HEAD(buffers_list);
367 0 : struct xfs_mount *mp = sc->mp;
368 0 : struct xfs_buf *bp;
369 0 : xfs_fileoff_t flush_mask;
370 0 : xfs_fileoff_t end = off + len;
371 0 : loff_t pos = XFS_FSB_TO_B(mp, off);
372 0 : int error = 0;
373 :
374 0 : ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
375 :
376 : /* Flush buffers to disk every 512K (1U << 19 bytes, converted to a mask of file block offsets) */
377 0 : flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
378 :
379 0 : for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
380 0 : struct xfs_bmbt_irec map;
381 0 : int nmaps = 1;
382 :
383 : /* Read block mapping for this file block; anything but a written extent is treated as corruption. */
384 0 : error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
385 0 : if (error)
386 0 : goto out_err;
387 0 : if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
388 0 : error = -EFSCORRUPTED;
389 0 : goto out_err;
390 : }
391 :
392 : /* Get the metadata buffer for this offset in the file. */
393 0 : error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
394 0 : XFS_FSB_TO_DADDR(mp, map.br_startblock),
395 : mp->m_bsize, 0, &bp);
396 0 : if (error)
397 0 : goto out_err;
398 :
399 0 : trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
400 :
401 : /* Let the caller's prep_fn fill the buffer with a block's worth of data (e.g. from an xfile). */
402 0 : error = prep_fn(sc, bp, data);
403 0 : if (error) {
404 0 : xfs_trans_brelse(sc->tp, bp);
405 0 : goto out_err;
406 : }
407 :
408 : /* Queue buffer, and flush if we have too much dirty data. */
409 0 : xfs_buf_delwri_queue_here(bp, &buffers_list);
410 0 : xfs_trans_brelse(sc->tp, bp);
411 :
412 0 : if (!(off & flush_mask)) {
413 0 : error = xfs_buf_delwri_submit(&buffers_list);
414 0 : if (error)
415 0 : goto out_err;
416 : }
417 : }
418 :
419 : /*
420 : * Write the new blocks to disk. If buffers_list isn't empty after
421 : * that, then something went wrong and we have to fail. This should
422 : * never happen, but we'll check anyway.
423 : */
424 0 : error = xfs_buf_delwri_submit(&buffers_list);
425 0 : if (error)
426 0 : goto out_err;
427 :
428 0 : if (!list_empty(&buffers_list)) {
429 0 : ASSERT(list_empty(&buffers_list));
430 0 : error = -EIO;
431 0 : goto out_err;
432 : }
433 :
434 : return 0;
435 :
436 0 : out_err:
437 0 : xfs_buf_delwri_cancel(&buffers_list);
438 0 : return error;
439 : }
440 :
441 : /*
442 : * Set the temporary file's size. Caller must join the tempfile to the scrub
443 : * transaction and is responsible for adjusting block mappings as needed. Returns 0 without doing anything if i_disk_size already equals @isize; otherwise updates i_disk_size and the VFS inode size, then returns the result of xrep_tempfile_roll_trans().
444 : */
445 : int
446 0 : xrep_tempfile_set_isize(
447 : struct xfs_scrub *sc,
448 : unsigned long long isize)
449 : {
450 0 : if (sc->tempip->i_disk_size == isize)
451 : return 0;
452 :
453 0 : sc->tempip->i_disk_size = isize;
454 0 : i_size_write(VFS_I(sc->tempip), isize);
455 0 : return xrep_tempfile_roll_trans(sc);
456 : }
457 :
458 : /*
459 : * Roll a repair transaction involving the temporary file. Caller must join
460 : * both the temporary file and the file being scrubbed to the transaction.
461 : * This function logs the tempfile's inode core, rolls via xrep_roll_trans(), and rejoins the tempfile; it returns with both inodes joined to a new scrub transaction,
462 : * or the usual negative errno.
463 : */
464 : int
465 4579346 : xrep_tempfile_roll_trans(
466 : struct xfs_scrub *sc)
467 : {
468 4579346 : int error;
469 :
470 4579346 : xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
471 4579548 : error = xrep_roll_trans(sc);
472 4579562 : if (error)
473 : return error;
474 :
475 4579535 : xfs_trans_ijoin(sc->tp, sc->tempip, 0);
476 4579535 : return 0;
477 : }
478 :
479 : /* Enable atomic extent swapping for this scrub. Returns 0 immediately if XREP_FSGATES_ATOMIC_XCHG is already set in sc->flags; otherwise returns the helper's error, -EOPNOTSUPP if the helper did not set need_rele, or 0 after setting the flag. */
480 : int
481 4575669 : xrep_tempswap_grab_log_assist(
482 : struct xfs_scrub *sc)
483 : {
484 4575669 : bool need_rele = false;
485 4575669 : int error;
486 :
487 4575669 : if (sc->flags & XREP_FSGATES_ATOMIC_XCHG)
488 : return 0;
489 :
490 4575432 : error = xfs_xchg_range_grab_log_assist(sc->mp, true, &need_rele);
491 4575765 : if (error)
492 : return error;
493 4574010 : if (!need_rele) {
494 0 : ASSERT(need_rele);
495 0 : return -EOPNOTSUPP;
496 : }
497 :
498 4574010 : trace_xchk_fsgates_enable(sc, XREP_FSGATES_ATOMIC_XCHG);
499 :
500 4572822 : sc->flags |= XREP_FSGATES_ATOMIC_XCHG;
501 4572822 : return 0;
502 : }
503 :
504 : /*
505 : * Fill out the swapext request in preparation for swapping the contents of a
506 : * metadata file that we've rebuilt in the temp file. Zeroes all of @tx first. Returns 0, or -EINVAL if @whichfork is the COW fork or either file lacks that fork.
507 : */
508 : STATIC int
509 4573757 : xrep_tempswap_prep_request(
510 : struct xfs_scrub *sc,
511 : int whichfork,
512 : struct xrep_tempswap *tx)
513 : {
514 4573757 : struct xfs_swapext_req *req = &tx->req;
515 :
516 4573757 : memset(tx, 0, sizeof(struct xrep_tempswap));
517 :
518 : /* COW forks don't exist on disk. */
519 4573757 : if (whichfork == XFS_COW_FORK) {
520 0 : ASSERT(0);
521 0 : return -EINVAL;
522 : }
523 :
524 : /* Both files should have the relevant forks. */
525 9147162 : if (!xfs_ifork_ptr(sc->ip, whichfork) ||
526 4573374 : !xfs_ifork_ptr(sc->tempip, whichfork)) {
527 0 : ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
528 0 : ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
529 0 : return -EINVAL;
530 : }
531 :
532 : /* Swap all mappings in the fork, with the tempfile as ip1 and the file being repaired as ip2. */
533 4573405 : req->ip1 = sc->tempip;
534 4573405 : req->ip2 = sc->ip;
535 4573405 : req->startoff1 = 0;
536 4573405 : req->startoff2 = 0;
537 4573405 : req->whichfork = whichfork;
538 4573405 : req->blockcount = XFS_MAX_FILEOFF;
539 4573405 : req->req_flags = XFS_SWAP_REQ_LOGGED;
540 :
541 : /* Always swap sizes when we're swapping data fork mappings. */
542 4573405 : if (whichfork == XFS_DATA_FORK)
543 247389 : req->req_flags |= XFS_SWAP_REQ_SET_SIZES;
544 :
545 : /*
546 : * If we're repairing symlinks, xattrs, or directories, always try to
547 : * convert ip2 to short format after swapping.
548 : */
549 4573405 : if (whichfork == XFS_ATTR_FORK || S_ISDIR(VFS_I(sc->ip)->i_mode) ||
550 : S_ISLNK(VFS_I(sc->ip)->i_mode))
551 4573405 : req->req_flags |= XFS_SWAP_REQ_CVT_INO2_SF;
552 :
553 : return 0;
554 : }
555 :
556 : /*
557 : * Fill out the swapext resource estimation structures in preparation for
558 : * swapping the contents of a metadata file that we've rebuilt in the temp
559 : * file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files. Expects tx->req to have been filled out already (req->whichfork is read here).
560 : */
561 : STATIC int
562 4574070 : xrep_tempswap_estimate(
563 : struct xfs_scrub *sc,
564 : struct xrep_tempswap *tx)
565 : {
566 4574070 : struct xfs_swapext_req *req = &tx->req;
567 4574070 : struct xfs_ifork *ifp;
568 4574070 : struct xfs_ifork *tifp;
569 4574070 : int state = 0;
570 :
571 : /*
572 : * Deal with either fork being in local format. The swapext code only
573 : * knows how to exchange block mappings for regular files, so we only
574 : * have to know about local format for xattrs and directories. Bit 0 of state is set when the fork of the file being repaired is local; bit 1 when the temp file's fork is local.
575 : */
576 4574070 : ifp = xfs_ifork_ptr(sc->ip, req->whichfork);
577 4572920 : if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
578 4506786 : state |= 1;
579 :
580 4572920 : tifp = xfs_ifork_ptr(sc->tempip, req->whichfork);
581 4572806 : if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
582 4513896 : state |= 2;
583 :
584 4572806 : switch (state) {
585 59666 : case 0:
586 : /* Both files have mapped extents; use the regular estimate. */
587 59666 : return xfs_xchg_range_estimate(req);
588 24 : case 1:
589 : /*
590 : * The file being repaired is in local format, but the temp
591 : * file has mapped extents. To perform the swap, the file
592 : * being repaired must have its shortform data converted to an
593 : * fsblock, and the fork changed to extents format. We need
594 : * one resblk for the conversion; the number of exchanges is
595 : * (worst case) the temporary file's extent count plus the
596 : * block we converted.
597 : */
598 24 : req->ip1_bcount = sc->tempip->i_nblocks;
599 24 : req->ip2_bcount = 1;
600 24 : req->nr_exchanges = 1 + tifp->if_nextents;
601 24 : req->resblks = 1;
602 24 : break;
603 7041 : case 2:
604 : /*
605 : * The temporary file is in local format, but the file being
606 : * repaired has mapped extents. To perform the swap, the temp
607 : * file must have its shortform data converted to an fsblock,
608 : * and the fork changed to extents format. We need one resblk
609 : * for the conversion; the number of exchanges is (worst case)
610 : * the extent count of the file being repaired plus the block
611 : * we converted.
612 : */
613 7041 : req->ip1_bcount = 1;
614 7041 : req->ip2_bcount = sc->ip->i_nblocks;
615 7041 : req->nr_exchanges = 1 + ifp->if_nextents;
616 7041 : req->resblks = 1;
617 7041 : break;
618 4506075 : case 3:
619 : /*
620 : * Both forks are in local format. To perform the swap, both
621 : * files must have their shortform data converted to fsblocks,
622 : * and both forks must be converted to extents format. We
623 : * need two resblks for the two conversions, and the number of
624 : * exchanges is 1 since there's only one block at fileoff 0.
625 : * Presumably, the caller could not exchange the two inode fork
626 : * areas directly.
627 : */
628 4506075 : req->ip1_bcount = 1;
629 4506075 : req->ip2_bcount = 1;
630 4506075 : req->nr_exchanges = 1;
631 4506075 : req->resblks = 2;
632 4506075 : break;
633 : }
634 :
635 4513140 : return xfs_swapext_estimate_overhead(req);
636 : }
637 :
638 : /*
639 : * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
640 : * this if quota enforcement is disabled or if both inodes' dquots are the
641 : * same. Uses the block counts already filled into tx->req and returns 0 or the
642 : * error from xfs_trans_reserve_quota_nblks().
643 : */
644 : STATIC int
645 4574279 : xrep_tempswap_reserve_quota(
646 : struct xfs_scrub *sc,
647 : const struct xrep_tempswap *tx)
648 : {
649 4574279 : struct xfs_trans *tp = sc->tp;
650 4574279 : const struct xfs_swapext_req *req = &tx->req;
651 4574279 : int64_t ddelta, rdelta;
652 4574279 : int error;
653 :
654 : /*
655 : * Don't bother with a quota reservation if quotas are not on, if
656 : * ip1 and ip2 are the same inode, or the two inodes have the same dquots.
657 : */
658 4574279 : if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
659 4564709 : (req->ip1->i_udquot == req->ip2->i_udquot &&
660 4261521 : req->ip1->i_gdquot == req->ip2->i_gdquot &&
661 4258500 : req->ip1->i_pdquot == req->ip2->i_pdquot))
662 : return 0;
663 :
664 : /*
665 : * Quota reservation for each file comes from two sources. First, we
666 : * need to account for any net gain in mapped blocks during the swap.
667 : * Second, we need reservation for the gross gain in mapped blocks so
668 : * that we don't trip over any quota block reservation assertions. We
669 : * must reserve the gross gain because the quota code subtracts from
670 : * bcount the number of blocks that we unmap; it does not add that
671 : * quantity back to the quota block reservation.
672 : */
673 314247 : ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
674 314247 : rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
675 314247 : error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
676 314247 : ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
677 : true);
678 313417 : if (error)
679 : return error;
680 :
681 313417 : ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
682 313417 : rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
683 313417 : return xfs_trans_reserve_quota_nblks(tp, req->ip2,
684 313417 : ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
685 : true);
686 : }
687 :
688 : /*
689 : * Prepare an existing transaction for a swap. The caller must hold
690 : * the ILOCK of both the inode being repaired and the temporary file.
691 : * Only use this when those ILOCKs cannot be dropped.
692 : *
693 : * Fill out the swapext request and resource estimation structures in
694 : * preparation for swapping the contents of a metadata file that we've rebuilt
695 : * in the temp file, then reserve more space (tx->req.resblks) and quota to the transaction. Returns 0 or the first error encountered.
696 : */
697 : int
698 0 : xrep_tempswap_trans_reserve(
699 : struct xfs_scrub *sc,
700 : int whichfork,
701 : struct xrep_tempswap *tx)
702 : {
703 0 : int error;
704 :
705 0 : ASSERT(sc->tp != NULL);
706 0 : ASSERT(xfs_isilocked(sc->ip, XFS_ILOCK_EXCL));
707 0 : ASSERT(xfs_isilocked(sc->tempip, XFS_ILOCK_EXCL));
708 :
709 0 : error = xrep_tempswap_prep_request(sc, whichfork, tx);
710 0 : if (error)
711 : return error;
712 :
713 0 : error = xfs_swapext_estimate(&tx->req);
714 0 : if (error)
715 : return error;
716 :
717 0 : error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
718 0 : if (error)
719 : return error;
720 :
721 0 : return xrep_tempswap_reserve_quota(sc, tx);
722 : }
723 :
724 : /*
725 : * Allocate a transaction, ILOCK the temporary file and the file being
726 : * repaired, and join them to the transaction in preparation to swap fork
727 : * contents as part of a repair operation. Returns 0 or an error code; note that if the final quota reservation fails, this returns with sc->tp still allocated and both ILOCK_EXCL flags recorded in the scrub context.
728 : */
729 : int
730 4574889 : xrep_tempswap_trans_alloc(
731 : struct xfs_scrub *sc,
732 : int whichfork,
733 : struct xrep_tempswap *tx)
734 : {
735 4574889 : unsigned int flags = 0;
736 4574889 : int error;
737 :
738 4574889 : ASSERT(sc->tp == NULL);
739 :
740 4574889 : error = xrep_tempswap_prep_request(sc, whichfork, tx);
741 4573462 : if (error)
742 : return error;
743 :
744 4573635 : error = xrep_tempswap_estimate(sc, tx);
745 4573983 : if (error)
746 : return error;
747 :
748 4573983 : if (xfs_has_lazysbcount(sc->mp))
749 4573993 : flags |= XFS_TRANS_RES_FDBLKS;
750 :
751 4573983 : error = xrep_tempswap_grab_log_assist(sc);
752 4573145 : if (error)
753 : return error;
754 :
755 4573511 : error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
756 4573511 : tx->req.resblks, 0, flags, &sc->tp);
757 4575312 : if (error)
758 : return error;
759 :
760 4575282 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
761 4575282 : sc->ilock_flags |= XFS_ILOCK_EXCL;
762 4575282 : xfs_xchg_range_ilock(sc->tp, sc->ip, sc->tempip);
763 :
764 4575071 : return xrep_tempswap_reserve_quota(sc, tx);
765 : }
766 :
767 : /* Swap forks between the file being repaired and the temporary file using the request in @tx. Returns the xfs_defer_finish() error, or 0 on success. */
768 : int
769 66730 : xrep_tempswap_contents(
770 : struct xfs_scrub *sc,
771 : struct xrep_tempswap *tx)
772 : {
773 66730 : int error;
774 :
775 66730 : ASSERT(sc->flags & XREP_FSGATES_ATOMIC_XCHG);
776 :
777 66730 : xfs_swapext(sc->tp, &tx->req);
778 66729 : error = xfs_defer_finish(&sc->tp);
779 66731 : if (error)
780 : return error;
781 :
782 : /*
783 : * If we swapped the ondisk sizes of two metadata files, we must swap
784 : * the incore sizes as well. Since online fsck doesn't use swapext on
785 : * the data forks of user-accessible files, the two sizes are always
786 : * the same, so we don't need to log the inodes.
787 : */
788 66731 : if (tx->req.req_flags & XFS_SWAP_REQ_SET_SIZES) {
789 13628 : loff_t temp;
790 :
791 13628 : temp = i_size_read(VFS_I(sc->ip));
792 13628 : i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
793 13628 : i_size_write(VFS_I(sc->tempip), temp);
794 : }
795 :
796 : return 0;
797 : }
798 :
799 : /*
800 : * Write local format data from one of the temporary file's forks into the same
801 : * fork of file being repaired, and copy the tempfile's sizes over for the data fork.
802 : * Caller must ensure that the file being repaired has enough fork space to
803 : * hold all the bytes. Both forks must exist and be in local format; the inode core and fork data of sc->ip are logged to sc->tp. Only the data and attr forks are handled.
804 : */
805 : void
806 4505973 : xrep_tempfile_copyout_local(
807 : struct xfs_scrub *sc,
808 : int whichfork)
809 : {
810 4505973 : struct xfs_ifork *temp_ifp;
811 4505973 : struct xfs_ifork *ifp;
812 4505973 : unsigned int ilog_flags = XFS_ILOG_CORE;
813 :
814 4505973 : temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
815 4506240 : ifp = xfs_ifork_ptr(sc->ip, whichfork);
816 :
817 4506116 : ASSERT(temp_ifp != NULL);
818 4506116 : ASSERT(ifp != NULL);
819 4506116 : ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
820 4506116 : ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
821 :
822 4506116 : switch (whichfork) {
823 233768 : case XFS_DATA_FORK:
824 233768 : ASSERT(sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip));
825 : break;
826 4272348 : case XFS_ATTR_FORK:
827 4272348 : ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
828 : break;
829 0 : default:
830 0 : ASSERT(0);
831 0 : return;
832 : }
833 :
834 4506116 : xfs_idestroy_fork(ifp);
835 4508115 : xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_u1.if_data,
836 : temp_ifp->if_bytes);
837 :
838 4506807 : if (whichfork == XFS_DATA_FORK) {
839 233765 : i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
840 233765 : sc->ip->i_disk_size = sc->tempip->i_disk_size;
841 : }
842 :
843 4506807 : ilog_flags |= xfs_ilog_fdata(whichfork);
844 4506807 : xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
845 : }
846 :
847 : /* Decide if a given XFS inode is a temporary file for a repair: true when its VFS inode is private (IS_PRIVATE) and has IOP_XATTR cleared in i_opflags. */
848 : bool
849 72001919605 : xrep_is_tempfile(
850 : const struct xfs_inode *ip)
851 : {
852 72001919605 : const struct inode *inode = &ip->i_vnode;
853 :
854 72001919605 : if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
855 18447733 : return true;
856 :
857 : return false;
858 : }
|