Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_ialloc.h"
16 : #include "xfs_quota.h"
17 : #include "xfs_bmap.h"
18 : #include "xfs_bmap_btree.h"
19 : #include "xfs_trans_space.h"
20 : #include "xfs_dir2.h"
21 : #include "xfs_xchgrange.h"
22 : #include "xfs_swapext.h"
23 : #include "xfs_defer.h"
24 : #include "xfs_symlink_remote.h"
25 : #include "scrub/scrub.h"
26 : #include "scrub/common.h"
27 : #include "scrub/repair.h"
28 : #include "scrub/trace.h"
29 : #include "scrub/tempfile.h"
30 : #include "scrub/tempswap.h"
31 : #include "scrub/xfile.h"
32 :
33 : /*
34 : * Create a temporary file for reconstructing metadata, with the intention of
35 : * atomically swapping the temporary file's contents with the file that's
36 : * being repaired.
37 : */
38 : int
39 7967565 : xrep_tempfile_create(
40 : struct xfs_scrub *sc,
41 : uint16_t mode)
42 : {
43 7967565 : struct xfs_mount *mp = sc->mp;
44 7967565 : struct xfs_trans *tp = NULL;
45 7967565 : struct xfs_dquot *udqp = NULL;
46 7967565 : struct xfs_dquot *gdqp = NULL;
47 7967565 : struct xfs_dquot *pdqp = NULL;
48 7967565 : struct xfs_trans_res *tres;
49 7967565 : struct xfs_inode *dp = mp->m_rootip;
50 7967565 : xfs_ino_t ino;
51 7967565 : unsigned int resblks;
52 7967565 : bool is_dir = S_ISDIR(mode);
53 7967565 : int error;
54 :
55 15935130 : if (xfs_is_shutdown(mp))
56 : return -EIO;
57 15935130 : if (xfs_is_readonly(mp))
58 : return -EROFS;
59 :
60 7967565 : ASSERT(sc->tp == NULL);
61 7967565 : ASSERT(sc->tempip == NULL);
62 :
63 : /*
64 : * Make sure that we have allocated dquot(s) on disk. The temporary
65 : * inode should be completely root owned so that we don't fail due to
66 : * quota limits.
67 : */
68 7967565 : error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
69 : XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
70 7959815 : if (error)
71 : return error;
72 :
73 7959815 : if (is_dir) {
74 184730 : resblks = xfs_mkdir_space_res(mp, 0);
75 184730 : tres = &M_RES(mp)->tr_mkdir;
76 : } else {
77 7775085 : resblks = XFS_IALLOC_SPACE_RES(mp);
78 7775085 : tres = &M_RES(mp)->tr_create_tmpfile;
79 : }
80 :
81 7959815 : error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
82 : &tp);
83 7972893 : if (error)
84 0 : goto out_release_dquots;
85 :
86 : /* Allocate inode, set up directory. */
87 7972893 : error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
88 7977970 : if (error)
89 0 : goto out_trans_cancel;
90 7977970 : error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
91 : 0, false, &sc->tempip);
92 7977949 : if (error)
93 0 : goto out_trans_cancel;
94 :
95 : /* Change the ownership of the inode to root. */
96 7977949 : VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
97 7977949 : VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
98 7977949 : sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 7977949 : xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
100 :
101 : /*
102 : * Mark our temporary file as private so that LSMs and the ACL code
103 : * don't try to add their own metadata or reason about these files.
104 : * The file should never be exposed to userspace.
105 : */
106 7977968 : VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
107 7977968 : VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
108 :
109 7977968 : if (is_dir) {
110 184693 : error = xfs_dir_init(tp, sc->tempip, dp);
111 184585 : if (error)
112 0 : goto out_trans_cancel;
113 7793275 : } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
114 995972 : error = xfs_symlink_write_target(tp, sc->tempip, ".", 1, 0, 0);
115 995972 : if (error)
116 0 : goto out_trans_cancel;
117 : }
118 :
119 : /*
120 : * Attach the dquot(s) to the inodes and modify them incore.
121 : * These ids of the inode couldn't have changed since the new
122 : * inode has been locked ever since it was created.
123 : */
124 7977860 : xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
125 :
126 : /*
127 : * Put our temp file on the unlinked list so it's purged automatically.
128 : * Anything being reconstructed using this file must be atomically
129 : * swapped with the original file because the contents here will be
130 : * purged when the inode is dropped or log recovery cleans out the
131 : * unlinked list.
132 : */
133 7977882 : error = xfs_iunlink(tp, sc->tempip);
134 7977882 : if (error)
135 0 : goto out_trans_cancel;
136 :
137 7977882 : error = xfs_trans_commit(tp);
138 7977989 : if (error)
139 0 : goto out_release_inode;
140 :
141 7977989 : trace_xrep_tempfile_create(sc);
142 :
143 7977918 : xfs_qm_dqrele(udqp);
144 7977990 : xfs_qm_dqrele(gdqp);
145 7977989 : xfs_qm_dqrele(pdqp);
146 :
147 : /* Finish setting up the incore / vfs context. */
148 7977981 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
149 7977966 : xfs_setup_iops(sc->tempip);
150 7977968 : xfs_finish_inode_setup(sc->tempip);
151 :
152 7977928 : sc->temp_ilock_flags = 0;
153 7977928 : return error;
154 :
155 0 : out_trans_cancel:
156 0 : xfs_trans_cancel(tp);
157 0 : out_release_inode:
158 : /*
159 : * Wait until after the current transaction is aborted to finish the
160 : * setup of the inode and release the inode. This prevents recursive
161 : * transactions and deadlocks from xfs_inactive.
162 : */
163 0 : if (sc->tempip) {
164 0 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
165 0 : xfs_finish_inode_setup(sc->tempip);
166 0 : xchk_irele(sc, sc->tempip);
167 : }
168 0 : out_release_dquots:
169 0 : xfs_qm_dqrele(udqp);
170 0 : xfs_qm_dqrele(gdqp);
171 0 : xfs_qm_dqrele(pdqp);
172 :
173 0 : return error;
174 : }
175 :
176 : /* Take IOLOCK_EXCL on the temporary file, maybe. */
177 : bool
178 337067 : xrep_tempfile_iolock_nowait(
179 : struct xfs_scrub *sc)
180 : {
181 337067 : if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
182 337064 : sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
183 337064 : return true;
184 : }
185 :
186 : return false;
187 : }
188 :
/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * Nobody else is expected to hold the tempfile's IOLOCK, but we poll with a
 * trylock anyway to avoid deadlocks and lockdep complaints.
 *
 * Returns 0 once the lock is held, or the error reported by
 * xchk_should_terminate() if the scrub is told to stop while waiting.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;

		/* Didn't get it; bail out if asked, otherwise nap and retry. */
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}
}
208 :
209 : /* Release IOLOCK_EXCL on the temporary file. */
210 : void
211 127255 : xrep_tempfile_iounlock(
212 : struct xfs_scrub *sc)
213 : {
214 127255 : xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
215 127255 : sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
216 127255 : }
217 :
218 : /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
219 : void
220 134012 : xrep_tempfile_ilock(
221 : struct xfs_scrub *sc)
222 : {
223 134012 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
224 134012 : xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
225 134012 : }
226 :
227 : /* Try to grab ILOCK_EXCL on the temporary file. */
228 : bool
229 0 : xrep_tempfile_ilock_nowait(
230 : struct xfs_scrub *sc)
231 : {
232 0 : if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
233 0 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
234 0 : return true;
235 : }
236 :
237 : return false;
238 : }
239 :
240 : /* Unlock ILOCK_EXCL on the temporary file after an update. */
241 : void
242 211606 : xrep_tempfile_iunlock(
243 : struct xfs_scrub *sc)
244 : {
245 211606 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
246 211602 : sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
247 211602 : }
248 :
249 : /*
250 : * Begin the process of making changes to both the file being scrubbed and
251 : * the temporary file by taking ILOCK_EXCL on both.
252 : */
253 : void
254 0 : xrep_tempfile_ilock_both(
255 : struct xfs_scrub *sc)
256 : {
257 0 : xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
258 0 : sc->ilock_flags |= XFS_ILOCK_EXCL;
259 0 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
260 0 : }
261 :
262 : /* Unlock ILOCK_EXCL on both files. */
263 : void
264 0 : xrep_tempfile_iunlock_both(
265 : struct xfs_scrub *sc)
266 : {
267 0 : xrep_tempfile_iunlock(sc);
268 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
269 0 : }
270 :
271 : /* Release the temporary file. */
272 : void
273 650067168 : xrep_tempfile_rele(
274 : struct xfs_scrub *sc)
275 : {
276 650067168 : if (!sc->tempip)
277 : return;
278 :
279 7977685 : if (sc->temp_ilock_flags) {
280 214016 : xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
281 213977 : sc->temp_ilock_flags = 0;
282 : }
283 :
284 7977646 : xchk_irele(sc, sc->tempip);
285 7976877 : sc->tempip = NULL;
286 : }
287 :
288 : /*
289 : * Make sure that the given range of the data fork of the temporary file is
290 : * mapped to written blocks. The caller must ensure that both inodes are
291 : * joined to the transaction.
292 : */
293 : int
294 0 : xrep_tempfile_prealloc(
295 : struct xfs_scrub *sc,
296 : xfs_fileoff_t off,
297 : xfs_filblks_t len)
298 : {
299 0 : struct xfs_bmbt_irec map;
300 0 : xfs_fileoff_t end = off + len;
301 0 : int error;
302 :
303 0 : ASSERT(sc->tempip != NULL);
304 0 : ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
305 :
306 0 : for (; off < end; off = map.br_startoff + map.br_blockcount) {
307 0 : int nmaps = 1;
308 :
309 : /*
310 : * If we have a real extent mapping this block then we're
311 : * in ok shape.
312 : */
313 0 : error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
314 : XFS_DATA_FORK);
315 0 : if (error)
316 0 : return error;
317 0 : if (nmaps == 0) {
318 0 : ASSERT(nmaps != 0);
319 0 : return -EFSCORRUPTED;
320 : }
321 :
322 0 : if (xfs_bmap_is_written_extent(&map))
323 0 : continue;
324 :
325 : /*
326 : * If we find a delalloc reservation then something is very
327 : * very wrong. Bail out.
328 : */
329 0 : if (map.br_startblock == DELAYSTARTBLOCK)
330 : return -EFSCORRUPTED;
331 :
332 : /*
333 : * Make sure this block has a real zeroed extent allocated to
334 : * it.
335 : */
336 0 : nmaps = 1;
337 0 : error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
338 : XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
339 : &nmaps);
340 0 : if (error)
341 0 : return error;
342 :
343 0 : trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
344 :
345 : /* Commit new extent and all deferred work. */
346 0 : error = xfs_defer_finish(&sc->tp);
347 0 : if (error)
348 0 : return error;
349 : }
350 :
351 : return 0;
352 : }
353 :
354 : /*
355 : * Write data to each block of a file. The given range of the tempfile's data
356 : * fork must already be populated with written extents.
357 : */
358 : int
359 0 : xrep_tempfile_copyin(
360 : struct xfs_scrub *sc,
361 : xfs_fileoff_t off,
362 : xfs_filblks_t len,
363 : xrep_tempfile_copyin_fn prep_fn,
364 : void *data)
365 : {
366 0 : LIST_HEAD(buffers_list);
367 0 : struct xfs_mount *mp = sc->mp;
368 0 : struct xfs_buf *bp;
369 0 : xfs_fileoff_t flush_mask;
370 0 : xfs_fileoff_t end = off + len;
371 0 : loff_t pos = XFS_FSB_TO_B(mp, off);
372 0 : int error = 0;
373 :
374 0 : ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
375 :
376 : /* Flush buffers to disk every 512K */
377 0 : flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
378 :
379 0 : for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
380 0 : struct xfs_bmbt_irec map;
381 0 : int nmaps = 1;
382 :
383 : /* Read block mapping for this file block. */
384 0 : error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
385 0 : if (error)
386 0 : goto out_err;
387 0 : if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
388 0 : error = -EFSCORRUPTED;
389 0 : goto out_err;
390 : }
391 :
392 : /* Get the metadata buffer for this offset in the file. */
393 0 : error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
394 0 : XFS_FSB_TO_DADDR(mp, map.br_startblock),
395 : mp->m_bsize, 0, &bp);
396 0 : if (error)
397 0 : goto out_err;
398 :
399 0 : trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
400 :
401 : /* Read in a block's worth of data from the xfile. */
402 0 : error = prep_fn(sc, bp, data);
403 0 : if (error) {
404 0 : xfs_trans_brelse(sc->tp, bp);
405 0 : goto out_err;
406 : }
407 :
408 : /* Queue buffer, and flush if we have too much dirty data. */
409 0 : xfs_buf_delwri_queue_here(bp, &buffers_list);
410 0 : xfs_trans_brelse(sc->tp, bp);
411 :
412 0 : if (!(off & flush_mask)) {
413 0 : error = xfs_buf_delwri_submit(&buffers_list);
414 0 : if (error)
415 0 : goto out_err;
416 : }
417 : }
418 :
419 : /*
420 : * Write the new blocks to disk. If the ordered list isn't empty after
421 : * that, then something went wrong and we have to fail. This should
422 : * never happen, but we'll check anyway.
423 : */
424 0 : error = xfs_buf_delwri_submit(&buffers_list);
425 0 : if (error)
426 0 : goto out_err;
427 :
428 0 : if (!list_empty(&buffers_list)) {
429 0 : ASSERT(list_empty(&buffers_list));
430 0 : error = -EIO;
431 0 : goto out_err;
432 : }
433 :
434 : return 0;
435 :
436 0 : out_err:
437 0 : xfs_buf_delwri_cancel(&buffers_list);
438 0 : return error;
439 : }
440 :
441 : /*
442 : * Set the temporary file's size. Caller must join the tempfile to the scrub
443 : * transaction and is responsible for adjusting block mappings as needed.
444 : */
445 : int
446 0 : xrep_tempfile_set_isize(
447 : struct xfs_scrub *sc,
448 : unsigned long long isize)
449 : {
450 0 : if (sc->tempip->i_disk_size == isize)
451 : return 0;
452 :
453 0 : sc->tempip->i_disk_size = isize;
454 0 : i_size_write(VFS_I(sc->tempip), isize);
455 0 : return xrep_tempfile_roll_trans(sc);
456 : }
457 :
458 : /*
459 : * Roll a repair transaction involving the temporary file. Caller must join
460 : * both the temporary file and the file being scrubbed to the transaction.
461 : * This function return with both inodes joined to a new scrub transaction,
462 : * or the usual negative errno.
463 : */
464 : int
465 214918 : xrep_tempfile_roll_trans(
466 : struct xfs_scrub *sc)
467 : {
468 214918 : int error;
469 :
470 214918 : xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
471 214919 : error = xrep_roll_trans(sc);
472 214925 : if (error)
473 : return error;
474 :
475 214925 : xfs_trans_ijoin(sc->tp, sc->tempip, 0);
476 214925 : return 0;
477 : }
478 :
479 : /* Enable atomic extent swapping. */
480 : int
481 214984 : xrep_tempswap_grab_log_assist(
482 : struct xfs_scrub *sc)
483 : {
484 214984 : bool need_rele = false;
485 214984 : int error;
486 :
487 214984 : if (sc->flags & XREP_FSGATES_ATOMIC_XCHG)
488 : return 0;
489 :
490 214983 : error = xfs_xchg_range_grab_log_assist(sc->mp, true, &need_rele);
491 214987 : if (error)
492 : return error;
493 214038 : if (!need_rele) {
494 0 : ASSERT(need_rele);
495 0 : return -EOPNOTSUPP;
496 : }
497 :
498 214038 : trace_xchk_fsgates_enable(sc, XREP_FSGATES_ATOMIC_XCHG);
499 :
500 214038 : sc->flags |= XREP_FSGATES_ATOMIC_XCHG;
501 214038 : return 0;
502 : }
503 :
504 : /*
505 : * Fill out the swapext request in preparation for swapping the contents of a
506 : * metadata file that we've rebuilt in the temp file.
507 : */
508 : STATIC int
509 214043 : xrep_tempswap_prep_request(
510 : struct xfs_scrub *sc,
511 : int whichfork,
512 : struct xrep_tempswap *tx)
513 : {
514 214043 : struct xfs_swapext_req *req = &tx->req;
515 :
516 214043 : memset(tx, 0, sizeof(struct xrep_tempswap));
517 :
518 : /* COW forks don't exist on disk. */
519 214043 : if (whichfork == XFS_COW_FORK) {
520 0 : ASSERT(0);
521 0 : return -EINVAL;
522 : }
523 :
524 : /* Both files should have the relevant forks. */
525 428086 : if (!xfs_ifork_ptr(sc->ip, whichfork) ||
526 214045 : !xfs_ifork_ptr(sc->tempip, whichfork)) {
527 0 : ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
528 0 : ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
529 0 : return -EINVAL;
530 : }
531 :
532 : /* Swap all mappings in both forks. */
533 214043 : req->ip1 = sc->tempip;
534 214043 : req->ip2 = sc->ip;
535 214043 : req->startoff1 = 0;
536 214043 : req->startoff2 = 0;
537 214043 : req->whichfork = whichfork;
538 214043 : req->blockcount = XFS_MAX_FILEOFF;
539 214043 : req->req_flags = XFS_SWAP_REQ_LOGGED;
540 :
541 : /* Always swap sizes when we're swapping data fork mappings. */
542 214043 : if (whichfork == XFS_DATA_FORK)
543 17042 : req->req_flags |= XFS_SWAP_REQ_SET_SIZES;
544 :
545 : /*
546 : * If we're repairing symlinks, xattrs, or directories, always try to
547 : * convert ip2 to short format after swapping.
548 : */
549 214043 : if (whichfork == XFS_ATTR_FORK || S_ISDIR(VFS_I(sc->ip)->i_mode) ||
550 : S_ISLNK(VFS_I(sc->ip)->i_mode))
551 214043 : req->req_flags |= XFS_SWAP_REQ_CVT_INO2_SF;
552 :
553 : return 0;
554 : }
555 :
556 : /*
557 : * Fill out the swapext resource estimation structures in preparation for
558 : * swapping the contents of a metadata file that we've rebuilt in the temp
559 : * file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
560 : */
561 : STATIC int
562 214038 : xrep_tempswap_estimate(
563 : struct xfs_scrub *sc,
564 : struct xrep_tempswap *tx)
565 : {
566 214038 : struct xfs_swapext_req *req = &tx->req;
567 214038 : struct xfs_ifork *ifp;
568 214038 : struct xfs_ifork *tifp;
569 214038 : int state = 0;
570 :
571 : /*
572 : * Deal with either fork being in local format. The swapext code only
573 : * knows how to exchange block mappings for regular files, so we only
574 : * have to know about local format for xattrs and directories.
575 : */
576 214038 : ifp = xfs_ifork_ptr(sc->ip, req->whichfork);
577 214040 : if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
578 168952 : state |= 1;
579 :
580 214040 : tifp = xfs_ifork_ptr(sc->tempip, req->whichfork);
581 214039 : if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
582 171101 : state |= 2;
583 :
584 214039 : switch (state) {
585 42938 : case 0:
586 : /* Both files have mapped extents; use the regular estimate. */
587 42938 : return xfs_xchg_range_estimate(req);
588 1 : case 1:
589 : /*
590 : * The file being repaired is in local format, but the temp
591 : * file has mapped extents. To perform the swap, the file
592 : * being repaired must have its shorform data converted to a
593 : * fsblock, and the fork changed to extents format. We need
594 : * one resblk for the conversion; the number of exchanges is
595 : * (worst case) the temporary file's extent count plus the
596 : * block we converted.
597 : */
598 1 : req->ip1_bcount = sc->tempip->i_nblocks;
599 1 : req->ip2_bcount = 1;
600 1 : req->nr_exchanges = 1 + tifp->if_nextents;
601 1 : req->resblks = 1;
602 1 : break;
603 2150 : case 2:
604 : /*
605 : * The temporary file is in local format, but the file being
606 : * repaired has mapped extents. To perform the swap, the temp
607 : * file must have its shortform data converted to an fsblock,
608 : * and the fork changed to extents format. We need one resblk
609 : * for the conversion; the number of exchanges is (worst case)
610 : * the extent count of the file being repaired plus the block
611 : * we converted.
612 : */
613 2150 : req->ip1_bcount = 1;
614 2150 : req->ip2_bcount = sc->ip->i_nblocks;
615 2150 : req->nr_exchanges = 1 + ifp->if_nextents;
616 2150 : req->resblks = 1;
617 2150 : break;
618 168950 : case 3:
619 : /*
620 : * Both forks are in local format. To perform the swap, both
621 : * files must have their shortform data converted to fsblocks,
622 : * and both forks must be converted to extents format. We
623 : * need two resblks for the two conversions, and the number of
624 : * exchanges is 1 since there's only one block at fileoff 0.
625 : * Presumably, the caller could not exchange the two inode fork
626 : * areas directly.
627 : */
628 168950 : req->ip1_bcount = 1;
629 168950 : req->ip2_bcount = 1;
630 168950 : req->nr_exchanges = 1;
631 168950 : req->resblks = 2;
632 168950 : break;
633 : }
634 :
635 171101 : return xfs_swapext_estimate_overhead(req);
636 : }
637 :
638 : /*
639 : * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
640 : * this if quota enforcement is disabled or if both inodes' dquots are the
641 : * same. The qretry structure must be initialized to zeroes before the first
642 : * call to this function.
643 : */
644 : STATIC int
645 214041 : xrep_tempswap_reserve_quota(
646 : struct xfs_scrub *sc,
647 : const struct xrep_tempswap *tx)
648 : {
649 214041 : struct xfs_trans *tp = sc->tp;
650 214041 : const struct xfs_swapext_req *req = &tx->req;
651 214041 : int64_t ddelta, rdelta;
652 214041 : int error;
653 :
654 : /*
655 : * Don't bother with a quota reservation if we're not enforcing them
656 : * or the two inodes have the same dquots.
657 : */
658 214041 : if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
659 214041 : (req->ip1->i_udquot == req->ip2->i_udquot &&
660 189258 : req->ip1->i_gdquot == req->ip2->i_gdquot &&
661 188657 : req->ip1->i_pdquot == req->ip2->i_pdquot))
662 : return 0;
663 :
664 : /*
665 : * Quota reservation for each file comes from two sources. First, we
666 : * need to account for any net gain in mapped blocks during the swap.
667 : * Second, we need reservation for the gross gain in mapped blocks so
668 : * that we don't trip over any quota block reservation assertions. We
669 : * must reserve the gross gain because the quota code subtracts from
670 : * bcount the number of blocks that we unmap; it does not add that
671 : * quantity back to the quota block reservation.
672 : */
673 26990 : ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
674 26990 : rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
675 26990 : error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
676 26990 : ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
677 : true);
678 26978 : if (error)
679 : return error;
680 :
681 26978 : ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
682 26978 : rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
683 26978 : return xfs_trans_reserve_quota_nblks(tp, req->ip2,
684 26978 : ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
685 : true);
686 : }
687 :
688 : /*
689 : * Prepare an existing transaction for a swap. The caller must hold
690 : * the ILOCK of both the inode being repaired and the temporary file.
691 : * Only use this when those ILOCKs cannot be dropped.
692 : *
693 : * Fill out the swapext request and resource estimation structures in
694 : * preparation for swapping the contents of a metadata file that we've rebuilt
695 : * in the temp file, then reserve space and quota to the transaction.
696 : */
697 : int
698 0 : xrep_tempswap_trans_reserve(
699 : struct xfs_scrub *sc,
700 : int whichfork,
701 : struct xrep_tempswap *tx)
702 : {
703 0 : int error;
704 :
705 0 : ASSERT(sc->tp != NULL);
706 0 : ASSERT(xfs_isilocked(sc->ip, XFS_ILOCK_EXCL));
707 0 : ASSERT(xfs_isilocked(sc->tempip, XFS_ILOCK_EXCL));
708 :
709 0 : error = xrep_tempswap_prep_request(sc, whichfork, tx);
710 0 : if (error)
711 : return error;
712 :
713 0 : error = xfs_swapext_estimate(&tx->req);
714 0 : if (error)
715 : return error;
716 :
717 0 : error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
718 0 : if (error)
719 : return error;
720 :
721 0 : return xrep_tempswap_reserve_quota(sc, tx);
722 : }
723 :
724 : /*
725 : * Allocate a transaction, ILOCK the temporary file and the file being
726 : * repaired, and join them to the transaction in preparation to swap fork
727 : * contents as part of a repair operation.
728 : */
729 : int
730 214044 : xrep_tempswap_trans_alloc(
731 : struct xfs_scrub *sc,
732 : int whichfork,
733 : struct xrep_tempswap *tx)
734 : {
735 214044 : unsigned int flags = 0;
736 214044 : int error;
737 :
738 214044 : ASSERT(sc->tp == NULL);
739 :
740 214044 : error = xrep_tempswap_prep_request(sc, whichfork, tx);
741 214037 : if (error)
742 : return error;
743 :
744 214036 : error = xrep_tempswap_estimate(sc, tx);
745 214035 : if (error)
746 : return error;
747 :
748 214035 : if (xfs_has_lazysbcount(sc->mp))
749 214035 : flags |= XFS_TRANS_RES_FDBLKS;
750 :
751 214035 : error = xrep_tempswap_grab_log_assist(sc);
752 214037 : if (error)
753 : return error;
754 :
755 214037 : error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
756 214037 : tx->req.resblks, 0, flags, &sc->tp);
757 214045 : if (error)
758 : return error;
759 :
760 214045 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
761 214045 : sc->ilock_flags |= XFS_ILOCK_EXCL;
762 214045 : xfs_xchg_range_ilock(sc->tp, sc->ip, sc->tempip);
763 :
764 214037 : return xrep_tempswap_reserve_quota(sc, tx);
765 : }
766 :
767 : /* Swap forks between the file being repaired and the temporary file. */
768 : int
769 45089 : xrep_tempswap_contents(
770 : struct xfs_scrub *sc,
771 : struct xrep_tempswap *tx)
772 : {
773 45089 : int error;
774 :
775 45089 : ASSERT(sc->flags & XREP_FSGATES_ATOMIC_XCHG);
776 :
777 45089 : xfs_swapext(sc->tp, &tx->req);
778 45089 : error = xfs_defer_finish(&sc->tp);
779 45089 : if (error)
780 : return error;
781 :
782 : /*
783 : * If we swapped the ondisk sizes of two metadata files, we must swap
784 : * the incore sizes as well. Since online fsck doesn't use swapext on
785 : * the data forks of user-accessible files, the two sizes are always
786 : * the same, so we don't need to log the inodes.
787 : */
788 45089 : if (tx->req.req_flags & XFS_SWAP_REQ_SET_SIZES) {
789 4255 : loff_t temp;
790 :
791 4255 : temp = i_size_read(VFS_I(sc->ip));
792 4255 : i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
793 4255 : i_size_write(VFS_I(sc->tempip), temp);
794 : }
795 :
796 : return 0;
797 : }
798 :
799 : /*
800 : * Write local format data from one of the temporary file's forks into the same
801 : * fork of file being repaired, and swap the file sizes, if appropriate.
802 : * Caller must ensure that the file being repaired has enough fork space to
803 : * hold all the bytes.
804 : */
805 : void
806 168934 : xrep_tempfile_copyout_local(
807 : struct xfs_scrub *sc,
808 : int whichfork)
809 : {
810 168934 : struct xfs_ifork *temp_ifp;
811 168934 : struct xfs_ifork *ifp;
812 168934 : unsigned int ilog_flags = XFS_ILOG_CORE;
813 :
814 168934 : temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
815 168935 : ifp = xfs_ifork_ptr(sc->ip, whichfork);
816 :
817 168943 : ASSERT(temp_ifp != NULL);
818 168943 : ASSERT(ifp != NULL);
819 168943 : ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
820 168943 : ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
821 :
822 168943 : switch (whichfork) {
823 12784 : case XFS_DATA_FORK:
824 12784 : ASSERT(sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip));
825 : break;
826 156159 : case XFS_ATTR_FORK:
827 156159 : ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
828 : break;
829 0 : default:
830 0 : ASSERT(0);
831 0 : return;
832 : }
833 :
834 168943 : xfs_idestroy_fork(ifp);
835 168933 : xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_u1.if_data,
836 : temp_ifp->if_bytes);
837 :
838 168924 : if (whichfork == XFS_DATA_FORK) {
839 12785 : i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
840 12785 : sc->ip->i_disk_size = sc->tempip->i_disk_size;
841 : }
842 :
843 168924 : ilog_flags |= xfs_ilog_fdata(whichfork);
844 168924 : xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
845 : }
846 :
847 : /* Decide if a given XFS inode is a temporary file for a repair. */
848 : bool
849 4960754478 : xrep_is_tempfile(
850 : const struct xfs_inode *ip)
851 : {
852 4960754478 : const struct inode *inode = &ip->i_vnode;
853 :
854 4960754478 : if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
855 342556 : return true;
856 :
857 : return false;
858 : }
|