Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_ialloc.h"
16 : #include "xfs_quota.h"
17 : #include "xfs_bmap.h"
18 : #include "xfs_bmap_btree.h"
19 : #include "xfs_trans_space.h"
20 : #include "xfs_dir2.h"
21 : #include "xfs_xchgrange.h"
22 : #include "xfs_swapext.h"
23 : #include "xfs_defer.h"
24 : #include "xfs_symlink_remote.h"
25 : #include "scrub/scrub.h"
26 : #include "scrub/common.h"
27 : #include "scrub/repair.h"
28 : #include "scrub/trace.h"
29 : #include "scrub/tempfile.h"
30 : #include "scrub/tempswap.h"
31 : #include "scrub/xfile.h"
32 :
33 : /*
34 : * Create a temporary file for reconstructing metadata, with the intention of
35 : * atomically swapping the temporary file's contents with the file that's
36 : * being repaired.
37 : */
38 : int
39 14644680 : xrep_tempfile_create(
40 : struct xfs_scrub *sc,
41 : uint16_t mode)
42 : {
43 14644680 : struct xfs_icreate_args args = { .pip = sc->mp->m_rootip, };
44 14644680 : struct xfs_mount *mp = sc->mp;
45 14644680 : struct xfs_trans *tp = NULL;
46 14644680 : struct xfs_dquot *udqp;
47 14644680 : struct xfs_dquot *gdqp;
48 14644680 : struct xfs_dquot *pdqp;
49 14644680 : struct xfs_trans_res *tres;
50 14644680 : struct xfs_inode *dp = mp->m_rootip;
51 14644680 : xfs_ino_t ino;
52 14644680 : unsigned int resblks;
53 14644680 : bool is_dir = S_ISDIR(mode);
54 14644680 : int error;
55 :
56 29289360 : if (xfs_is_shutdown(mp))
57 : return -EIO;
58 29289360 : if (xfs_is_readonly(mp))
59 : return -EROFS;
60 :
61 14644680 : ASSERT(sc->tp == NULL);
62 14644680 : ASSERT(sc->tempip == NULL);
63 :
64 : /* Force everything to have the root ids and mode we want. */
65 14644680 : xfs_icreate_args_rootfile(&args, mp, mode, false);
66 :
67 : /*
68 : * Make sure that we have allocated dquot(s) on disk. The temporary
69 : * inode should be completely root owned so that we don't fail due to
70 : * quota limits.
71 : */
72 14624726 : error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
73 14744560 : if (error)
74 : return error;
75 :
76 14744560 : if (is_dir) {
77 242643 : resblks = xfs_mkdir_space_res(mp, 0);
78 242643 : tres = &M_RES(mp)->tr_mkdir;
79 : } else {
80 14501917 : resblks = XFS_IALLOC_SPACE_RES(mp);
81 14501917 : tres = &M_RES(mp)->tr_create_tmpfile;
82 : }
83 :
84 14744560 : error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
85 : &tp);
86 14743746 : if (error)
87 102921 : goto out_release_dquots;
88 :
89 : /* Allocate inode, set up directory. */
90 14640825 : error = xfs_dialloc(&tp, dp, mode, &ino);
91 14628570 : if (error)
92 7199578 : goto out_trans_cancel;
93 7428992 : error = xfs_icreate(tp, ino, &args, &sc->tempip);
94 7428828 : if (error)
95 0 : goto out_trans_cancel;
96 :
97 : /* We don't touch file data, so drop the realtime flags. */
98 7428828 : sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
99 7428828 : xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
100 :
101 : /*
102 : * Mark our temporary file as private so that LSMs and the ACL code
103 : * don't try to add their own metadata or reason about these files.
104 : * The file should never be exposed to userspace.
105 : */
106 7428995 : VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
107 7428995 : VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
108 :
109 7428995 : if (is_dir) {
110 242098 : error = xfs_dir_init(tp, sc->tempip, dp);
111 242064 : if (error)
112 0 : goto out_trans_cancel;
113 7186897 : } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
114 144691 : error = xfs_symlink_write_target(tp, sc->tempip, ".", 1, 0, 0);
115 144691 : if (error)
116 0 : goto out_trans_cancel;
117 : }
118 :
119 : /*
120 : * Attach the dquot(s) to the inodes and modify them incore.
121 : * These ids of the inode couldn't have changed since the new
122 : * inode has been locked ever since it was created.
123 : */
124 7428961 : xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
125 :
126 : /*
127 : * Put our temp file on the unlinked list so it's purged automatically.
128 : * Anything being reconstructed using this file must be atomically
129 : * swapped with the original file because the contents here will be
130 : * purged when the inode is dropped or log recovery cleans out the
131 : * unlinked list.
132 : */
133 7428925 : error = xfs_iunlink(tp, sc->tempip);
134 7428963 : if (error)
135 0 : goto out_trans_cancel;
136 :
137 7428963 : error = xfs_trans_commit(tp);
138 7429059 : if (error)
139 0 : goto out_release_inode;
140 :
141 7429059 : trace_xrep_tempfile_create(sc);
142 :
143 7429057 : xfs_qm_dqrele(udqp);
144 7429054 : xfs_qm_dqrele(gdqp);
145 7429057 : xfs_qm_dqrele(pdqp);
146 :
147 : /* Finish setting up the incore / vfs context. */
148 7429051 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
149 7429046 : xfs_setup_iops(sc->tempip);
150 7429030 : xfs_finish_inode_setup(sc->tempip);
151 :
152 7429013 : sc->temp_ilock_flags = 0;
153 7429013 : return error;
154 :
155 7199578 : out_trans_cancel:
156 7199578 : xfs_trans_cancel(tp);
157 7007335 : out_release_inode:
158 : /*
159 : * Wait until after the current transaction is aborted to finish the
160 : * setup of the inode and release the inode. This prevents recursive
161 : * transactions and deadlocks from xfs_inactive.
162 : */
163 7007335 : if (sc->tempip) {
164 0 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
165 0 : xfs_finish_inode_setup(sc->tempip);
166 0 : xchk_irele(sc, sc->tempip);
167 : }
168 7007335 : out_release_dquots:
169 7110256 : xfs_qm_dqrele(udqp);
170 7313796 : xfs_qm_dqrele(gdqp);
171 7310610 : xfs_qm_dqrele(pdqp);
172 :
173 7310610 : return error;
174 : }
175 :
176 : /* Take IOLOCK_EXCL on the temporary file, maybe. */
177 : bool
178 7213779 : xrep_tempfile_iolock_nowait(
179 : struct xfs_scrub *sc)
180 : {
181 7213779 : if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
182 7213151 : sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
183 7213151 : return true;
184 : }
185 :
186 : return false;
187 : }
188 :
/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
 * to avoid deadlocks and lockdep complaints.
 *
 * Keeps retrying the trylock, pausing between attempts, until it succeeds
 * (returns 0) or xchk_should_terminate() tells us to stop (returns the error
 * it set).
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;

		if (xchk_should_terminate(sc, &error))
			return error;

		delay(1);
	}
}
208 :
209 : /* Release IOLOCK_EXCL on the temporary file. */
210 : void
211 2451940 : xrep_tempfile_iounlock(
212 : struct xfs_scrub *sc)
213 : {
214 2451940 : xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
215 2451831 : sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
216 2451831 : }
217 :
218 : /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
219 : void
220 2601270 : xrep_tempfile_ilock(
221 : struct xfs_scrub *sc)
222 : {
223 2601270 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
224 2601270 : xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
225 2601185 : }
226 :
227 : /* Try to grab ILOCK_EXCL on the temporary file. */
228 : bool
229 81958 : xrep_tempfile_ilock_nowait(
230 : struct xfs_scrub *sc)
231 : {
232 81958 : if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
233 81958 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
234 81958 : return true;
235 : }
236 :
237 : return false;
238 : }
239 :
240 : /* Unlock ILOCK_EXCL on the temporary file after an update. */
241 : void
242 4862636 : xrep_tempfile_iunlock(
243 : struct xfs_scrub *sc)
244 : {
245 4862636 : xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
246 4861068 : sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
247 4861068 : }
248 :
249 : /*
250 : * Begin the process of making changes to both the file being scrubbed and
251 : * the temporary file by taking ILOCK_EXCL on both.
252 : */
253 : void
254 0 : xrep_tempfile_ilock_both(
255 : struct xfs_scrub *sc)
256 : {
257 0 : xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
258 0 : sc->ilock_flags |= XFS_ILOCK_EXCL;
259 0 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
260 0 : }
261 :
262 : /* Unlock ILOCK_EXCL on both files. */
263 : void
264 0 : xrep_tempfile_iunlock_both(
265 : struct xfs_scrub *sc)
266 : {
267 0 : xrep_tempfile_iunlock(sc);
268 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
269 0 : }
270 :
271 : /* Release the temporary file. */
272 : void
273 876785130 : xrep_tempfile_rele(
274 : struct xfs_scrub *sc)
275 : {
276 876785130 : if (!sc->tempip)
277 : return;
278 :
279 7421317 : if (sc->temp_ilock_flags) {
280 4948455 : xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
281 4948136 : sc->temp_ilock_flags = 0;
282 : }
283 :
284 7420998 : xchk_irele(sc, sc->tempip);
285 7416062 : sc->tempip = NULL;
286 : }
287 :
288 : /*
289 : * Make sure that the given range of the data fork of the temporary file is
290 : * mapped to written blocks. The caller must ensure that both inodes are
291 : * joined to the transaction.
292 : */
293 : int
294 81958 : xrep_tempfile_prealloc(
295 : struct xfs_scrub *sc,
296 : xfs_fileoff_t off,
297 : xfs_filblks_t len)
298 : {
299 81958 : struct xfs_bmbt_irec map;
300 81958 : xfs_fileoff_t end = off + len;
301 81958 : int error;
302 :
303 81958 : ASSERT(sc->tempip != NULL);
304 81958 : ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
305 :
306 163916 : for (; off < end; off = map.br_startoff + map.br_blockcount) {
307 81958 : int nmaps = 1;
308 :
309 : /*
310 : * If we have a real extent mapping this block then we're
311 : * in ok shape.
312 : */
313 81958 : error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
314 : XFS_DATA_FORK);
315 81958 : if (error)
316 0 : return error;
317 81958 : if (nmaps == 0) {
318 0 : ASSERT(nmaps != 0);
319 0 : return -EFSCORRUPTED;
320 : }
321 :
322 81958 : if (xfs_bmap_is_written_extent(&map))
323 0 : continue;
324 :
325 : /*
326 : * If we find a delalloc reservation then something is very
327 : * very wrong. Bail out.
328 : */
329 81958 : if (map.br_startblock == DELAYSTARTBLOCK)
330 : return -EFSCORRUPTED;
331 :
332 : /*
333 : * Make sure this block has a real zeroed extent allocated to
334 : * it.
335 : */
336 81958 : nmaps = 1;
337 81958 : error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
338 : XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
339 : &nmaps);
340 81958 : if (error)
341 0 : return error;
342 :
343 81958 : trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
344 :
345 : /* Commit new extent and all deferred work. */
346 81958 : error = xfs_defer_finish(&sc->tp);
347 81958 : if (error)
348 0 : return error;
349 : }
350 :
351 : return 0;
352 : }
353 :
354 : /*
355 : * Write data to each block of a file. The given range of the tempfile's data
356 : * fork must already be populated with written extents.
357 : */
358 : int
359 81958 : xrep_tempfile_copyin(
360 : struct xfs_scrub *sc,
361 : xfs_fileoff_t off,
362 : xfs_filblks_t len,
363 : xrep_tempfile_copyin_fn prep_fn,
364 : void *data)
365 : {
366 81958 : LIST_HEAD(buffers_list);
367 81958 : struct xfs_mount *mp = sc->mp;
368 81958 : struct xfs_buf *bp;
369 81958 : xfs_fileoff_t flush_mask;
370 81958 : xfs_fileoff_t end = off + len;
371 81958 : loff_t pos = XFS_FSB_TO_B(mp, off);
372 81958 : int error = 0;
373 :
374 81958 : ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
375 :
376 : /* Flush buffers to disk every 512K */
377 81958 : flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
378 :
379 2657346 : for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
380 2575388 : struct xfs_bmbt_irec map;
381 2575388 : int nmaps = 1;
382 :
383 : /* Read block mapping for this file block. */
384 2575388 : error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
385 2575388 : if (error)
386 0 : goto out_err;
387 2575388 : if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
388 0 : error = -EFSCORRUPTED;
389 0 : goto out_err;
390 : }
391 :
392 : /* Get the metadata buffer for this offset in the file. */
393 7726164 : error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
394 2575388 : XFS_FSB_TO_DADDR(mp, map.br_startblock),
395 : mp->m_bsize, 0, &bp);
396 2575388 : if (error)
397 0 : goto out_err;
398 :
399 2575388 : trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
400 :
401 : /* Read in a block's worth of data from the xfile. */
402 2575388 : error = prep_fn(sc, bp, data);
403 2575388 : if (error) {
404 0 : xfs_trans_brelse(sc->tp, bp);
405 0 : goto out_err;
406 : }
407 :
408 : /* Queue buffer, and flush if we have too much dirty data. */
409 2575388 : xfs_buf_delwri_queue_here(bp, &buffers_list);
410 2575388 : xfs_trans_brelse(sc->tp, bp);
411 :
412 2575388 : if (!(off & flush_mask)) {
413 60025 : error = xfs_buf_delwri_submit(&buffers_list);
414 60025 : if (error)
415 0 : goto out_err;
416 : }
417 : }
418 :
419 : /*
420 : * Write the new blocks to disk. If the ordered list isn't empty after
421 : * that, then something went wrong and we have to fail. This should
422 : * never happen, but we'll check anyway.
423 : */
424 81958 : error = xfs_buf_delwri_submit(&buffers_list);
425 81958 : if (error)
426 0 : goto out_err;
427 :
428 81958 : if (!list_empty(&buffers_list)) {
429 0 : ASSERT(list_empty(&buffers_list));
430 0 : error = -EIO;
431 0 : goto out_err;
432 : }
433 :
434 : return 0;
435 :
436 0 : out_err:
437 0 : xfs_buf_delwri_cancel(&buffers_list);
438 0 : return error;
439 : }
440 :
441 : /*
442 : * Set the temporary file's size. Caller must join the tempfile to the scrub
443 : * transaction and is responsible for adjusting block mappings as needed.
444 : */
445 : int
446 81958 : xrep_tempfile_set_isize(
447 : struct xfs_scrub *sc,
448 : unsigned long long isize)
449 : {
450 81958 : if (sc->tempip->i_disk_size == isize)
451 : return 0;
452 :
453 81958 : sc->tempip->i_disk_size = isize;
454 81958 : i_size_write(VFS_I(sc->tempip), isize);
455 81958 : return xrep_tempfile_roll_trans(sc);
456 : }
457 :
458 : /*
459 : * Roll a repair transaction involving the temporary file. Caller must join
460 : * both the temporary file and the file being scrubbed to the transaction.
461 : * This function return with both inodes joined to a new scrub transaction,
462 : * or the usual negative errno.
463 : */
464 : int
465 4964013 : xrep_tempfile_roll_trans(
466 : struct xfs_scrub *sc)
467 : {
468 4964013 : int error;
469 :
470 4964013 : xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
471 4964184 : error = xrep_roll_trans(sc);
472 4964353 : if (error)
473 : return error;
474 :
475 4964233 : xfs_trans_ijoin(sc->tp, sc->tempip, 0);
476 4964233 : return 0;
477 : }
478 :
479 : /* Enable atomic extent swapping. */
480 : int
481 4954558 : xrep_tempswap_grab_log_assist(
482 : struct xfs_scrub *sc)
483 : {
484 4954558 : bool need_rele = false;
485 4954558 : int error;
486 :
487 4954558 : if (sc->flags & XREP_FSGATES_ATOMIC_XCHG)
488 : return 0;
489 :
490 4954014 : error = xfs_xchg_range_grab_log_assist(sc->mp, true, &need_rele);
491 4951558 : if (error)
492 : return error;
493 4951473 : if (!need_rele) {
494 0 : ASSERT(need_rele);
495 0 : return -EOPNOTSUPP;
496 : }
497 :
498 4951473 : trace_xchk_fsgates_enable(sc, XREP_FSGATES_ATOMIC_XCHG);
499 :
500 4948732 : sc->flags |= XREP_FSGATES_ATOMIC_XCHG;
501 4948732 : return 0;
502 : }
503 :
504 : /*
505 : * Fill out the swapext request in preparation for swapping the contents of a
506 : * metadata file that we've rebuilt in the temp file.
507 : */
508 : STATIC int
509 4947873 : xrep_tempswap_prep_request(
510 : struct xfs_scrub *sc,
511 : int whichfork,
512 : xfs_fileoff_t off,
513 : xfs_filblks_t len,
514 : struct xrep_tempswap *tx)
515 : {
516 4947873 : struct xfs_swapext_req *req = &tx->req;
517 :
518 4947873 : memset(tx, 0, sizeof(struct xrep_tempswap));
519 :
520 : /* COW forks don't exist on disk. */
521 4947873 : if (whichfork == XFS_COW_FORK) {
522 0 : ASSERT(0);
523 0 : return -EINVAL;
524 : }
525 :
526 : /* Both files should have the relevant forks. */
527 9895872 : if (!xfs_ifork_ptr(sc->ip, whichfork) ||
528 4948166 : !xfs_ifork_ptr(sc->tempip, whichfork)) {
529 0 : ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
530 0 : ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
531 0 : return -EINVAL;
532 : }
533 :
534 : /* Swap all mappings in both forks. */
535 4947999 : req->ip1 = sc->tempip;
536 4947999 : req->ip2 = sc->ip;
537 4947999 : req->startoff1 = off;
538 4947999 : req->startoff2 = off;
539 4947999 : req->whichfork = whichfork;
540 4947999 : req->blockcount = len;
541 4947999 : req->req_flags = XFS_SWAP_REQ_LOGGED;
542 :
543 : /* Always swap sizes when we're swapping data fork mappings. */
544 4947999 : if (whichfork == XFS_DATA_FORK)
545 391612 : req->req_flags |= XFS_SWAP_REQ_SET_SIZES;
546 :
547 : /*
548 : * If we're repairing symlinks, xattrs, or directories, always try to
549 : * convert ip2 to short format after swapping.
550 : */
551 4947999 : if (whichfork == XFS_ATTR_FORK || S_ISDIR(VFS_I(sc->ip)->i_mode) ||
552 : S_ISLNK(VFS_I(sc->ip)->i_mode))
553 4866041 : req->req_flags |= XFS_SWAP_REQ_CVT_INO2_SF;
554 :
555 : return 0;
556 : }
557 :
558 : /*
559 : * Fill out the swapext resource estimation structures in preparation for
560 : * swapping the contents of a metadata file that we've rebuilt in the temp
561 : * file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
562 : */
563 : STATIC int
564 4869643 : xrep_tempswap_estimate(
565 : struct xfs_scrub *sc,
566 : struct xrep_tempswap *tx)
567 : {
568 4869643 : struct xfs_swapext_req *req = &tx->req;
569 4869643 : struct xfs_ifork *ifp;
570 4869643 : struct xfs_ifork *tifp;
571 4869643 : int state = 0;
572 :
573 : /*
574 : * Deal with either fork being in local format. The swapext code only
575 : * knows how to exchange block mappings for regular files, so we only
576 : * have to know about local format for xattrs and directories.
577 : */
578 4869643 : ifp = xfs_ifork_ptr(sc->ip, req->whichfork);
579 4862966 : if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
580 4756395 : state |= 1;
581 :
582 4862966 : tifp = xfs_ifork_ptr(sc->tempip, req->whichfork);
583 4863670 : if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
584 4773588 : state |= 2;
585 :
586 4863670 : switch (state) {
587 93571 : case 0:
588 : /* Both files have mapped extents; use the regular estimate. */
589 93571 : return xfs_xchg_range_estimate(req);
590 51 : case 1:
591 : /*
592 : * The file being repaired is in local format, but the temp
593 : * file has mapped extents. To perform the swap, the file
594 : * being repaired must have its shorform data converted to a
595 : * fsblock, and the fork changed to extents format. We need
596 : * one resblk for the conversion; the number of exchanges is
597 : * (worst case) the temporary file's extent count plus the
598 : * block we converted.
599 : */
600 51 : req->ip1_bcount = sc->tempip->i_nblocks;
601 51 : req->ip2_bcount = 1;
602 51 : req->nr_exchanges = 1 + tifp->if_nextents;
603 51 : req->resblks = 1;
604 51 : break;
605 15246 : case 2:
606 : /*
607 : * The temporary file is in local format, but the file being
608 : * repaired has mapped extents. To perform the swap, the temp
609 : * file must have its shortform data converted to an fsblock,
610 : * and the fork changed to extents format. We need one resblk
611 : * for the conversion; the number of exchanges is (worst case)
612 : * the extent count of the file being repaired plus the block
613 : * we converted.
614 : */
615 15246 : req->ip1_bcount = 1;
616 15246 : req->ip2_bcount = sc->ip->i_nblocks;
617 15246 : req->nr_exchanges = 1 + ifp->if_nextents;
618 15246 : req->resblks = 1;
619 15246 : break;
620 4754802 : case 3:
621 : /*
622 : * Both forks are in local format. To perform the swap, both
623 : * files must have their shortform data converted to fsblocks,
624 : * and both forks must be converted to extents format. We
625 : * need two resblks for the two conversions, and the number of
626 : * exchanges is 1 since there's only one block at fileoff 0.
627 : * Presumably, the caller could not exchange the two inode fork
628 : * areas directly.
629 : */
630 4754802 : req->ip1_bcount = 1;
631 4754802 : req->ip2_bcount = 1;
632 4754802 : req->nr_exchanges = 1;
633 4754802 : req->resblks = 2;
634 4754802 : break;
635 : }
636 :
637 4770099 : return xfs_swapext_estimate_overhead(req);
638 : }
639 :
640 : /*
641 : * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
642 : * this if quota enforcement is disabled or if both inodes' dquots are the
643 : * same. The qretry structure must be initialized to zeroes before the first
644 : * call to this function.
645 : */
646 : STATIC int
647 4951151 : xrep_tempswap_reserve_quota(
648 : struct xfs_scrub *sc,
649 : const struct xrep_tempswap *tx)
650 : {
651 4951151 : struct xfs_trans *tp = sc->tp;
652 4951151 : const struct xfs_swapext_req *req = &tx->req;
653 4951151 : int64_t ddelta, rdelta;
654 4951151 : int error;
655 :
656 : /*
657 : * Don't bother with a quota reservation if we're not enforcing them
658 : * or the two inodes have the same dquots.
659 : */
660 4951151 : if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
661 4941944 : (req->ip1->i_udquot == req->ip2->i_udquot &&
662 4225675 : req->ip1->i_gdquot == req->ip2->i_gdquot &&
663 4219242 : req->ip1->i_pdquot == req->ip2->i_pdquot))
664 : return 0;
665 :
666 : /*
667 : * Quota reservation for each file comes from two sources. First, we
668 : * need to account for any net gain in mapped blocks during the swap.
669 : * Second, we need reservation for the gross gain in mapped blocks so
670 : * that we don't trip over any quota block reservation assertions. We
671 : * must reserve the gross gain because the quota code subtracts from
672 : * bcount the number of blocks that we unmap; it does not add that
673 : * quantity back to the quota block reservation.
674 : */
675 740275 : ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
676 740275 : rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
677 740275 : error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
678 740275 : ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
679 : true);
680 736384 : if (error)
681 : return error;
682 :
683 736384 : ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
684 736384 : rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
685 736384 : return xfs_trans_reserve_quota_nblks(tp, req->ip2,
686 736384 : ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
687 : true);
688 : }
689 :
690 : /*
691 : * Prepare an existing transaction for a swap. The caller must hold
692 : * the ILOCK of both the inode being repaired and the temporary file.
693 : * Only use this when those ILOCKs cannot be dropped.
694 : *
695 : * Fill out the swapext request and resource estimation structures in
696 : * preparation for swapping the contents of a metadata file that we've rebuilt
697 : * in the temp file, then reserve space and quota to the transaction.
698 : */
699 : int
700 81958 : xrep_tempswap_trans_reserve(
701 : struct xfs_scrub *sc,
702 : int whichfork,
703 : xfs_fileoff_t off,
704 : xfs_filblks_t len,
705 : struct xrep_tempswap *tx)
706 : {
707 81958 : int error;
708 :
709 81958 : ASSERT(sc->tp != NULL);
710 81958 : ASSERT(xfs_isilocked(sc->ip, XFS_ILOCK_EXCL));
711 81958 : ASSERT(xfs_isilocked(sc->tempip, XFS_ILOCK_EXCL));
712 :
713 81958 : error = xrep_tempswap_prep_request(sc, whichfork, off, len, tx);
714 81958 : if (error)
715 : return error;
716 :
717 81958 : error = xfs_swapext_estimate(&tx->req);
718 81958 : if (error)
719 : return error;
720 :
721 81958 : error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
722 81958 : if (error)
723 : return error;
724 :
725 81958 : return xrep_tempswap_reserve_quota(sc, tx);
726 : }
727 :
728 : /*
729 : * Allocate a transaction, ILOCK the temporary file and the file being
730 : * repaired, and join them to the transaction in preparation to swap fork
731 : * contents as part of a repair operation.
732 : */
733 : int
734 4869639 : xrep_tempswap_trans_alloc(
735 : struct xfs_scrub *sc,
736 : int whichfork,
737 : struct xrep_tempswap *tx)
738 : {
739 4869639 : unsigned int flags = 0;
740 4869639 : int error;
741 :
742 4869639 : ASSERT(sc->tp == NULL);
743 :
744 4869639 : error = xrep_tempswap_prep_request(sc, whichfork, 0, XFS_MAX_FILEOFF,
745 : tx);
746 4866510 : if (error)
747 : return error;
748 :
749 4866689 : error = xrep_tempswap_estimate(sc, tx);
750 4865527 : if (error)
751 : return error;
752 :
753 4865527 : if (xfs_has_lazysbcount(sc->mp))
754 4865643 : flags |= XFS_TRANS_RES_FDBLKS;
755 :
756 4865527 : error = xrep_tempswap_grab_log_assist(sc);
757 4861721 : if (error)
758 : return error;
759 :
760 4862479 : error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
761 4862479 : tx->req.resblks, 0, flags, &sc->tp);
762 4872750 : if (error)
763 : return error;
764 :
765 4872826 : sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
766 4872826 : sc->ilock_flags |= XFS_ILOCK_EXCL;
767 4872826 : xfs_xchg_range_ilock(sc->tp, sc->ip, sc->tempip);
768 :
769 4871855 : return xrep_tempswap_reserve_quota(sc, tx);
770 : }
771 :
772 : /* Swap forks between the file being repaired and the temporary file. */
773 : int
774 190823 : xrep_tempswap_contents(
775 : struct xfs_scrub *sc,
776 : struct xrep_tempswap *tx)
777 : {
778 190823 : int error;
779 :
780 190823 : ASSERT(sc->flags & XREP_FSGATES_ATOMIC_XCHG);
781 :
782 190823 : xfs_swapext(sc->tp, &tx->req);
783 190826 : error = xfs_defer_finish(&sc->tp);
784 190827 : if (error)
785 : return error;
786 :
787 : /*
788 : * If we swapped the ondisk sizes of two metadata files, we must swap
789 : * the incore sizes as well. Since online fsck doesn't use swapext on
790 : * the data forks of user-accessible files, the two sizes are always
791 : * the same, so we don't need to log the inodes.
792 : */
793 190827 : if (tx->req.req_flags & XFS_SWAP_REQ_SET_SIZES) {
794 111158 : loff_t temp;
795 :
796 111158 : temp = i_size_read(VFS_I(sc->ip));
797 111158 : i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
798 111158 : i_size_write(VFS_I(sc->tempip), temp);
799 : }
800 :
801 : return 0;
802 : }
803 :
804 : /*
805 : * Write local format data from one of the temporary file's forks into the same
806 : * fork of file being repaired, and swap the file sizes, if appropriate.
807 : * Caller must ensure that the file being repaired has enough fork space to
808 : * hold all the bytes.
809 : */
810 : void
811 4755927 : xrep_tempfile_copyout_local(
812 : struct xfs_scrub *sc,
813 : int whichfork)
814 : {
815 4755927 : struct xfs_ifork *temp_ifp;
816 4755927 : struct xfs_ifork *ifp;
817 4755927 : unsigned int ilog_flags = XFS_ILOG_CORE;
818 :
819 4755927 : temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
820 4755090 : ifp = xfs_ifork_ptr(sc->ip, whichfork);
821 :
822 4754454 : ASSERT(temp_ifp != NULL);
823 4754454 : ASSERT(ifp != NULL);
824 4754454 : ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
825 4754454 : ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);
826 :
827 4754454 : switch (whichfork) {
828 280475 : case XFS_DATA_FORK:
829 280475 : ASSERT(sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip));
830 : break;
831 4473979 : case XFS_ATTR_FORK:
832 4473979 : ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
833 : break;
834 0 : default:
835 0 : ASSERT(0);
836 0 : return;
837 : }
838 :
839 4754454 : xfs_idestroy_fork(ifp);
840 4761442 : xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_u1.if_data,
841 : temp_ifp->if_bytes);
842 :
843 4755955 : if (whichfork == XFS_DATA_FORK) {
844 280471 : i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
845 280471 : sc->ip->i_disk_size = sc->tempip->i_disk_size;
846 : }
847 :
848 4755955 : ilog_flags |= xfs_ilog_fdata(whichfork);
849 4755955 : xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
850 : }
851 :
852 : /* Decide if a given XFS inode is a temporary file for a repair. */
853 : bool
854 >10639*10^7 : xrep_is_tempfile(
855 : const struct xfs_inode *ip)
856 : {
857 >10639*10^7 : const struct inode *inode = &ip->i_vnode;
858 >10639*10^7 : struct xfs_mount *mp = ip->i_mount;
859 :
860 : /*
861 : * Files in the metadata directory tree also have S_PRIVATE set and
862 : * IOP_XATTR unset, so we must distinguish them separately.
863 : */
864 >10639*10^7 : if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADIR))
865 : return false;
866 :
867 >10632*10^7 : if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
868 19125899 : return true;
869 :
870 : return false;
871 : }
|