Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_log_format.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_sb.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_alloc_btree.h"
19 : #include "xfs_ialloc.h"
20 : #include "xfs_ialloc_btree.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_refcount_btree.h"
24 : #include "xfs_extent_busy.h"
25 : #include "xfs_ag.h"
26 : #include "xfs_ag_resv.h"
27 : #include "xfs_quota.h"
28 : #include "xfs_qm.h"
29 : #include "xfs_defer.h"
30 : #include "xfs_errortag.h"
31 : #include "xfs_error.h"
32 : #include "xfs_reflink.h"
33 : #include "xfs_health.h"
34 : #include "xfs_buf_xfile.h"
35 : #include "xfs_da_format.h"
36 : #include "xfs_da_btree.h"
37 : #include "xfs_attr.h"
38 : #include "xfs_dir2.h"
39 : #include "scrub/scrub.h"
40 : #include "scrub/common.h"
41 : #include "scrub/trace.h"
42 : #include "scrub/repair.h"
43 : #include "scrub/bitmap.h"
44 : #include "scrub/stats.h"
45 : #include "scrub/xfile.h"
46 : #include "scrub/attr_repair.h"
47 :
48 : /*
49 : * Attempt to repair some metadata, if the metadata is corrupt and userspace
50 : * told us to fix it. This function returns -EAGAIN to mean "re-run scrub",
51 : * and updates the given xchk_stats_run if it thinks it repaired anything.
52 : */
53 : int
54 48917533 : xrep_attempt(
55 : struct xfs_scrub *sc,
56 : struct xchk_stats_run *run)
57 : {
58 48917533 : u64 repair_start;
59 48917533 : int error = 0;
60 :
61 48917533 : trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);
62 :
63 48907738 : xchk_ag_btcur_free(&sc->sa);
64 :
65 : /* Repair whatever's broken. */
66 48907486 : ASSERT(sc->ops->repair);
67 48907486 : run->repair_attempted = true;
68 48907486 : repair_start = xchk_stats_now();
69 48909357 : error = sc->ops->repair(sc);
70 48918368 : trace_xrep_done(XFS_I(file_inode(sc->file)), sc->sm, error);
71 48915813 : run->repair_ns += xchk_stats_elapsed_ns(repair_start);
72 48918897 : switch (error) {
73 21547577 : case 0:
74 : /*
75 : * Repair succeeded. Commit the fixes and perform a second
76 : * scrub so that we can tell userspace if we fixed the problem.
77 : */
78 21547577 : sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
79 21547577 : sc->flags |= XREP_ALREADY_FIXED;
80 21547577 : run->repair_succeeded = true;
81 21547577 : return -EAGAIN;
82 124 : case -ECHRNG:
83 124 : sc->flags |= XCHK_NEED_DRAIN;
84 124 : run->retries++;
85 124 : return -EAGAIN;
86 0 : case -EDEADLOCK:
87 : /* Tell the caller to try again having grabbed all the locks. */
88 0 : if (!(sc->flags & XCHK_TRY_HARDER)) {
89 0 : sc->flags |= XCHK_TRY_HARDER;
90 0 : run->retries++;
91 0 : return -EAGAIN;
92 : }
93 : /*
94 : * We tried harder but still couldn't grab all the resources
95 : * we needed to fix it. The corruption has not been fixed,
96 : * so exit to userspace with the scan's output flags unchanged.
97 : */
98 : return 0;
99 27371196 : default:
100 : /*
101 : * EAGAIN tells the caller to re-scrub, so we cannot return
102 : * that here.
103 : */
104 27371196 : ASSERT(error != -EAGAIN);
105 : return error;
106 : }
107 : }
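/*
 * Illustrative sketch (not part of this file): one way a caller could drive
 * the -EAGAIN protocol above.  "xchk_scrub_once" is a hypothetical stand-in
 * for the setup + ->scrub step that the real dispatcher in scrub.c performs.
 *
 *	do {
 *		error = xchk_scrub_once(sc);
 *		if (error || !xrep_will_attempt(sc))
 *			break;
 *		error = xrep_attempt(sc, &run);
 *	} while (error == -EAGAIN);
 */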
108 :
109 : /*
110 : * Complain about unfixable problems in the filesystem. We don't log
111 : * corruptions when IFLAG_REPAIR wasn't set on the assumption that the driver
112 : * program is xfs_scrub, which will call back with IFLAG_REPAIR set if the
113 : * administrator isn't running xfs_scrub in no-repairs mode.
114 : *
115 : * Use this helper function because _ratelimited silently declares a static
116 : * structure to track rate limiting information.
117 : */
118 : void
119 0 : xrep_failure(
120 : struct xfs_mount *mp)
121 : {
122 0 : xfs_alert_ratelimited(mp,
123 : "Corruption not fixed during online repair. Unmount and run xfs_repair.");
124 0 : }
125 :
126 : /*
127 : * Repair probe -- userspace uses this to probe if we're willing to repair a
128 : * given mountpoint.
129 : */
130 : int
131 4862 : xrep_probe(
132 : struct xfs_scrub *sc)
133 : {
134 4862 : int error = 0;
135 :
136 4862 : if (xchk_should_terminate(sc, &error))
137 0 : return error;
138 :
139 : return 0;
140 : }
141 :
142 : /*
143 : * Roll a transaction, keeping the AG headers locked so that repair
144 : * retains its exclusive hold on the AG across the roll.
145 : */
146 : int
147 258654 : xrep_roll_ag_trans(
148 : struct xfs_scrub *sc)
149 : {
150 258654 : int error;
151 :
152 : /*
153 : * Keep the AG header buffers locked while we roll the transaction.
154 : * Ensure that both AG buffers are dirty and held when we roll the
155 : * transaction so that they move forward in the log without losing the
156 : * bli (and hence the bli type) when the transaction commits.
157 : *
158 : * Normal code would never hold clean buffers across a roll, but repair
159 : * needs both buffers to maintain a total lock on the AG.
160 : */
161 258654 : if (sc->sa.agi_bp) {
162 258654 : xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
163 258786 : xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
164 : }
165 :
166 258576 : if (sc->sa.agf_bp) {
167 258576 : xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
168 258823 : xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
169 : }
170 :
171 : /*
172 : * Roll the transaction. We still hold the AG header buffers locked
173 : * regardless of whether or not that succeeds. On failure, the buffers
174 : * will be released during teardown on our way out of the kernel. If
175 : * successful, join the buffers to the new transaction and move on.
176 : */
177 258775 : error = xfs_trans_roll(&sc->tp);
178 258506 : if (error)
179 : return error;
180 :
181 : /* Join the AG headers to the new transaction. */
182 258506 : if (sc->sa.agi_bp)
183 258506 : xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
184 258421 : if (sc->sa.agf_bp)
185 258421 : xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
186 :
187 : return 0;
188 : }
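/*
 * The hold/roll/rejoin pattern above, reduced to a single buffer for
 * exposition (a sketch, not called anywhere; it assumes bp was already
 * logged so that it is dirty going into the roll):
 *
 *	xfs_trans_bhold(tp, bp);
 *	error = xfs_trans_roll(&tp);
 *	if (error)
 *		return error;		<- bp is released during teardown
 *	xfs_trans_bjoin(tp, bp);
 */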
189 :
190 : /* Roll the scrub transaction, holding the primary metadata locked. */
191 : int
192 7151044 : xrep_roll_trans(
193 : struct xfs_scrub *sc)
194 : {
195 7151044 : if (!sc->ip)
196 3101 : return xrep_roll_ag_trans(sc);
197 7147943 : return xfs_trans_roll_inode(&sc->tp, sc->ip);
198 : }
199 :
200 : /* Finish all deferred work attached to the repair transaction. */
201 : int
202 436073 : xrep_defer_finish(
203 : struct xfs_scrub *sc)
204 : {
205 436073 : int error;
206 :
207 : /*
208 : * Keep the AG header buffers locked while we complete deferred work
209 : * items. Ensure that both AG buffers are dirty and held when we roll
210 : * the transaction so that they move forward in the log without losing
211 : * the bli (and hence the bli type) when the transaction commits.
212 : *
213 : * Normal code would never hold clean buffers across a roll, but repair
214 : * needs both buffers to maintain a total lock on the AG.
215 : */
216 436073 : if (sc->sa.agi_bp) {
217 367535 : xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
218 367850 : xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
219 : }
220 :
221 435952 : if (sc->sa.agf_bp) {
222 388249 : xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
223 389036 : xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
224 : }
225 :
226 : /*
227 : * Finish all deferred work items. We still hold the AG header buffers
228 : * locked regardless of whether or not that succeeds. On failure, the
229 : * buffers will be released during teardown on our way out of the
230 : * kernel. If successful, join the buffers to the new transaction
231 : * and move on.
232 : */
233 436641 : error = xfs_defer_finish(&sc->tp);
234 436288 : if (error)
235 : return error;
236 :
237 : /*
238 : * Release the hold that we set above because defer_finish won't do
239 : * that for us. The defer roll code redirties held buffers after each
240 : * roll, so the AG header buffers should be ready for logging.
241 : */
242 436288 : if (sc->sa.agi_bp)
243 367750 : xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
244 435643 : if (sc->sa.agf_bp)
245 387940 : xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
246 :
247 : return 0;
248 : }
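/*
 * Contrast with xrep_roll_ag_trans() above: the defer roll code redirties
 * held buffers after each roll, so here we only drop the hold once the
 * deferred work completes instead of rejoining by hand.  Sketch:
 *
 *	xfs_trans_bhold(tp, bp);
 *	error = xfs_defer_finish(&tp);
 *	if (error)
 *		return error;
 *	xfs_trans_bhold_release(tp, bp);
 */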
249 :
250 : /*
251 : * Does the given AG have enough space to rebuild a btree? Neither AG
252 : * reservation can be critical, and we must have enough space (factoring
253 : * in AG reservations) to construct a whole btree.
254 : */
255 : bool
256 0 : xrep_ag_has_space(
257 : struct xfs_perag *pag,
258 : xfs_extlen_t nr_blocks,
259 : enum xfs_ag_resv_type type)
260 : {
261 0 : return !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) &&
262 0 : !xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
263 0 : pag->pagf_freeblks > xfs_ag_resv_needed(pag, type) + nr_blocks;
264 : }
265 :
266 : /*
267 : * Figure out how many blocks to reserve for an AG repair. We calculate the
268 : * worst case estimate for the number of blocks we'd need to rebuild one of
269 : * any type of per-AG btree.
270 : */
271 : xfs_extlen_t
272 4840780 : xrep_calc_ag_resblks(
273 : struct xfs_scrub *sc)
274 : {
275 4840780 : struct xfs_mount *mp = sc->mp;
276 4840780 : struct xfs_scrub_metadata *sm = sc->sm;
277 4840780 : struct xfs_perag *pag;
278 4840780 : struct xfs_buf *bp;
279 4840780 : xfs_agino_t icount = NULLAGINO;
280 4840780 : xfs_extlen_t aglen = NULLAGBLOCK;
281 4840780 : xfs_extlen_t usedlen;
282 4840780 : xfs_extlen_t freelen;
283 4840780 : xfs_extlen_t bnobt_sz;
284 4840780 : xfs_extlen_t inobt_sz;
285 4840780 : xfs_extlen_t rmapbt_sz;
286 4840780 : xfs_extlen_t refcbt_sz;
287 4840780 : int error;
288 :
289 4840780 : if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
290 : return 0;
291 :
292 1630130 : pag = xfs_perag_get(mp, sm->sm_agno);
293 3262114 : if (xfs_perag_initialised_agi(pag)) {
294 : /* Use in-core icount if possible. */
295 1631057 : icount = pag->pagi_count;
296 : } else {
297 : /* Try to get the actual counters from disk. */
298 0 : error = xfs_ialloc_read_agi(pag, NULL, &bp);
299 0 : if (!error) {
300 0 : icount = pag->pagi_count;
301 0 : xfs_buf_relse(bp);
302 : }
303 : }
304 :
305 : /* Now grab the block counters from the AGF. */
306 1631057 : error = xfs_alloc_read_agf(pag, NULL, 0, &bp);
307 1630682 : if (error) {
308 0 : aglen = pag->block_count;
309 0 : freelen = aglen;
310 0 : usedlen = aglen;
311 : } else {
312 1630682 : struct xfs_agf *agf = bp->b_addr;
313 :
314 1630682 : aglen = be32_to_cpu(agf->agf_length);
315 1630682 : freelen = be32_to_cpu(agf->agf_freeblks);
316 1630682 : usedlen = aglen - freelen;
317 1630682 : xfs_buf_relse(bp);
318 : }
319 :
320 : /* If the icount is impossible, make some worst-case assumptions. */
321 1630998 : if (icount == NULLAGINO ||
322 : !xfs_verify_agino(pag, icount)) {
323 290934 : icount = pag->agino_max - pag->agino_min + 1;
324 : }
325 :
326 : /* If the block counts are impossible, make worst-case assumptions. */
327 1630998 : if (aglen == NULLAGBLOCK ||
328 1630966 : aglen != pag->block_count ||
329 : freelen >= aglen) {
330 77 : aglen = pag->block_count;
331 77 : freelen = aglen;
332 77 : usedlen = aglen;
333 : }
334 1630998 : xfs_perag_put(pag);
335 :
336 1631104 : trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
337 : freelen, usedlen);
338 :
339 : /*
340 : * Figure out how many blocks we'd need worst case to rebuild
341 : * each type of btree. Note that we can only rebuild the
342 : * bnobt/cntbt or inobt/finobt as pairs.
343 : */
344 1630921 : bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
345 1630544 : if (xfs_has_sparseinodes(mp))
346 1630544 : inobt_sz = xfs_iallocbt_calc_size(mp, icount /
347 : XFS_INODES_PER_HOLEMASK_BIT);
348 : else
349 0 : inobt_sz = xfs_iallocbt_calc_size(mp, icount /
350 : XFS_INODES_PER_CHUNK);
351 1630379 : if (xfs_has_finobt(mp))
352 1630395 : inobt_sz *= 2;
353 1630379 : if (xfs_has_reflink(mp))
354 1332213 : refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
355 : else
356 : refcbt_sz = 0;
357 1630084 : if (xfs_has_rmapbt(mp)) {
358 : /*
359 : * Guess how many blocks we need to rebuild the rmapbt.
360 : * For non-reflink filesystems we can't have more records than
361 : * used blocks. However, with reflink it's possible to have
362 : * more than one rmap record per AG block. We don't know how
363 : * many rmaps there could be in the AG, so we start off with
364 : * what we hope is a generous over-estimation.
365 : */
366 1332134 : if (xfs_has_reflink(mp))
367 1331923 : rmapbt_sz = xfs_rmapbt_calc_size(mp,
368 1331923 : (unsigned long long)aglen * 2);
369 : else
370 211 : rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
371 : } else {
372 : rmapbt_sz = 0;
373 : }
374 :
375 1630292 : trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
376 : inobt_sz, rmapbt_sz, refcbt_sz);
377 :
378 1630314 : return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
379 : }
380 :
381 : /*
382 : * Reconstructing per-AG Btrees
383 : *
384 : * When a space btree is corrupt, we don't bother trying to fix it. Instead,
385 : * we scan secondary space metadata to derive the records that should be in
386 : * the damaged btree, initialize a fresh btree root, and insert the records.
387 : * Note that for rebuilding the rmapbt we scan all the primary data to
388 : * generate the new records.
389 : *
390 : * However, that leaves the matter of removing all the metadata describing the
391 : * old broken structure. For primary metadata we use the rmap data to collect
392 : * every extent with a matching rmap owner (bitmap); we then iterate all other
393 : * metadata structures with the same rmap owner to collect the extents that
394 : * cannot be removed (sublist). We then subtract sublist from bitmap to
395 : * derive the blocks that were used by the old btree. These blocks can be
396 : * reaped.
397 : *
398 : * For rmapbt reconstructions we must use different tactics for extent
399 : * collection. First we iterate all primary metadata (this excludes the old
400 : * rmapbt, obviously) to generate new rmap records. The gaps in the rmap
401 : * records are collected as bitmap. The bnobt records are collected as
402 : * sublist. As with the other btrees we subtract sublist from bitmap, and the
403 : * result (since the rmapbt lives in the free space) are the blocks from the
404 : * old rmapbt.
405 : */
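/*
 * The subtraction described above, sketched with hypothetical helper names
 * (the real implementations live in the per-btree repair files and use the
 * bitmap types from scrub/bitmap.h):
 *
 *	collect_blocks_by_rmap_owner(&bitmap, owner);	<- old + new metadata
 *	collect_blocks_still_in_use(&sublist, owner);	<- must not be freed
 *	bitmap_disunion(&bitmap, &sublist);		<- bitmap -= sublist
 *	reap_extents(sc, &bitmap);			<- old btree blocks
 */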
406 :
407 : /* Ensure the freelist is the correct size. */
408 : int
409 18863 : xrep_fix_freelist(
410 : struct xfs_scrub *sc,
411 : int alloc_flags)
412 : {
413 18863 : struct xfs_alloc_arg args = {0};
414 :
415 18863 : args.mp = sc->mp;
416 18863 : args.tp = sc->tp;
417 18863 : args.agno = sc->sa.pag->pag_agno;
418 18863 : args.alignment = 1;
419 18863 : args.pag = sc->sa.pag;
420 :
421 18863 : return xfs_alloc_fix_freelist(&args, alloc_flags);
422 : }
423 :
424 : /*
425 : * Finding per-AG Btree Roots for AGF/AGI Reconstruction
426 : *
427 : * If the AGF or AGI become slightly corrupted, it may be necessary to rebuild
428 : * the AG headers by using the rmap data to rummage through the AG looking for
429 : * btree roots. This is not guaranteed to work if the AG is heavily damaged
430 : * or the rmap data are corrupt.
431 : *
432 : * Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL
433 : * buffers if the AGF is being rebuilt; or the AGF and AGI buffers if the
434 : * AGI is being rebuilt. It must maintain these locks until it's safe for
435 : * other threads to change the btrees' shapes. The caller provides
436 : * information about the btrees to look for by passing in an array of
437 : * xrep_find_ag_btree with the (rmap owner, buf_ops, magic) fields set.
438 : * The (root, height) fields will be set on return if anything is found. The
439 : * last element of the array should have a NULL buf_ops to mark the end of the
440 : * array.
441 : *
442 : * For every rmapbt record matching any of the rmap owners in btree_info,
443 : * read each block referenced by the rmap record. If the block is a btree
444 : * block from this filesystem matching any of the magic numbers and has a
445 : * level higher than what we've already seen, remember the block and the
446 : * height of the tree required to have such a block. When the call completes,
447 : * we return the highest block we've found for each btree description; those
448 : * should be the roots.
449 : */
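/*
 * Example of the calling convention (a sketch; see repair.h for the
 * authoritative struct definition and agheader_repair.c for real users):
 *
 *	struct xrep_find_ag_btree fab[] = {
 *		{ .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_bnobt_buf_ops },
 *		{ .rmap_owner = XFS_RMAP_OWN_AG, .buf_ops = &xfs_cntbt_buf_ops },
 *		{ .buf_ops = NULL },	<- terminates the array
 *	};
 *
 *	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp);
 *	if (!error && fab[0].root != NULLAGBLOCK)
 *		<- fab[0].root/fab[0].height describe the probable bnobt root
 */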
450 :
451 : struct xrep_findroot {
452 : struct xfs_scrub *sc;
453 : struct xfs_buf *agfl_bp;
454 : struct xfs_agf *agf;
455 : struct xrep_find_ag_btree *btree_info;
456 : };
457 :
458 : /* See if our block is in the AGFL. */
459 : STATIC int
460 567292009 : xrep_findroot_agfl_walk(
461 : struct xfs_mount *mp,
462 : xfs_agblock_t bno,
463 : void *priv)
464 : {
465 567292009 : xfs_agblock_t *agbno = priv;
466 :
467 567292009 : return (*agbno == bno) ? -ECANCELED : 0;
468 : }
469 :
470 : /* Does this block match the btree information passed in? */
471 : STATIC int
472 59064814 : xrep_findroot_block(
473 : struct xrep_findroot *ri,
474 : struct xrep_find_ag_btree *fab,
475 : uint64_t owner,
476 : xfs_agblock_t agbno,
477 : bool *done_with_block)
478 : {
479 59064814 : struct xfs_mount *mp = ri->sc->mp;
480 59064814 : struct xfs_buf *bp;
481 59064814 : struct xfs_btree_block *btblock;
482 59064814 : xfs_daddr_t daddr;
483 59064814 : int block_level;
484 59064814 : int error = 0;
485 :
486 59064814 : daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.pag->pag_agno, agbno);
487 :
488 : /*
489 : * Blocks in the AGFL have stale contents that might just happen to
490 : * have a matching magic and uuid. We don't want to pull these blocks
491 : * in as part of a tree root, so we have to filter out the AGFL stuff
492 : * here. If the AGFL looks insane we'll just refuse to repair.
493 : */
494 59064814 : if (owner == XFS_RMAP_OWN_AG) {
495 58384500 : error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
496 : xrep_findroot_agfl_walk, &agbno);
497 58384498 : if (error == -ECANCELED)
498 : return 0;
499 56157915 : if (error)
500 : return error;
501 : }
502 :
503 : /*
504 : * Read the buffer into memory so that we can see if it's a match for
505 : * our btree type. We have no clue if it is beforehand, and we want to
506 : * avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
507 : * will cause needless disk reads in subsequent calls to this function)
508 : * and logging metadata verifier failures.
509 : *
510 : * Therefore, pass in NULL buffer ops. If the buffer was already in
511 : * memory from some other caller it will already have b_ops assigned.
512 : * If it was in memory from a previous unsuccessful findroot_block
513 : * call, the buffer won't have b_ops but it should be clean and ready
514 : * for us to try to verify if the read call succeeds. The same applies
515 : * if the buffer wasn't in memory at all.
516 : *
517 : * Note: If we never match a btree type with this buffer, it will be
518 : * left in memory with NULL b_ops. This shouldn't be a problem unless
519 : * the buffer gets written.
520 : */
521 56838229 : error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
522 : mp->m_bsize, 0, &bp, NULL);
523 56838252 : if (error)
524 : return error;
525 :
526 : /* Ensure the block magic matches the btree type we're looking for. */
527 56838252 : btblock = XFS_BUF_TO_BLOCK(bp);
528 56838252 : ASSERT(fab->buf_ops->magic[1] != 0);
529 56838252 : if (btblock->bb_magic != fab->buf_ops->magic[1])
530 36520731 : goto out;
531 :
532 : /*
533 : * If the buffer already has ops applied and they're not the ones for
534 : * this btree type, we know this block doesn't match the btree and we
535 : * can bail out.
536 : *
537 : * If the buffer ops match ours, someone else has already validated
538 : * the block for us, so we can move on to checking if this is a root
539 : * block candidate.
540 : *
541 : * If the buffer does not have ops, nobody has successfully validated
542 : * the contents and the buffer cannot be dirty. If the magic, uuid,
543 : * and structure match this btree type then we'll move on to checking
544 : * if it's a root block candidate. If there is no match, bail out.
545 : */
546 20317521 : if (bp->b_ops) {
547 20317521 : if (bp->b_ops != fab->buf_ops)
548 0 : goto out;
549 : } else {
550 0 : ASSERT(!xfs_trans_buf_is_dirty(bp));
551 0 : if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
552 0 : &mp->m_sb.sb_meta_uuid))
553 0 : goto out;
554 : /*
555 : * Read verifiers can reference b_ops, so we set the pointer
556 : * here. If the verifier fails we'll reset the buffer state
557 : * to what it was before we touched the buffer.
558 : */
559 0 : bp->b_ops = fab->buf_ops;
560 0 : fab->buf_ops->verify_read(bp);
561 0 : if (bp->b_error) {
562 0 : bp->b_ops = NULL;
563 0 : bp->b_error = 0;
564 0 : goto out;
565 : }
566 :
567 : /*
568 : * Some read verifiers will (re)set b_ops, so we must be
569 : * careful not to change b_ops after running the verifier.
570 : */
571 : }
572 :
573 : /*
574 : * This block passes the magic/uuid and verifier tests for this btree
575 : * type. We don't need the caller to try the other tree types.
576 : */
577 20317521 : *done_with_block = true;
578 :
579 : /*
580 : * Compare this btree block's level to the height of the current
581 : * candidate root block.
582 : *
583 : * If the level matches the root we found previously, throw away both
584 : * blocks because there can't be two candidate roots.
585 : *
586 : * If level is lower in the tree than the root we found previously,
587 : * ignore this block.
588 : */
589 20317521 : block_level = xfs_btree_get_level(btblock);
590 20317521 : if (block_level + 1 == fab->height) {
591 470141 : fab->root = NULLAGBLOCK;
592 470141 : goto out;
593 19847380 : } else if (block_level < fab->height) {
594 19018006 : goto out;
595 : }
596 :
597 : /*
598 : * This is the highest block in the tree that we've found so far.
599 : * Update the btree height to reflect what we've learned from this
600 : * block.
601 : */
602 829374 : fab->height = block_level + 1;
603 :
604 : /*
605 : * If this block doesn't have sibling pointers, then it's the new root
606 : * block candidate. Otherwise, the root will be found farther up the
607 : * tree.
608 : */
609 829374 : if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
610 : btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
611 545241 : fab->root = agbno;
612 : else
613 284133 : fab->root = NULLAGBLOCK;
614 :
615 829374 : trace_xrep_findroot_block(mp, ri->sc->sa.pag->pag_agno, agbno,
616 829374 : be32_to_cpu(btblock->bb_magic), fab->height - 1);
617 56838252 : out:
618 56838252 : xfs_trans_brelse(ri->sc->tp, bp);
619 56838252 : return error;
620 : }
621 :
622 : /*
623 : * Do any of the blocks in this rmap record match one of the btrees we're
624 : * looking for?
625 : */
626 : STATIC int
627 5353377347 : xrep_findroot_rmap(
628 : struct xfs_btree_cur *cur,
629 : const struct xfs_rmap_irec *rec,
630 : void *priv)
631 : {
632 5353377347 : struct xrep_findroot *ri = priv;
633 5353377347 : struct xrep_find_ag_btree *fab;
634 5353377347 : xfs_agblock_t b;
635 5353377347 : bool done;
636 5353377347 : int error = 0;
637 :
638 : /* Ignore anything that isn't AG metadata. */
639 5353377347 : if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
640 : return 0;
641 :
642 : /* Otherwise scan each block + btree type. */
643 1496387359 : for (b = 0; b < rec->rm_blockcount; b++) {
644 1387580913 : done = false;
645 5411885507 : for (fab = ri->btree_info; fab->buf_ops; fab++) {
646 4044617117 : if (rec->rm_owner != fab->rmap_owner)
647 3985557294 : continue;
648 59059823 : error = xrep_findroot_block(ri, fab,
649 59059823 : rec->rm_owner, rec->rm_startblock + b,
650 : &done);
651 59064818 : if (error)
652 0 : return error;
653 59064818 : if (done)
654 : break;
655 : }
656 : }
657 :
658 : return 0;
659 : }
660 :
661 : /* Find the roots of the per-AG btrees described in btree_info. */
662 : int
663 184425 : xrep_find_ag_btree_roots(
664 : struct xfs_scrub *sc,
665 : struct xfs_buf *agf_bp,
666 : struct xrep_find_ag_btree *btree_info,
667 : struct xfs_buf *agfl_bp)
668 : {
669 184425 : struct xfs_mount *mp = sc->mp;
670 184425 : struct xrep_findroot ri;
671 184425 : struct xrep_find_ag_btree *fab;
672 184425 : struct xfs_btree_cur *cur;
673 184425 : int error;
674 :
675 184425 : ASSERT(xfs_buf_islocked(agf_bp));
676 184425 : ASSERT(agfl_bp == NULL || xfs_buf_islocked(agfl_bp));
677 :
678 184425 : ri.sc = sc;
679 184425 : ri.btree_info = btree_info;
680 184425 : ri.agf = agf_bp->b_addr;
681 184425 : ri.agfl_bp = agfl_bp;
682 729652 : for (fab = btree_info; fab->buf_ops; fab++) {
683 545230 : ASSERT(agfl_bp || fab->rmap_owner != XFS_RMAP_OWN_AG);
684 545230 : ASSERT(XFS_RMAP_NON_INODE_OWNER(fab->rmap_owner));
685 545227 : fab->root = NULLAGBLOCK;
686 545227 : fab->height = 0;
687 : }
688 :
689 184422 : cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
690 184428 : error = xfs_rmap_query_all(cur, xrep_findroot_rmap, &ri);
691 184425 : xfs_btree_del_cursor(cur, error);
692 :
693 184427 : return error;
694 : }
695 :
696 : #ifdef CONFIG_XFS_QUOTA
697 : /* Update some quota flags in the superblock. */
698 : void
699 6208 : xrep_update_qflags(
700 : struct xfs_scrub *sc,
701 : unsigned int clear_flags,
702 : unsigned int set_flags)
703 : {
704 6208 : struct xfs_mount *mp = sc->mp;
705 6208 : struct xfs_buf *bp;
706 :
707 6208 : mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
708 6208 : if ((mp->m_qflags & clear_flags) == 0 &&
709 3104 : (mp->m_qflags & set_flags) == set_flags)
710 0 : goto no_update;
711 :
712 6208 : mp->m_qflags &= ~clear_flags;
713 6208 : mp->m_qflags |= set_flags;
714 :
715 6208 : spin_lock(&mp->m_sb_lock);
716 6208 : mp->m_sb.sb_qflags &= ~clear_flags;
717 6208 : mp->m_sb.sb_qflags |= set_flags;
718 6208 : spin_unlock(&mp->m_sb_lock);
719 :
720 : /*
721 : * Update the quota flags in the ondisk superblock without touching
722 : * the summary counters. We have not quiesced inode chunk allocation,
723 : * so we cannot coordinate with updates to the icount and ifree percpu
724 : * counters.
725 : */
726 6208 : bp = xfs_trans_getsb(sc->tp);
727 6208 : xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
728 6208 : xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
729 6208 : xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1);
730 :
731 6208 : no_update:
732 6208 : mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
733 6208 : }
734 :
735 : /* Force a quotacheck the next time we mount. */
736 : void
737 0 : xrep_force_quotacheck(
738 : struct xfs_scrub *sc,
739 : xfs_dqtype_t type)
740 : {
741 0 : uint flag;
742 :
743 0 : flag = xfs_quota_chkd_flag(type);
744 0 : if (!(flag & sc->mp->m_qflags))
745 : return;
746 :
747 0 : xrep_update_qflags(sc, flag, 0);
748 : }
749 :
750 : /*
751 : * Attach dquots to this inode, or schedule quotacheck to fix them.
752 : *
753 : * This function ensures that the appropriate dquots are attached to an inode.
754 : * We cannot allow the dquot code to allocate an on-disk dquot block here
755 : * because we're already in transaction context. The on-disk dquot should
756 : * already exist anyway. If the quota code signals corruption or missing quota
757 : * information, schedule quotacheck, which will repair corruptions in the quota
758 : * metadata.
759 : */
760 : int
761 48112188 : xrep_ino_dqattach(
762 : struct xfs_scrub *sc)
763 : {
764 48112188 : int error;
765 :
766 48112188 : ASSERT(sc->tp != NULL);
767 48112188 : ASSERT(sc->ip != NULL);
768 :
769 48112188 : error = xfs_qm_dqattach(sc->ip);
770 48098760 : switch (error) {
771 0 : case -EFSBADCRC:
772 : case -EFSCORRUPTED:
773 : case -ENOENT:
774 0 : xfs_err_ratelimited(sc->mp,
775 : "inode %llu repair encountered quota error %d, quotacheck forced.",
776 : (unsigned long long)sc->ip->i_ino, error);
777 0 : if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot)
778 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
779 0 : if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot)
780 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
781 0 : if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
782 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
783 : fallthrough;
784 : case -ESRCH:
785 : error = 0;
786 : break;
787 : default:
788 : break;
789 : }
790 :
791 48098760 : return error;
792 : }
793 : #endif /* CONFIG_XFS_QUOTA */
794 :
795 : /*
796 : * Ensure that the inode being repaired is ready to handle a certain number of
797 : * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode
798 : * being repaired and have joined it to the scrub transaction.
799 : */
800 : int
801 2568665 : xrep_ino_ensure_extent_count(
802 : struct xfs_scrub *sc,
803 : int whichfork,
804 : xfs_extnum_t nextents)
805 : {
806 2568665 : xfs_extnum_t max_extents;
807 2568665 : bool large_extcount;
808 :
809 2568665 : large_extcount = xfs_inode_has_large_extent_counts(sc->ip);
810 2568665 : max_extents = xfs_iext_max_nextents(large_extcount, whichfork);
811 2568665 : if (nextents <= max_extents)
812 : return 0;
813 0 : if (large_extcount)
814 : return -EFSCORRUPTED;
815 0 : if (!xfs_has_large_extent_counts(sc->mp))
816 : return -EFSCORRUPTED;
817 :
818 0 : max_extents = xfs_iext_max_nextents(true, whichfork);
819 0 : if (nextents > max_extents)
820 : return -EFSCORRUPTED;
821 :
822 0 : sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
823 0 : xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
824 0 : return 0;
825 : }
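/*
 * Typical use (sketch): before committing a rebuilt fork mapping to the
 * inode, make sure the extent counter field can represent the new count:
 *
 *	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nr_mappings);
 *	if (error)
 *		return error;
 */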
826 :
827 : /* Initialize all the btree cursors for an AG repair. */
828 : void
829 12950816 : xrep_ag_btcur_init(
830 : struct xfs_scrub *sc,
831 : struct xchk_ag *sa)
832 : {
833 12950816 : struct xfs_mount *mp = sc->mp;
834 :
835 : /* Set up a bnobt cursor for cross-referencing. */
836 12950816 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_BNOBT &&
837 : sc->sm->sm_type != XFS_SCRUB_TYPE_CNTBT) {
838 12910467 : sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
839 : sc->sa.pag, XFS_BTNUM_BNO);
840 12910654 : sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
841 : sc->sa.pag, XFS_BTNUM_CNT);
842 : }
843 :
844 : /* Set up an inobt cursor for cross-referencing. */
845 12951226 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_INOBT &&
846 : sc->sm->sm_type != XFS_SCRUB_TYPE_FINOBT) {
847 12877104 : sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp,
848 : sa->agi_bp, XFS_BTNUM_INO);
849 12876690 : if (xfs_has_finobt(mp))
850 12877159 : sa->fino_cur = xfs_inobt_init_cursor(sc->sa.pag,
851 : sc->tp, sa->agi_bp, XFS_BTNUM_FINO);
852 : }
853 :
854 : /* Set up a rmapbt cursor for cross-referencing. */
855 12951004 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT &&
856 : xfs_has_rmapbt(mp))
857 12919915 : sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
858 : sc->sa.pag);
859 :
860 : /* Set up a refcountbt cursor for cross-referencing. */
861 12951962 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT &&
862 : xfs_has_reflink(mp))
863 12918332 : sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
864 : sa->agf_bp, sc->sa.pag);
865 12952456 : }
866 :
867 : /*
868 : * Reinitialize the in-core AG state after a repair by rereading the AGF
869 : * buffer. We had better get the same AGF buffer as the one that's attached
870 : * to the scrub context.
871 : */
872 : int
873 85912 : xrep_reinit_pagf(
874 : struct xfs_scrub *sc)
875 : {
876 85912 : struct xfs_perag *pag = sc->sa.pag;
877 85912 : struct xfs_buf *bp;
878 85912 : int error;
879 :
880 85912 : ASSERT(pag);
881 171824 : ASSERT(xfs_perag_initialised_agf(pag));
882 :
883 85912 : clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
884 85978 : error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp);
885 85983 : if (error)
886 : return error;
887 :
888 85983 : if (bp != sc->sa.agf_bp) {
889 0 : ASSERT(bp == sc->sa.agf_bp);
890 0 : return -EFSCORRUPTED;
891 : }
892 :
893 : return 0;
894 : }
895 :
896 : /*
897 : * Reinitialize the in-core AG state after a repair by rereading the AGI
898 : * buffer. We had better get the same AGI buffer as the one that's attached
899 : * to the scrub context.
900 : */
901 : int
902 73710 : xrep_reinit_pagi(
903 : struct xfs_scrub *sc)
904 : {
905 73710 : struct xfs_perag *pag = sc->sa.pag;
906 73710 : struct xfs_buf *bp;
907 73710 : int error;
908 :
909 73710 : ASSERT(pag);
910 147420 : ASSERT(xfs_perag_initialised_agi(pag));
911 :
912 73710 : clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
913 73729 : error = xfs_ialloc_read_agi(pag, sc->tp, &bp);
914 73689 : if (error)
915 : return error;
916 :
917 73689 : if (bp != sc->sa.agi_bp) {
918 0 : ASSERT(bp == sc->sa.agi_bp);
919 0 : return -EFSCORRUPTED;
920 : }
921 :
922 : return 0;
923 : }
924 :
925 : /*
926 : * Given an active reference to a perag structure, load AG headers and cursors.
927 : * This should only be called to scan an AG while repairing file-based metadata.
928 : */
929 : int
930 12775584 : xrep_ag_init(
931 : struct xfs_scrub *sc,
932 : struct xfs_perag *pag,
933 : struct xchk_ag *sa)
934 : {
935 12775584 : int error;
936 :
937 12775584 : ASSERT(!sa->pag);
938 :
939 12775584 : error = xfs_ialloc_read_agi(pag, sc->tp, &sa->agi_bp);
940 12774330 : if (error)
941 : return error;
942 :
943 12774293 : error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp);
944 12774716 : if (error)
945 : return error;
946 :
947 : /* Grab our own passive reference from the caller's ref. */
948 12774747 : sa->pag = xfs_perag_hold(pag);
949 12775974 : xrep_ag_btcur_init(sc, sa);
950 12775974 : return 0;
951 : }
952 :
953 : /* Reinitialize the per-AG block reservation for the AG we just fixed. */
954 : int
955 1556329355 : xrep_reset_perag_resv(
956 : struct xfs_scrub *sc)
957 : {
958 1556329355 : int error;
959 :
960 1556329355 : if (!(sc->flags & XREP_RESET_PERAG_RESV))
961 : return 0;
962 :
963 118973 : ASSERT(sc->sa.pag != NULL);
964 118973 : ASSERT(sc->ops->type == ST_PERAG);
965 118973 : ASSERT(sc->tp);
966 :
967 118973 : sc->flags &= ~XREP_RESET_PERAG_RESV;
968 118973 : error = xfs_ag_resv_free(sc->sa.pag);
969 119359 : if (error)
970 0 : goto out;
971 119359 : error = xfs_ag_resv_init(sc->sa.pag, sc->tp);
972 119137 : if (error == -ENOSPC) {
973 0 : xfs_err(sc->mp,
974 : "Insufficient free space to reset per-AG reservation for AG %u after repair.",
975 : sc->sa.pag->pag_agno);
976 0 : error = 0;
977 : }
978 :
979 119137 : out:
980 : return error;
981 : }
982 :
983 : /* Decide if we are going to call the repair function for a scrub type. */
984 : bool
985 48937209 : xrep_will_attempt(
986 : struct xfs_scrub *sc)
987 : {
988 : /* Userspace asked us to rebuild the structure regardless. */
989 48937209 : if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
990 : return true;
991 :
992 : /* Let debug users force us into the repair routines. */
993 5745 : if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
994 : return true;
995 :
996 : /* Metadata is corrupt or failed cross-referencing. */
997 5711 : if (xchk_needs_repair(sc->sm))
998 507 : return true;
999 :
1000 : return false;
1001 : }
1002 :
1003 : /* Try to fix some part of a metadata inode by calling another scrubber. */
1004 : STATIC int
1005 19886 : xrep_metadata_inode_subtype(
1006 : struct xfs_scrub *sc,
1007 : unsigned int scrub_type)
1008 : {
1009 19886 : __u32 smtype = sc->sm->sm_type;
1010 19886 : __u32 smflags = sc->sm->sm_flags;
1011 19886 : int error;
1012 :
1013 : /*
1014 : * Let's see if the inode needs repair. We're going to open-code calls
1015 : * to the scrub and repair functions so that we can hang on to the
1016 : * resources that we already acquired instead of using the standard
1017 : * setup/teardown routines.
1018 : */
1019 19886 : sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
1020 19886 : sc->sm->sm_type = scrub_type;
1021 :
1022 19886 : switch (scrub_type) {
1023 7216 : case XFS_SCRUB_TYPE_INODE:
1024 7216 : error = xchk_inode(sc);
1025 7216 : break;
1026 7216 : case XFS_SCRUB_TYPE_BMBTD:
1027 7216 : error = xchk_bmap_data(sc);
1028 7216 : break;
1029 5454 : case XFS_SCRUB_TYPE_BMBTA:
1030 5454 : error = xchk_bmap_attr(sc);
1031 5454 : break;
1032 0 : default:
1033 0 : ASSERT(0);
1034 0 : error = -EFSCORRUPTED;
1035 : }
1036 19886 : if (error)
1037 5454 : goto out;
1038 :
1039 14432 : if (!xrep_will_attempt(sc))
1040 0 : goto out;
1041 :
1042 : /*
1043 : * Repair some part of the inode. This will potentially join the inode
1044 : * to the transaction.
1045 : */
1046 14432 : switch (scrub_type) {
1047 7216 : case XFS_SCRUB_TYPE_INODE:
1048 7216 : error = xrep_inode(sc);
1049 7216 : break;
1050 7216 : case XFS_SCRUB_TYPE_BMBTD:
1051 7216 : error = xrep_bmap(sc, XFS_DATA_FORK, false);
1052 7216 : break;
1053 0 : case XFS_SCRUB_TYPE_BMBTA:
1054 0 : error = xrep_bmap(sc, XFS_ATTR_FORK, false);
1055 0 : break;
1056 : }
1057 14432 : if (error)
1058 1762 : goto out;
1059 :
1060 : /*
1061 : * Finish all deferred intent items and then roll the transaction so
1062 : * that the inode will not be joined to the transaction when we exit
1063 : * the function.
1064 : */
1065 12670 : error = xfs_defer_finish(&sc->tp);
1066 12670 : if (error)
1067 0 : goto out;
1068 12670 : error = xfs_trans_roll(&sc->tp);
1069 12670 : if (error)
1070 0 : goto out;
1071 :
1072 : /*
1073 : * Clear the corruption flags and re-check the metadata that we just
1074 : * repaired.
1075 : */
1076 12670 : sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
1077 :
1078 12670 : switch (scrub_type) {
1079 7216 : case XFS_SCRUB_TYPE_INODE:
1080 7216 : error = xchk_inode(sc);
1081 7216 : break;
1082 5454 : case XFS_SCRUB_TYPE_BMBTD:
1083 5454 : error = xchk_bmap_data(sc);
1084 5454 : break;
1085 0 : case XFS_SCRUB_TYPE_BMBTA:
1086 0 : error = xchk_bmap_attr(sc);
1087 0 : break;
1088 : }
1089 12670 : if (error)
1090 0 : goto out;
1091 :
1092 : /* If corruption persists, the repair has failed. */
1093 12670 : if (xchk_needs_repair(sc->sm)) {
1094 0 : error = -EFSCORRUPTED;
1095 0 : goto out;
1096 : }
1097 12670 : out:
1098 19886 : sc->sm->sm_type = smtype;
1099 19886 : sc->sm->sm_flags = smflags;
1100 19886 : return error;
1101 : }
1102 :
1103 : /*
1104 : * Repair the ondisk forks of a metadata inode. The caller must ensure that
1105 : * sc->ip points to the metadata inode and the ILOCK is held on that inode.
1106 : * The inode must not be joined to the transaction before the call, and will
1107 : * not be afterwards.
1108 : */
1109 : int
1110 7216 : xrep_metadata_inode_forks(
1111 : struct xfs_scrub *sc)
1112 : {
1113 7216 : bool dirty = false;
1114 7216 : int error;
1115 :
1116 : /* Repair the inode record and the data fork. */
1117 7216 : error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
1118 7216 : if (error)
1119 : return error;
1120 :
1121 7216 : error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
1122 7216 : if (error)
1123 : return error;
1124 :
1125 : /* Make sure the attr fork looks ok before we delete it. */
1126 5454 : error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
1127 5454 : if (error)
1128 : return error;
1129 :
1130 : /* Clear the reflink flag since metadata never shares. */
1131 0 : if (xfs_is_reflink_inode(sc->ip)) {
1132 0 : dirty = true;
1133 0 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1134 0 : error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1135 0 : if (error)
1136 : return error;
1137 : }
1138 :
1139 : /* Clear the attr forks since metadata shouldn't have that. */
1140 0 : if (xfs_inode_hasattr(sc->ip)) {
1141 0 : if (!dirty) {
1142 0 : dirty = true;
1143 0 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1144 : }
1145 0 : error = xrep_xattr_reset_fork(sc);
1146 0 : if (error)
1147 : return error;
1148 : }
1149 :
1150 : /*
1151 : * If we modified the inode, roll the transaction but don't rejoin the
1152 : * inode to the new transaction because xrep_bmap_data can do that.
1153 : */
1154 0 : if (dirty) {
1155 0 : error = xfs_trans_roll(&sc->tp);
1156 0 : if (error)
1157 0 : return error;
1158 : dirty = false;
1159 : }
1160 :
1161 : return 0;
1162 : }
1163 :
1164 : /*
1165 : * Set a file's link count, being careful about integer overflows. Returns
1166 : * true if we had to correct an integer overflow.
1167 : */
1168 : bool
1169 192947 : xrep_set_nlink(
1170 : struct xfs_inode *ip,
1171 : uint64_t nlink)
1172 : {
1173 192947 : bool ret = false;
1174 :
1175 192947 : if (nlink > XFS_NLINK_PINNED) {
1176 : /*
1177 : * The observed link count will overflow the nlink field.
1178 : *
1179 : * The VFS won't let users create more hardlinks if the link
1180 : * count is larger than XFS_MAXLINK, but it will let them
1181 : * delete hardlinks. XFS_MAXLINK is half of XFS_NLINK_PINNED,
1182 : * which means that sysadmins could actually fix this situation
1183 : * by deleting links and calling us again.
1184 : *
1185 : * Set the link count to the largest possible value that will
1186 : * fit in the field. This will buy us the most possible time
1187 : * to avoid a UAF should the sysadmins start deleting links.
1188 : * As long as the link count stays above MAXLINK the undercount
1189 : * problem will not get worse.
1190 : */
1191 0 : BUILD_BUG_ON((uint64_t)XFS_MAXLINK >= XFS_NLINK_PINNED);
1192 :
1193 0 : nlink = XFS_NLINK_PINNED;
1194 0 : ret = true;
1195 : }
1196 :
1197 192947 : set_nlink(VFS_I(ip), nlink);
1198 :
1199 192936 : if (VFS_I(ip)->i_nlink == 0) {
1200 : /* had better be on an unlinked list */
1201 0 : ASSERT(xfs_inode_on_unlinked_list(ip));
1202 0 : if (!xfs_inode_on_unlinked_list(ip))
1203 0 : xfs_emerg(ip->i_mount, "IUNLINK ino 0x%llx nlink %u prevun 0x%x nextun 0x%x", ip->i_ino, VFS_I(ip)->i_nlink, ip->i_prev_unlinked, ip->i_next_unlinked);
1204 : } else {
1205 : /* had better not be on an unlinked list */
1206 192936 : ASSERT(!xfs_inode_on_unlinked_list(ip));
1207 192936 : if (xfs_inode_on_unlinked_list(ip))
1208 0 : xfs_emerg(ip->i_mount, "IUNLINK ino 0x%llx nlink %u prevun 0x%x nextun 0x%x", ip->i_ino, VFS_I(ip)->i_nlink, ip->i_prev_unlinked, ip->i_next_unlinked);
1209 : }
1210 :
1211 192936 : return ret;
1212 : }
1213 :
1214 : /*
1215 : * Set up an xfile and a buffer cache so that we can use the xfbtree. Buffer
1216 : * target initialization registers a shrinker, so we cannot be in transaction
1217 : * context. Park our resources in the scrub context and let the teardown
1218 : * function take care of them at the right time.
1219 : */
1220 : int
1221 47054 : xrep_setup_buftarg(
1222 : struct xfs_scrub *sc,
1223 : const char *descr)
1224 : {
1225 47054 : ASSERT(sc->tp == NULL);
1226 :
1227 47054 : return xfile_alloc_buftarg(sc->mp, descr, &sc->xfile_buftarg);
1228 : }
1229 :
1230 : /*
1231 : * Create a dummy transaction for use in a live update hook function. This
1232 : * function MUST NOT be called from regular repair code because the current
1233 : * process' transaction is saved via the cookie.
1234 : */
1235 : int
1236 197889 : xrep_trans_alloc_hook_dummy(
1237 : struct xfs_mount *mp,
1238 : void **cookiep,
1239 : struct xfs_trans **tpp)
1240 : {
1241 197889 : int error;
1242 :
1243 197889 : *cookiep = current->journal_info;
1244 197889 : current->journal_info = NULL;
1245 :
1246 197889 : error = xfs_trans_alloc_empty(mp, tpp);
1247 197889 : if (!error)
1248 : return 0;
1249 :
1250 0 : current->journal_info = *cookiep;
1251 0 : *cookiep = NULL;
1252 0 : return error;
1253 : }
1254 :
1255 : /* Cancel a dummy transaction used by a live update hook function. */
1256 : void
1257 197890 : xrep_trans_cancel_hook_dummy(
1258 : void **cookiep,
1259 : struct xfs_trans *tp)
1260 : {
1261 197890 : xfs_trans_cancel(tp);
1262 197890 : current->journal_info = *cookiep;
1263 197890 : *cookiep = NULL;
1264 197890 : }
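/*
 * Sketch of the intended pairing inside a live update hook (the hook body
 * shown here is hypothetical):
 *
 *	error = xrep_trans_alloc_hook_dummy(mp, &cookie, &tp);
 *	if (error)
 *		return error;
 *	<- use tp to read metadata and update the in-memory structure
 *	xrep_trans_cancel_hook_dummy(&cookie, tp);
 */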
1265 :
1266 : /*
1267 : * See if this buffer can pass the given ->verify_struct() function.
1268 : *
1269 : * If the buffer already has ops attached and they're not the ones that were
1270 : * passed in, we reject the buffer. Otherwise, we perform the structure test
1271 : * (note that we do not check CRCs) and return the outcome of the test. The
1272 : * buffer ops and error state are left unchanged.
1273 : */
1274 : bool
1275 55900 : xrep_buf_verify_struct(
1276 : struct xfs_buf *bp,
1277 : const struct xfs_buf_ops *ops)
1278 : {
1279 55900 : const struct xfs_buf_ops *old_ops = bp->b_ops;
1280 55900 : xfs_failaddr_t fa;
1281 55900 : int old_error;
1282 :
1283 55900 : if (old_ops) {
1284 55900 : if (old_ops != ops)
1285 : return false;
1286 : }
1287 :
1288 55900 : old_error = bp->b_error;
1289 55900 : bp->b_ops = ops;
1290 55900 : fa = bp->b_ops->verify_struct(bp);
1291 55900 : bp->b_ops = old_ops;
1292 55900 : bp->b_error = old_error;
1293 :
1294 55900 : return fa == NULL;
1295 : }
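/*
 * Example use (sketch): reject a candidate buffer that does not pass the
 * structure checks for the btree type being salvaged:
 *
 *	if (!xrep_buf_verify_struct(bp, &xfs_bnobt_buf_ops))
 *		return -EFSCORRUPTED;
 */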