Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_log_format.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_sb.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_alloc_btree.h"
19 : #include "xfs_ialloc.h"
20 : #include "xfs_ialloc_btree.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_refcount_btree.h"
24 : #include "xfs_extent_busy.h"
25 : #include "xfs_ag.h"
26 : #include "xfs_ag_resv.h"
27 : #include "xfs_quota.h"
28 : #include "xfs_qm.h"
29 : #include "xfs_defer.h"
30 : #include "xfs_errortag.h"
31 : #include "xfs_error.h"
32 : #include "xfs_reflink.h"
33 : #include "xfs_health.h"
34 : #include "xfs_buf_xfile.h"
35 : #include "xfs_da_format.h"
36 : #include "xfs_da_btree.h"
37 : #include "xfs_attr.h"
38 : #include "xfs_dir2.h"
39 : #include "scrub/scrub.h"
40 : #include "scrub/common.h"
41 : #include "scrub/trace.h"
42 : #include "scrub/repair.h"
43 : #include "scrub/bitmap.h"
44 : #include "scrub/stats.h"
45 : #include "scrub/xfile.h"
46 : #include "scrub/attr_repair.h"
47 :
48 : /*
49 : * Attempt to repair some metadata, if the metadata is corrupt and userspace
50 : * told us to fix it. This function returns -EAGAIN to mean "re-run scrub",
51 : * and sets XREP_ALREADY_FIXED in sc->flags if it thinks it repaired anything.
52 : */
53 : int
54 18552576 : xrep_attempt(
55 : struct xfs_scrub *sc,
56 : struct xchk_stats_run *run)
57 : {
58 18552576 : u64 repair_start;
59 18552576 : int error = 0;
60 :
61 18552576 : trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);
62 :
63 18552815 : xchk_ag_btcur_free(&sc->sa);
64 :
65 : /* Repair whatever's broken. */
66 18552490 : ASSERT(sc->ops->repair);
67 18552490 : run->repair_attempted = true;
68 18552490 : repair_start = xchk_stats_now();
69 18552825 : error = sc->ops->repair(sc);
70 18552394 : trace_xrep_done(XFS_I(file_inode(sc->file)), sc->sm, error);
71 18552411 : run->repair_ns += xchk_stats_elapsed_ns(repair_start);
72 18552588 : switch (error) {
73 4350065 : case 0:
74 : /*
75 : * Repair succeeded. Commit the fixes and perform a second
76 : * scrub so that we can tell userspace if we fixed the problem.
77 : */
78 4350065 : sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
79 4350065 : sc->flags |= XREP_ALREADY_FIXED;
80 4350065 : run->repair_succeeded = true;
81 4350065 : return -EAGAIN;
82 21 : case -ECHRNG:
83 21 : sc->flags |= XCHK_NEED_DRAIN;
84 21 : run->retries++;
85 21 : return -EAGAIN;
86 0 : case -EDEADLOCK:
87 : /* Tell the caller to try again, this time grabbing all the locks. */
88 0 : if (!(sc->flags & XCHK_TRY_HARDER)) {
89 0 : sc->flags |= XCHK_TRY_HARDER;
90 0 : run->retries++;
91 0 : return -EAGAIN;
92 : }
93 : /*
94 : * We tried harder but still couldn't grab all the resources
95 : * we needed to fix it. The corruption has not been fixed,
96 : * so exit to userspace with the scan's output flags unchanged.
97 : */
98 : return 0;
99 14202502 : default:
100 : /*
101 : * EAGAIN tells the caller to re-scrub, so we cannot return
102 : * that here.
103 : */
104 14202502 : ASSERT(error != -EAGAIN);
105 : return error;
106 : }
107 : }
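
/*
 * Hedged sketch (not the actual driver loop in scrub.c) of how a caller
 * consumes the -EAGAIN contract above: tear down, keep the retry flags that
 * xrep_attempt stashed in sc->flags, and start the scrub over.  The helper
 * name is hypothetical, and xchk_teardown() actually lives in scrub.c.
 */
STATIC int
xrep_example_scrub_and_repair(
	struct xfs_scrub	*sc,
	struct xchk_stats_run	*run)
{
	int			error;

retry_op:
	error = sc->ops->setup(sc);
	if (!error)
		error = sc->ops->scrub(sc);
	if (!error && xrep_will_attempt(sc)) {
		error = xrep_attempt(sc, run);
		if (error == -EAGAIN) {
			/* Drop resources so the next pass starts clean. */
			error = xchk_teardown(sc, 0);
			if (error)
				return error;
			goto retry_op;
		}
	}
	return error;
}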
108 :
109 : /*
110 : * Complain about unfixable problems in the filesystem. We don't log
111 : * corruptions when IFLAG_REPAIR wasn't set on the assumption that the driver
112 : * program is xfs_scrub, which will call back with IFLAG_REPAIR set if the
113 : * administrator isn't running xfs_scrub in no-repairs mode.
114 : *
115 : * Use this helper function because _ratelimited silently declares a static
116 : * structure to track rate limiting information.
117 : */
118 : void
119 0 : xrep_failure(
120 : struct xfs_mount *mp)
121 : {
122 0 : xfs_alert_ratelimited(mp,
123 : "Corruption not fixed during online repair. Unmount and run xfs_repair.");
124 0 : }
125 :
126 : /*
127 : * Repair probe -- userspace uses this to probe if we're willing to repair a
128 : * given mountpoint.
129 : */
130 : int
131 2240 : xrep_probe(
132 : struct xfs_scrub *sc)
133 : {
134 2240 : int error = 0;
135 :
136 2240 : if (xchk_should_terminate(sc, &error))
137 0 : return error;
138 :
139 : return 0;
140 : }
141 :
142 : /*
143 : * Roll a transaction, keeping the AG headers locked and reinitializing
144 : * the btree cursors.
145 : */
146 : int
147 126939 : xrep_roll_ag_trans(
148 : struct xfs_scrub *sc)
149 : {
150 126939 : int error;
151 :
152 : /*
153 : * Keep the AG header buffers locked while we roll the transaction.
154 : * Ensure that both AG buffers are dirty and held when we roll the
155 : * transaction so that they move forward in the log without losing the
156 : * bli (and hence the bli type) when the transaction commits.
157 : *
158 : * Normal code would never hold clean buffers across a roll, but repair
159 : * needs both buffers to maintain a total lock on the AG.
160 : */
161 126939 : if (sc->sa.agi_bp) {
162 126939 : xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
163 126940 : xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
164 : }
165 :
166 126935 : if (sc->sa.agf_bp) {
167 126935 : xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
168 126937 : xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
169 : }
170 :
171 : /*
172 : * Roll the transaction. We still hold the AG header buffers locked
173 : * regardless of whether or not that succeeds. On failure, the buffers
174 : * will be released during teardown on our way out of the kernel. If
175 : * successful, join the buffers to the new transaction and move on.
176 : */
177 126937 : error = xfs_trans_roll(&sc->tp);
178 126933 : if (error)
179 : return error;
180 :
181 : /* Join the AG headers to the new transaction. */
182 126933 : if (sc->sa.agi_bp)
183 126933 : xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
184 126924 : if (sc->sa.agf_bp)
185 126924 : xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
186 :
187 : return 0;
188 : }
189 :
190 : /* Roll the scrub transaction, holding the primary metadata locked. */
191 : int
192 276741 : xrep_roll_trans(
193 : struct xfs_scrub *sc)
194 : {
195 276741 : if (!sc->ip)
196 1012 : return xrep_roll_ag_trans(sc);
197 275729 : return xfs_trans_roll_inode(&sc->tp, sc->ip);
198 : }
199 :
200 : /* Finish all deferred work attached to the repair transaction. */
201 : int
202 168586 : xrep_defer_finish(
203 : struct xfs_scrub *sc)
204 : {
205 168586 : int error;
206 :
207 : /*
208 : * Keep the AG header buffers locked while we complete deferred work
209 : * items. Ensure that both AG buffers are dirty and held when we roll
210 : * the transaction so that they move forward in the log without losing
211 : * the bli (and hence the bli type) when the transaction commits.
212 : *
213 : * Normal code would never hold clean buffers across a roll, but repair
214 : * needs both buffers to maintain a total lock on the AG.
215 : */
216 168586 : if (sc->sa.agi_bp) {
217 157418 : xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
218 157402 : xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
219 : }
220 :
221 168579 : if (sc->sa.agf_bp) {
222 158244 : xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
223 158239 : xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
224 : }
225 :
226 : /*
227 : * Finish all deferred work items. We still hold the AG header buffers
228 : * locked regardless of whether or not that succeeds. On failure, the
229 : * buffers will be released during teardown on our way out of the
230 : * kernel. If successful, join the buffers to the new transaction
231 : * and move on.
232 : */
233 168580 : error = xfs_defer_finish(&sc->tp);
234 168587 : if (error)
235 : return error;
236 :
237 : /*
238 : * Release the hold that we set above because defer_finish won't do
239 : * that for us. The defer roll code redirties held buffers after each
240 : * roll, so the AG header buffers should be ready for logging.
241 : */
242 168587 : if (sc->sa.agi_bp)
243 157419 : xfs_trans_bhold_release(sc->tp, sc->sa.agi_bp);
244 168583 : if (sc->sa.agf_bp)
245 158248 : xfs_trans_bhold_release(sc->tp, sc->sa.agf_bp);
246 :
247 : return 0;
248 : }
249 :
250 : /*
251 : * Does the given AG have enough space to rebuild a btree? Neither AG
252 : * reservation can be critical, and we must have enough space (factoring
253 : * in AG reservations) to construct a whole btree.
254 : */
255 : bool
256 0 : xrep_ag_has_space(
257 : struct xfs_perag *pag,
258 : xfs_extlen_t nr_blocks,
259 : enum xfs_ag_resv_type type)
260 : {
261 0 : return !xfs_ag_resv_critical(pag, XFS_AG_RESV_RMAPBT) &&
262 0 : !xfs_ag_resv_critical(pag, XFS_AG_RESV_METADATA) &&
263 0 : pag->pagf_freeblks > xfs_ag_resv_needed(pag, type) + nr_blocks;
264 : }
265 :
266 : /*
267 : * Figure out how many blocks to reserve for an AG repair. We calculate the
268 : * worst case estimate for the number of blocks we'd need to rebuild one of
269 : * any type of per-AG btree.
270 : */
271 : xfs_extlen_t
272 2631635 : xrep_calc_ag_resblks(
273 : struct xfs_scrub *sc)
274 : {
275 2631635 : struct xfs_mount *mp = sc->mp;
276 2631635 : struct xfs_scrub_metadata *sm = sc->sm;
277 2631635 : struct xfs_perag *pag;
278 2631635 : struct xfs_buf *bp;
279 2631635 : xfs_agino_t icount = NULLAGINO;
280 2631635 : xfs_extlen_t aglen = NULLAGBLOCK;
281 2631635 : xfs_extlen_t usedlen;
282 2631635 : xfs_extlen_t freelen;
283 2631635 : xfs_extlen_t bnobt_sz;
284 2631635 : xfs_extlen_t inobt_sz;
285 2631635 : xfs_extlen_t rmapbt_sz;
286 2631635 : xfs_extlen_t refcbt_sz;
287 2631635 : int error;
288 :
289 2631635 : if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
290 : return 0;
291 :
292 895812 : pag = xfs_perag_get(mp, sm->sm_agno);
293 1791624 : if (xfs_perag_initialised_agi(pag)) {
294 : /* Use in-core icount if possible. */
295 895812 : icount = pag->pagi_count;
296 : } else {
297 : /* Try to get the actual counters from disk. */
298 0 : error = xfs_ialloc_read_agi(pag, NULL, &bp);
299 0 : if (!error) {
300 0 : icount = pag->pagi_count;
301 0 : xfs_buf_relse(bp);
302 : }
303 : }
304 :
305 : /* Now grab the block counters from the AGF. */
306 895812 : error = xfs_alloc_read_agf(pag, NULL, 0, &bp);
307 895818 : if (error) {
308 0 : aglen = pag->block_count;
309 0 : freelen = aglen;
310 0 : usedlen = aglen;
311 : } else {
312 895818 : struct xfs_agf *agf = bp->b_addr;
313 :
314 895818 : aglen = be32_to_cpu(agf->agf_length);
315 895818 : freelen = be32_to_cpu(agf->agf_freeblks);
316 895818 : usedlen = aglen - freelen;
317 895818 : xfs_buf_relse(bp);
318 : }
319 :
320 : /* If the icount is impossible, make some worst-case assumptions. */
321 895823 : if (icount == NULLAGINO ||
322 : !xfs_verify_agino(pag, icount)) {
323 89802 : icount = pag->agino_max - pag->agino_min + 1;
324 : }
325 :
326 : /* If the block counts are impossible, make worst-case assumptions. */
327 895823 : if (aglen == NULLAGBLOCK ||
328 895815 : aglen != pag->block_count ||
329 : freelen >= aglen) {
330 5 : aglen = pag->block_count;
331 5 : freelen = aglen;
332 5 : usedlen = aglen;
333 : }
334 895823 : xfs_perag_put(pag);
335 :
336 895807 : trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
337 : freelen, usedlen);
338 :
339 : /*
340 : * Figure out how many blocks we'd need worst case to rebuild
341 : * each type of btree. Note that we can only rebuild the
342 : * bnobt/cntbt or inobt/finobt as pairs.
343 : */
344 895800 : bnobt_sz = 2 * xfs_allocbt_calc_size(mp, freelen);
345 895781 : if (xfs_has_sparseinodes(mp))
346 895781 : inobt_sz = xfs_iallocbt_calc_size(mp, icount /
347 : XFS_INODES_PER_HOLEMASK_BIT);
348 : else
349 0 : inobt_sz = xfs_iallocbt_calc_size(mp, icount /
350 : XFS_INODES_PER_CHUNK);
351 895811 : if (xfs_has_finobt(mp))
352 895790 : inobt_sz *= 2;
353 895811 : if (xfs_has_reflink(mp))
354 712083 : refcbt_sz = xfs_refcountbt_calc_size(mp, usedlen);
355 : else
356 : refcbt_sz = 0;
357 895809 : if (xfs_has_rmapbt(mp)) {
358 : /*
359 : * Guess how many blocks we need to rebuild the rmapbt.
360 : * For non-reflink filesystems we can't have more records than
361 : * used blocks. However, with reflink it's possible to have
362 : * more than one rmap record per AG block. We don't know how
363 : * many rmaps there could be in the AG, so we start off with
364 : * what we hope is a generous over-estimation.
365 : */
366 712078 : if (xfs_has_reflink(mp))
367 712078 : rmapbt_sz = xfs_rmapbt_calc_size(mp,
368 712078 : (unsigned long long)aglen * 2);
369 : else
370 0 : rmapbt_sz = xfs_rmapbt_calc_size(mp, usedlen);
371 : } else {
372 : rmapbt_sz = 0;
373 : }
374 :
375 895796 : trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
376 : inobt_sz, rmapbt_sz, refcbt_sz);
377 :
378 895737 : return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
379 : }
380 :
381 : /*
382 : * Reconstructing per-AG Btrees
383 : *
384 : * When a space btree is corrupt, we don't bother trying to fix it. Instead,
385 : * we scan secondary space metadata to derive the records that should be in
386 : * the damaged btree, initialize a fresh btree root, and insert the records.
387 : * Note that for rebuilding the rmapbt we scan all the primary data to
388 : * generate the new records.
389 : *
390 : * However, that leaves the matter of removing all the metadata describing the
391 : * old broken structure. For primary metadata we use the rmap data to collect
392 : * every extent with a matching rmap owner (bitmap); we then iterate all other
393 : * metadata structures with the same rmap owner to collect the extents that
394 : * cannot be removed (sublist). We then subtract sublist from bitmap to
395 : * derive the blocks that were used by the old btree. These blocks can be
396 : * reaped.
397 : *
398 : * For rmapbt reconstructions we must use different tactics for extent
399 : * collection. First we iterate all primary metadata (this excludes the old
400 : * rmapbt, obviously) to generate new rmap records. The gaps in the rmap
401 : * records are collected as bitmap. The bnobt records are collected as
402 : * sublist. As with the other btrees we subtract sublist from bitmap, and the
403 : * result (since the rmapbt lives in the free space) are the blocks from the
404 : * old rmapbt.
405 : */
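
/*
 * A minimal sketch of the bitmap subtraction described above, assuming the
 * xagb_bitmap helpers from scrub/bitmap.h and the reaping helper declared in
 * scrub/reap.h; the function name is hypothetical and the two collection
 * walks are elided.
 */
STATIC int
xrep_example_reap_old_btree(
	struct xfs_scrub		*sc,
	const struct xfs_owner_info	*oinfo)
{
	struct xagb_bitmap	old_blocks;	/* every extent with this owner */
	struct xagb_bitmap	keep_blocks;	/* extents still in use (sublist) */
	int			error;

	xagb_bitmap_init(&old_blocks);
	xagb_bitmap_init(&keep_blocks);

	/*
	 * ... fill old_blocks from the rmapbt and keep_blocks from the
	 * surviving structures that share the same rmap owner ...
	 */

	/* bitmap minus sublist = blocks owned only by the old btree. */
	error = xagb_bitmap_disunion(&old_blocks, &keep_blocks);
	if (!error)
		error = xrep_reap_agblocks(sc, &old_blocks, oinfo,
				XFS_AG_RESV_NONE);

	xagb_bitmap_destroy(&keep_blocks);
	xagb_bitmap_destroy(&old_blocks);
	return error;
}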
406 :
407 : /* Ensure the freelist is the correct size. */
408 : int
409 5475 : xrep_fix_freelist(
410 : struct xfs_scrub *sc,
411 : int alloc_flags)
412 : {
413 5475 : struct xfs_alloc_arg args = {0};
414 :
415 5475 : args.mp = sc->mp;
416 5475 : args.tp = sc->tp;
417 5475 : args.agno = sc->sa.pag->pag_agno;
418 5475 : args.alignment = 1;
419 5475 : args.pag = sc->sa.pag;
420 :
421 5475 : return xfs_alloc_fix_freelist(&args, alloc_flags);
422 : }
423 :
424 : /*
425 : * Finding per-AG Btree Roots for AGF/AGI Reconstruction
426 : *
427 : * If the AGF or AGI become slightly corrupted, it may be necessary to rebuild
428 : * the AG headers by using the rmap data to rummage through the AG looking for
429 : * btree roots. This is not guaranteed to work if the AG is heavily damaged
430 : * or the rmap data are corrupt.
431 : *
432 : * Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL
433 : * buffers if the AGF is being rebuilt; or the AGF and AGI buffers if the
434 : * AGI is being rebuilt. It must maintain these locks until it's safe for
435 : * other threads to change the btrees' shapes. The caller provides
436 : * information about the btrees to look for by passing in an array of
437 : * xrep_find_ag_btree with the (rmap_owner, buf_ops) fields set; the magic numbers are taken from buf_ops.
438 : * The (root, height) fields will be set on return if anything is found. The
439 : * last element of the array should have a NULL buf_ops to mark the end of the
440 : * array.
441 : *
442 : * For every rmapbt record matching any of the rmap owners in btree_info,
443 : * read each block referenced by the rmap record. If the block is a btree
444 : * block from this filesystem matching any of the magic numbers and has a
445 : * level higher than what we've already seen, remember the block and the
446 : * height of the tree required to have such a block. When the call completes,
447 : * we return the highest block we've found for each btree description; those
448 : * should be the roots.
449 : */
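
/*
 * Hedged usage sketch: describe the free space btrees to the root finder.
 * The array shape mirrors the AGF repair caller, but this helper and its
 * contents are illustrative; the sentinel entry with a NULL buf_ops
 * terminates the array, and results land in the root/height fields.
 */
STATIC int
xrep_example_find_freesp_roots(
	struct xfs_scrub	*sc,
	struct xfs_buf		*agf_bp,
	struct xfs_buf		*agfl_bp)
{
	struct xrep_find_ag_btree fab[] = {
		{
			.rmap_owner	= XFS_RMAP_OWN_AG,
			.buf_ops	= &xfs_bnobt_buf_ops,
		},
		{
			.rmap_owner	= XFS_RMAP_OWN_AG,
			.buf_ops	= &xfs_cntbt_buf_ops,
		},
		{
			.buf_ops	= NULL,	/* sentinel */
		},
	};
	int			error;

	error = xrep_find_ag_btree_roots(sc, agf_bp, fab, agfl_bp);
	if (error)
		return error;

	/* fab[i].root is NULLAGBLOCK if no unambiguous root was found. */
	return 0;
}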
450 :
451 : struct xrep_findroot {
452 : struct xfs_scrub *sc;
453 : struct xfs_buf *agfl_bp;
454 : struct xfs_agf *agf;
455 : struct xrep_find_ag_btree *btree_info;
456 : };
457 :
458 : /* See if our block is in the AGFL. */
459 : STATIC int
460 400462921 : xrep_findroot_agfl_walk(
461 : struct xfs_mount *mp,
462 : xfs_agblock_t bno,
463 : void *priv)
464 : {
465 400462921 : xfs_agblock_t *agbno = priv;
466 :
467 400462921 : return (*agbno == bno) ? -ECANCELED : 0;
468 : }
469 :
470 : /* Does this block match the btree information passed in? */
471 : STATIC int
472 41646156 : xrep_findroot_block(
473 : struct xrep_findroot *ri,
474 : struct xrep_find_ag_btree *fab,
475 : uint64_t owner,
476 : xfs_agblock_t agbno,
477 : bool *done_with_block)
478 : {
479 41646156 : struct xfs_mount *mp = ri->sc->mp;
480 41646156 : struct xfs_buf *bp;
481 41646156 : struct xfs_btree_block *btblock;
482 41646156 : xfs_daddr_t daddr;
483 41646156 : int block_level;
484 41646156 : int error = 0;
485 :
486 41646156 : daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.pag->pag_agno, agbno);
487 :
488 : /*
489 : * Blocks in the AGFL have stale contents that might just happen to
490 : * have a matching magic and uuid. We don't want to pull these blocks
491 : * in as part of a tree root, so we have to filter out the AGFL stuff
492 : * here. If the AGFL looks insane we'll just refuse to repair.
493 : */
494 41646156 : if (owner == XFS_RMAP_OWN_AG) {
495 41274503 : error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
496 : xrep_findroot_agfl_walk, &agbno);
497 41274506 : if (error == -ECANCELED)
498 : return 0;
499 39684623 : if (error)
500 : return error;
501 : }
502 :
503 : /*
504 : * Read the buffer into memory so that we can see if it's a match for
505 : * our btree type. We have no clue if it is beforehand, and we want to
506 : * avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
507 : * will cause needless disk reads in subsequent calls to this function)
508 : * and logging metadata verifier failures.
509 : *
510 : * Therefore, pass in NULL buffer ops. If the buffer was already in
511 : * memory from some other caller it will already have b_ops assigned.
512 : * If it was in memory from a previous unsuccessful findroot_block
513 : * call, the buffer won't have b_ops but it should be clean and ready
514 : * for us to try to verify if the read call succeeds. The same applies
515 : * if the buffer wasn't in memory at all.
516 : *
517 : * Note: If we never match a btree type with this buffer, it will be
518 : * left in memory with NULL b_ops. This shouldn't be a problem unless
519 : * the buffer gets written.
520 : */
521 40056276 : error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
522 : mp->m_bsize, 0, &bp, NULL);
523 40056278 : if (error)
524 : return error;
525 :
526 : /* Ensure the block magic matches the btree type we're looking for. */
527 40056278 : btblock = XFS_BUF_TO_BLOCK(bp);
528 40056278 : ASSERT(fab->buf_ops->magic[1] != 0);
529 40056278 : if (btblock->bb_magic != fab->buf_ops->magic[1])
530 26044954 : goto out;
531 :
532 : /*
533 : * If the buffer already has ops applied and they're not the ones for
534 : * this btree type, we know this block doesn't match the btree and we
535 : * can bail out.
536 : *
537 : * If the buffer ops match ours, someone else has already validated
538 : * the block for us, so we can move on to checking if this is a root
539 : * block candidate.
540 : *
541 : * If the buffer does not have ops, nobody has successfully validated
542 : * the contents and the buffer cannot be dirty. If the magic, uuid,
543 : * and structure match this btree type then we'll move on to checking
544 : * if it's a root block candidate. If there is no match, bail out.
545 : */
546 14011324 : if (bp->b_ops) {
547 14011262 : if (bp->b_ops != fab->buf_ops)
548 0 : goto out;
549 : } else {
550 62 : ASSERT(!xfs_trans_buf_is_dirty(bp));
551 62 : if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
552 62 : &mp->m_sb.sb_meta_uuid))
553 0 : goto out;
554 : /*
555 : * Read verifiers can reference b_ops, so we set the pointer
556 : * here. If the verifier fails we'll reset the buffer state
557 : * to what it was before we touched the buffer.
558 : */
559 62 : bp->b_ops = fab->buf_ops;
560 62 : fab->buf_ops->verify_read(bp);
561 62 : if (bp->b_error) {
562 0 : bp->b_ops = NULL;
563 0 : bp->b_error = 0;
564 0 : goto out;
565 : }
566 :
567 : /*
568 : * Some read verifiers will (re)set b_ops, so we must be
569 : * careful not to change b_ops after running the verifier.
570 : */
571 : }
572 :
573 : /*
574 : * This block passes the magic/uuid and verifier tests for this btree
575 : * type. We don't need the caller to try the other tree types.
576 : */
577 14011324 : *done_with_block = true;
578 :
579 : /*
580 : * Compare this btree block's level to the height of the current
581 : * candidate root block.
582 : *
583 : * If the level matches the root we found previously, throw away both
584 : * blocks because there can't be two candidate roots.
585 : *
586 : * If level is lower in the tree than the root we found previously,
587 : * ignore this block.
588 : */
589 14011324 : block_level = xfs_btree_get_level(btblock);
590 14011324 : if (block_level + 1 == fab->height) {
591 264689 : fab->root = NULLAGBLOCK;
592 264689 : goto out;
593 13746635 : } else if (block_level < fab->height) {
594 13156417 : goto out;
595 : }
596 :
597 : /*
598 : * This is the highest block in the tree that we've found so far.
599 : * Update the btree height to reflect what we've learned from this
600 : * block.
601 : */
602 590218 : fab->height = block_level + 1;
603 :
604 : /*
605 : * If this block doesn't have sibling pointers, then it's the new root
606 : * block candidate. Otherwise, the root will be found farther up the
607 : * tree.
608 : */
609 590218 : if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
610 : btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
611 385754 : fab->root = agbno;
612 : else
613 204464 : fab->root = NULLAGBLOCK;
614 :
615 590218 : trace_xrep_findroot_block(mp, ri->sc->sa.pag->pag_agno, agbno,
616 590218 : be32_to_cpu(btblock->bb_magic), fab->height - 1);
617 40056278 : out:
618 40056278 : xfs_trans_brelse(ri->sc->tp, bp);
619 40056278 : return error;
620 : }
621 :
622 : /*
623 : * Do any of the blocks in this rmap record match one of the btrees we're
624 : * looking for?
625 : */
626 : STATIC int
627 3663486406 : xrep_findroot_rmap(
628 : struct xfs_btree_cur *cur,
629 : const struct xfs_rmap_irec *rec,
630 : void *priv)
631 : {
632 3663486406 : struct xrep_findroot *ri = priv;
633 3663486406 : struct xrep_find_ag_btree *fab;
634 3663486406 : xfs_agblock_t b;
635 3663486406 : bool done;
636 3663486406 : int error = 0;
637 :
638 : /* Ignore anything that isn't AG metadata. */
639 3663486406 : if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner))
640 : return 0;
641 :
642 : /* Otherwise scan each block + btree type. */
643 793271470 : for (b = 0; b < rec->rm_blockcount; b++) {
644 735267780 : done = false;
645 2854878346 : for (fab = ri->btree_info; fab->buf_ops; fab++) {
646 2133621247 : if (rec->rm_owner != fab->rmap_owner)
647 2091975730 : continue;
648 41645517 : error = xrep_findroot_block(ri, fab,
649 41645517 : rec->rm_owner, rec->rm_startblock + b,
650 : &done);
651 41646160 : if (error)
652 0 : return error;
653 41646160 : if (done)
654 : break;
655 : }
656 : }
657 :
658 : return 0;
659 : }
660 :
661 : /* Find the roots of the per-AG btrees described in btree_info. */
662 : int
663 132282 : xrep_find_ag_btree_roots(
664 : struct xfs_scrub *sc,
665 : struct xfs_buf *agf_bp,
666 : struct xrep_find_ag_btree *btree_info,
667 : struct xfs_buf *agfl_bp)
668 : {
669 132282 : struct xfs_mount *mp = sc->mp;
670 132282 : struct xrep_findroot ri;
671 132282 : struct xrep_find_ag_btree *fab;
672 132282 : struct xfs_btree_cur *cur;
673 132282 : int error;
674 :
675 132282 : ASSERT(xfs_buf_islocked(agf_bp));
676 132282 : ASSERT(agfl_bp == NULL || xfs_buf_islocked(agfl_bp));
677 :
678 132282 : ri.sc = sc;
679 132282 : ri.btree_info = btree_info;
680 132282 : ri.agf = agf_bp->b_addr;
681 132282 : ri.agfl_bp = agfl_bp;
682 518036 : for (fab = btree_info; fab->buf_ops; fab++) {
683 385754 : ASSERT(agfl_bp || fab->rmap_owner != XFS_RMAP_OWN_AG);
684 385754 : ASSERT(XFS_RMAP_NON_INODE_OWNER(fab->rmap_owner));
685 385754 : fab->root = NULLAGBLOCK;
686 385754 : fab->height = 0;
687 : }
688 :
689 132282 : cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
690 132282 : error = xfs_rmap_query_all(cur, xrep_findroot_rmap, &ri);
691 132282 : xfs_btree_del_cursor(cur, error);
692 :
693 132282 : return error;
694 : }
695 :
696 : #ifdef CONFIG_XFS_QUOTA
697 : /* Update some quota flags in the superblock. */
698 : void
699 2084 : xrep_update_qflags(
700 : struct xfs_scrub *sc,
701 : unsigned int clear_flags,
702 : unsigned int set_flags)
703 : {
704 2084 : struct xfs_mount *mp = sc->mp;
705 2084 : struct xfs_buf *bp;
706 :
707 2084 : mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
708 2084 : if ((mp->m_qflags & clear_flags) == 0 &&
709 1042 : (mp->m_qflags & set_flags) == set_flags)
710 0 : goto no_update;
711 :
712 2084 : mp->m_qflags &= ~clear_flags;
713 2084 : mp->m_qflags |= set_flags;
714 :
715 2084 : spin_lock(&mp->m_sb_lock);
716 2084 : mp->m_sb.sb_qflags &= ~clear_flags;
717 2084 : mp->m_sb.sb_qflags |= set_flags;
718 2084 : spin_unlock(&mp->m_sb_lock);
719 :
720 : /*
721 : * Update the quota flags in the ondisk superblock without touching
722 : * the summary counters. We have not quiesced inode chunk allocation,
723 : * so we cannot coordinate with updates to the icount and ifree percpu
724 : * counters.
725 : */
726 2084 : bp = xfs_trans_getsb(sc->tp);
727 2084 : xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
728 2084 : xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
729 2084 : xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1);
730 :
731 2084 : no_update:
732 2084 : mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
733 2084 : }
734 :
735 : /* Force a quotacheck the next time we mount. */
736 : void
737 0 : xrep_force_quotacheck(
738 : struct xfs_scrub *sc,
739 : xfs_dqtype_t type)
740 : {
741 0 : uint flag;
742 :
743 0 : flag = xfs_quota_chkd_flag(type);
744 0 : if (!(flag & sc->mp->m_qflags))
745 : return;
746 :
747 0 : xrep_update_qflags(sc, flag, 0);
748 : }
749 :
750 : /*
751 : * Attach dquots to this inode, or schedule quotacheck to fix them.
752 : *
753 : * This function ensures that the appropriate dquots are attached to an inode.
754 : * We cannot allow the dquot code to allocate an on-disk dquot block here
755 : * because we're already in transaction context. The on-disk dquot should
756 : * already exist anyway. If the quota code signals corruption or missing quota
757 : * information, schedule quotacheck, which will repair corruptions in the quota
758 : * metadata.
759 : */
760 : int
761 18090314 : xrep_ino_dqattach(
762 : struct xfs_scrub *sc)
763 : {
764 18090314 : int error;
765 :
766 18090314 : ASSERT(sc->tp != NULL);
767 18090314 : ASSERT(sc->ip != NULL);
768 :
769 18090314 : error = xfs_qm_dqattach(sc->ip);
770 18090410 : switch (error) {
771 0 : case -EFSBADCRC:
772 : case -EFSCORRUPTED:
773 : case -ENOENT:
774 0 : xfs_err_ratelimited(sc->mp,
775 : "inode %llu repair encountered quota error %d, quotacheck forced.",
776 : (unsigned long long)sc->ip->i_ino, error);
777 0 : if (XFS_IS_UQUOTA_ON(sc->mp) && !sc->ip->i_udquot)
778 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
779 0 : if (XFS_IS_GQUOTA_ON(sc->mp) && !sc->ip->i_gdquot)
780 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
781 0 : if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
782 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
783 : fallthrough;
784 : case -ESRCH:
785 : error = 0;
786 : break;
787 : default:
788 : break;
789 : }
790 :
791 18090410 : return error;
792 : }
793 : #endif /* CONFIG_XFS_QUOTA */
794 :
795 : /*
796 : * Ensure that the inode being repaired is ready to handle a certain number of
797 : * extents, or return EFSCORRUPTED. Caller must hold the ILOCK of the inode
798 : * being repaired and have joined it to the scrub transaction.
799 : */
800 : int
801 60814 : xrep_ino_ensure_extent_count(
802 : struct xfs_scrub *sc,
803 : int whichfork,
804 : xfs_extnum_t nextents)
805 : {
806 60814 : xfs_extnum_t max_extents;
807 60814 : bool large_extcount;
808 :
809 60814 : large_extcount = xfs_inode_has_large_extent_counts(sc->ip);
810 60814 : max_extents = xfs_iext_max_nextents(large_extcount, whichfork);
811 60814 : if (nextents <= max_extents)
812 : return 0;
813 0 : if (large_extcount)
814 : return -EFSCORRUPTED;
815 0 : if (!xfs_has_large_extent_counts(sc->mp))
816 : return -EFSCORRUPTED;
817 :
818 0 : max_extents = xfs_iext_max_nextents(true, whichfork);
819 0 : if (nextents > max_extents)
820 : return -EFSCORRUPTED;
821 :
822 0 : sc->ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
823 0 : xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
824 0 : return 0;
825 : }
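
/*
 * Hedged usage sketch: a fork rebuilder that has tallied its records calls
 * the helper above before writing the new mapping structure.  This wrapper
 * and "nrecords" exist only for illustration.
 */
STATIC int
xrep_example_check_fork_room(
	struct xfs_scrub	*sc,
	xfs_extnum_t		nrecords)
{
	/* Returns -EFSCORRUPTED if the fork can never hold this many. */
	return xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nrecords);
}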
826 :
827 : /* Initialize all the btree cursors for an AG repair. */
828 : void
829 320551 : xrep_ag_btcur_init(
830 : struct xfs_scrub *sc,
831 : struct xchk_ag *sa)
832 : {
833 320551 : struct xfs_mount *mp = sc->mp;
834 :
835 : /* Set up a bnobt cursor for cross-referencing. */
836 320551 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_BNOBT &&
837 : sc->sm->sm_type != XFS_SCRUB_TYPE_CNTBT) {
838 299483 : sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
839 : sc->sa.pag, XFS_BTNUM_BNO);
840 299470 : sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
841 : sc->sa.pag, XFS_BTNUM_CNT);
842 : }
843 :
844 : /* Set up a inobt cursor for cross-referencing. */
845 320546 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_INOBT &&
846 : sc->sm->sm_type != XFS_SCRUB_TYPE_FINOBT) {
847 291320 : sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp,
848 : sa->agi_bp, XFS_BTNUM_INO);
849 291315 : if (xfs_has_finobt(mp))
850 291317 : sa->fino_cur = xfs_inobt_init_cursor(sc->sa.pag,
851 : sc->tp, sa->agi_bp, XFS_BTNUM_FINO);
852 : }
853 :
854 : /* Set up a rmapbt cursor for cross-referencing. */
855 320557 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT &&
856 : xfs_has_rmapbt(mp))
857 311651 : sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
858 : sc->sa.pag);
859 :
860 : /* Set up a refcountbt cursor for cross-referencing. */
861 320547 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT &&
862 : xfs_has_reflink(mp))
863 303862 : sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
864 : sa->agf_bp, sc->sa.pag);
865 320549 : }
866 :
867 : /*
868 : * Reinitialize the in-core AG state after a repair by rereading the AGF
869 : * buffer. We had better get the same AGF buffer as the one that's attached
870 : * to the scrub context.
871 : */
872 : int
873 42254 : xrep_reinit_pagf(
874 : struct xfs_scrub *sc)
875 : {
876 42254 : struct xfs_perag *pag = sc->sa.pag;
877 42254 : struct xfs_buf *bp;
878 42254 : int error;
879 :
880 42254 : ASSERT(pag);
881 84508 : ASSERT(xfs_perag_initialised_agf(pag));
882 :
883 42254 : clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
884 42254 : error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp);
885 42253 : if (error)
886 : return error;
887 :
888 42253 : if (bp != sc->sa.agf_bp) {
889 0 : ASSERT(bp == sc->sa.agf_bp);
890 0 : return -EFSCORRUPTED;
891 : }
892 :
893 : return 0;
894 : }
895 :
896 : /*
897 : * Reinitialize the in-core AG state after a repair by rereading the AGI
898 : * buffer. We had better get the same AGI buffer as the one that's attached
899 : * to the scrub context.
900 : */
901 : int
902 29217 : xrep_reinit_pagi(
903 : struct xfs_scrub *sc)
904 : {
905 29217 : struct xfs_perag *pag = sc->sa.pag;
906 29217 : struct xfs_buf *bp;
907 29217 : int error;
908 :
909 29217 : ASSERT(pag);
910 58434 : ASSERT(xfs_perag_initialised_agi(pag));
911 :
912 29217 : clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
913 29218 : error = xfs_ialloc_read_agi(pag, sc->tp, &bp);
914 29215 : if (error)
915 : return error;
916 :
917 29215 : if (bp != sc->sa.agi_bp) {
918 0 : ASSERT(bp == sc->sa.agi_bp);
919 0 : return -EFSCORRUPTED;
920 : }
921 :
922 : return 0;
923 : }
924 :
925 : /*
926 : * Given an active reference to a perag structure, load AG headers and cursors.
927 : * This should only be called to scan an AG while repairing file-based metadata.
928 : */
929 : int
930 244666 : xrep_ag_init(
931 : struct xfs_scrub *sc,
932 : struct xfs_perag *pag,
933 : struct xchk_ag *sa)
934 : {
935 244666 : int error;
936 :
937 244666 : ASSERT(!sa->pag);
938 :
939 244666 : error = xfs_ialloc_read_agi(pag, sc->tp, &sa->agi_bp);
940 244666 : if (error)
941 : return error;
942 :
943 244666 : error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp);
944 244666 : if (error)
945 : return error;
946 :
947 : /* Grab our own passive reference from the caller's ref. */
948 244666 : sa->pag = xfs_perag_hold(pag);
949 244666 : xrep_ag_btcur_init(sc, sa);
950 244666 : return 0;
951 : }
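
/*
 * Hedged usage sketch for a file-based repair that must inspect one AG:
 * grab the headers and cursors, scan, then release.  xchk_ag_free() is the
 * usual teardown from scrub/common.c; this helper name and the scan body
 * are placeholders.
 */
STATIC int
xrep_example_scan_ag(
	struct xfs_scrub	*sc,
	struct xfs_perag	*pag)
{
	struct xchk_ag		sa = { };
	int			error;

	error = xrep_ag_init(sc, pag, &sa);
	if (error)
		return error;

	/* ... query sa.bno_cur / sa.rmap_cur / etc. ... */

	xchk_ag_free(sc, &sa);
	return 0;
}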
952 :
953 : /* Reinitialize the per-AG block reservation for the AG we just fixed. */
954 : int
955 928484607 : xrep_reset_perag_resv(
956 : struct xfs_scrub *sc)
957 : {
958 928484607 : int error;
959 :
960 928484607 : if (!(sc->flags & XREP_RESET_PERAG_RESV))
961 : return 0;
962 :
963 50317 : ASSERT(sc->sa.pag != NULL);
964 50317 : ASSERT(sc->ops->type == ST_PERAG);
965 50317 : ASSERT(sc->tp);
966 :
967 50317 : sc->flags &= ~XREP_RESET_PERAG_RESV;
968 50317 : error = xfs_ag_resv_free(sc->sa.pag);
969 50356 : if (error)
970 0 : goto out;
971 50356 : error = xfs_ag_resv_init(sc->sa.pag, sc->tp);
972 50336 : if (error == -ENOSPC) {
973 0 : xfs_err(sc->mp,
974 : "Insufficient free space to reset per-AG reservation for AG %u after repair.",
975 : sc->sa.pag->pag_agno);
976 0 : error = 0;
977 : }
978 :
979 50336 : out:
980 : return error;
981 : }
982 :
983 : /* Decide if we are going to call the repair function for a scrub type. */
984 : bool
985 18561305 : xrep_will_attempt(
986 : struct xfs_scrub *sc)
987 : {
988 : /* Userspace asked us to rebuild the structure regardless. */
989 18561305 : if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
990 : return true;
991 :
992 : /* Let debug users force us into the repair routines. */
993 2393 : if (XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
994 : return true;
995 :
996 : /* Metadata is corrupt or failed cross-referencing. */
997 2383 : if (xchk_needs_repair(sc->sm))
998 78 : return true;
999 :
1000 : return false;
1001 : }
1002 :
1003 : /* Try to fix some part of a metadata inode by calling another scrubber. */
1004 : STATIC int
1005 8891 : xrep_metadata_inode_subtype(
1006 : struct xfs_scrub *sc,
1007 : unsigned int scrub_type)
1008 : {
1009 8891 : __u32 smtype = sc->sm->sm_type;
1010 8891 : __u32 smflags = sc->sm->sm_flags;
1011 8891 : int error;
1012 :
1013 : /*
1014 : * Let's see if the inode needs repair. We're going to open-code calls
1015 : * to the scrub and repair functions so that we can hang on to the
1016 : * resources that we already acquired instead of using the standard
1017 : * setup/teardown routines.
1018 : */
1019 8891 : sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
1020 8891 : sc->sm->sm_type = scrub_type;
1021 :
1022 8891 : switch (scrub_type) {
1023 3280 : case XFS_SCRUB_TYPE_INODE:
1024 3280 : error = xchk_inode(sc);
1025 3280 : break;
1026 3280 : case XFS_SCRUB_TYPE_BMBTD:
1027 3280 : error = xchk_bmap_data(sc);
1028 3280 : break;
1029 2331 : case XFS_SCRUB_TYPE_BMBTA:
1030 2331 : error = xchk_bmap_attr(sc);
1031 2331 : break;
1032 0 : default:
1033 0 : ASSERT(0);
1034 0 : error = -EFSCORRUPTED;
1035 : }
1036 8891 : if (error)
1037 2331 : goto out;
1038 :
1039 6560 : if (!xrep_will_attempt(sc))
1040 0 : goto out;
1041 :
1042 : /*
1043 : * Repair some part of the inode. This will potentially join the inode
1044 : * to the transaction.
1045 : */
1046 6560 : switch (scrub_type) {
1047 3280 : case XFS_SCRUB_TYPE_INODE:
1048 3280 : error = xrep_inode(sc);
1049 3280 : break;
1050 3280 : case XFS_SCRUB_TYPE_BMBTD:
1051 3280 : error = xrep_bmap(sc, XFS_DATA_FORK, false);
1052 3280 : break;
1053 0 : case XFS_SCRUB_TYPE_BMBTA:
1054 0 : error = xrep_bmap(sc, XFS_ATTR_FORK, false);
1055 0 : break;
1056 : }
1057 6560 : if (error)
1058 949 : goto out;
1059 :
1060 : /*
1061 : * Finish all deferred intent items and then roll the transaction so
1062 : * that the inode will not be joined to the transaction when we exit
1063 : * the function.
1064 : */
1065 5611 : error = xfs_defer_finish(&sc->tp);
1066 5611 : if (error)
1067 0 : goto out;
1068 5611 : error = xfs_trans_roll(&sc->tp);
1069 5611 : if (error)
1070 0 : goto out;
1071 :
1072 : /*
1073 : * Clear the corruption flags and re-check the metadata that we just
1074 : * repaired.
1075 : */
1076 5611 : sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
1077 :
1078 5611 : switch (scrub_type) {
1079 3280 : case XFS_SCRUB_TYPE_INODE:
1080 3280 : error = xchk_inode(sc);
1081 3280 : break;
1082 2331 : case XFS_SCRUB_TYPE_BMBTD:
1083 2331 : error = xchk_bmap_data(sc);
1084 2331 : break;
1085 0 : case XFS_SCRUB_TYPE_BMBTA:
1086 0 : error = xchk_bmap_attr(sc);
1087 0 : break;
1088 : }
1089 5611 : if (error)
1090 0 : goto out;
1091 :
1092 : /* If corruption persists, the repair has failed. */
1093 5611 : if (xchk_needs_repair(sc->sm)) {
1094 0 : error = -EFSCORRUPTED;
1095 0 : goto out;
1096 : }
1097 5611 : out:
1098 8891 : sc->sm->sm_type = smtype;
1099 8891 : sc->sm->sm_flags = smflags;
1100 8891 : return error;
1101 : }
1102 :
1103 : /*
1104 : * Repair the ondisk forks of a metadata inode. The caller must ensure that
1105 : * sc->ip points to the metadata inode and the ILOCK is held on that inode.
1106 : * The inode must not be joined to the transaction before the call, and will
1107 : * not be afterwards.
1108 : */
1109 : int
1110 3280 : xrep_metadata_inode_forks(
1111 : struct xfs_scrub *sc)
1112 : {
1113 3280 : bool dirty = false;
1114 3280 : int error;
1115 :
1116 : /* Repair the inode record and the data fork. */
1117 3280 : error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
1118 3280 : if (error)
1119 : return error;
1120 :
1121 3280 : error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
1122 3280 : if (error)
1123 : return error;
1124 :
1125 : /* Make sure the attr fork looks ok before we delete it. */
1126 2331 : error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
1127 2331 : if (error)
1128 : return error;
1129 :
1130 : /* Clear the reflink flag since metadata never shares. */
1131 0 : if (xfs_is_reflink_inode(sc->ip)) {
1132 0 : dirty = true;
1133 0 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1134 0 : error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1135 0 : if (error)
1136 : return error;
1137 : }
1138 :
1139 : /* Clear the attr forks since metadata shouldn't have that. */
1140 0 : if (xfs_inode_hasattr(sc->ip)) {
1141 0 : if (!dirty) {
1142 0 : dirty = true;
1143 0 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1144 : }
1145 0 : error = xrep_xattr_reset_fork(sc);
1146 0 : if (error)
1147 : return error;
1148 : }
1149 :
1150 : /*
1151 : * If we modified the inode, roll the transaction but don't rejoin the
1152 : * inode to the new transaction because xrep_bmap_data can do that.
1153 : */
1154 0 : if (dirty) {
1155 0 : error = xfs_trans_roll(&sc->tp);
1156 0 : if (error)
1157 0 : return error;
1158 : dirty = false;
1159 : }
1160 :
1161 : return 0;
1162 : }
1163 :
1164 : /*
1165 : * Set a file's link count, being careful about integer overflows. Returns
1166 : * true if we had to correct an integer overflow.
1167 : */
1168 : bool
1169 12812 : xrep_set_nlink(
1170 : struct xfs_inode *ip,
1171 : uint64_t nlink)
1172 : {
1173 12812 : bool ret = false;
1174 :
1175 12812 : if (nlink > XFS_NLINK_PINNED) {
1176 : /*
1177 : * The observed link count will overflow the nlink field.
1178 : *
1179 : * The VFS won't let users create more hardlinks if the link
1180 : * count is larger than XFS_MAXLINK, but it will let them
1181 : * delete hardlinks. XFS_MAXLINK is half of XFS_NLINK_PINNED,
1182 : * which means that sysadmins could actually fix this situation
1183 : * by deleting links and calling us again.
1184 : *
1185 : * Set the link count to the largest possible value that will
1186 : * fit in the field. This will buy us the most possible time
1187 : * to avoid a UAF should the sysadmins start deleting links.
1188 : * As long as the link count stays above MAXLINK the undercount
1189 : * problem will not get worse.
1190 : */
1191 0 : BUILD_BUG_ON((uint64_t)XFS_MAXLINK >= XFS_NLINK_PINNED);
1192 :
1193 0 : nlink = XFS_NLINK_PINNED;
1194 0 : ret = true;
1195 : }
1196 :
1197 12812 : set_nlink(VFS_I(ip), nlink);
1198 :
1199 12812 : if (VFS_I(ip)->i_nlink == 0) {
1200 : /* had better be on an unlinked list */
1201 0 : ASSERT(xfs_inode_on_unlinked_list(ip));
1202 0 : if (!xfs_inode_on_unlinked_list(ip))
1203 0 : xfs_emerg(ip->i_mount, "IUNLINK ino 0x%llx nlink %u prevun 0x%x nextun 0x%x", ip->i_ino, VFS_I(ip)->i_nlink, ip->i_prev_unlinked, ip->i_next_unlinked);
1204 : } else {
1205 : /* had better not be on an unlinked list */
1206 12812 : ASSERT(!xfs_inode_on_unlinked_list(ip));
1207 12812 : if (xfs_inode_on_unlinked_list(ip))
1208 0 : xfs_emerg(ip->i_mount, "IUNLINK ino 0x%llx nlink %u prevun 0x%x nextun 0x%x", ip->i_ino, VFS_I(ip)->i_nlink, ip->i_prev_unlinked, ip->i_next_unlinked);
1209 : }
1210 :
1211 12812 : return ret;
1212 : }
1213 :
1214 : /*
1215 : * Set up an xfile and a buffer cache so that we can use the xfbtree. Buffer
1216 : * target initialization registers a shrinker, so we cannot be in transaction
1217 : * context. Park our resources in the scrub context and let the teardown
1218 : * function take care of them at the right time.
1219 : */
1220 : int
1221 21331 : xrep_setup_buftarg(
1222 : struct xfs_scrub *sc,
1223 : const char *descr)
1224 : {
1225 21331 : ASSERT(sc->tp == NULL);
1226 :
1227 21331 : return xfile_alloc_buftarg(sc->mp, descr, &sc->xfile_buftarg);
1228 : }
1229 :
1230 : /*
1231 : * Create a dummy transaction for use in a live update hook function. This
1232 : * function MUST NOT be called from regular repair code because the current
1233 : * process' transaction is saved via the cookie.
1234 : */
1235 : int
1236 95663 : xrep_trans_alloc_hook_dummy(
1237 : struct xfs_mount *mp,
1238 : void **cookiep,
1239 : struct xfs_trans **tpp)
1240 : {
1241 95663 : int error;
1242 :
1243 95663 : *cookiep = current->journal_info;
1244 95663 : current->journal_info = NULL;
1245 :
1246 95663 : error = xfs_trans_alloc_empty(mp, tpp);
1247 95663 : if (!error)
1248 : return 0;
1249 :
1250 0 : current->journal_info = *cookiep;
1251 0 : *cookiep = NULL;
1252 0 : return error;
1253 : }
1254 :
1255 : /* Cancel a dummy transaction used by a live update hook function. */
1256 : void
1257 95663 : xrep_trans_cancel_hook_dummy(
1258 : void **cookiep,
1259 : struct xfs_trans *tp)
1260 : {
1261 95663 : xfs_trans_cancel(tp);
1262 95663 : current->journal_info = *cookiep;
1263 95663 : *cookiep = NULL;
1264 95663 : }
1265 :
1266 : /*
1267 : * See if this buffer can pass the given ->verify_struct() function.
1268 : *
1269 : * If the buffer already has ops attached and they're not the ones that were
1270 : * passed in, we reject the buffer. Otherwise, we perform the structure test
1271 : * (note that we do not check CRCs) and return the outcome of the test. The
1272 : * buffer ops and error state are left unchanged.
1273 : */
1274 : bool
1275 42635 : xrep_buf_verify_struct(
1276 : struct xfs_buf *bp,
1277 : const struct xfs_buf_ops *ops)
1278 : {
1279 42635 : const struct xfs_buf_ops *old_ops = bp->b_ops;
1280 42635 : xfs_failaddr_t fa;
1281 42635 : int old_error;
1282 :
1283 42635 : if (old_ops) {
1284 42635 : if (old_ops != ops)
1285 : return false;
1286 : }
1287 :
1288 42635 : old_error = bp->b_error;
1289 42635 : bp->b_ops = ops;
1290 42635 : fa = bp->b_ops->verify_struct(bp);
1291 42635 : bp->b_ops = old_ops;
1292 42635 : bp->b_error = old_error;
1293 :
1294 42635 : return fa == NULL;
1295 : }
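
/*
 * Hedged usage sketch: screen a candidate buffer without disturbing its
 * ops or error state.  The buffer is assumed to be one the caller already
 * read and locked; xfs_sb_buf_ops stands in for whatever ops the repair
 * expects, and the helper name is hypothetical.
 */
STATIC int
xrep_example_check_sb_buf(
	struct xfs_buf		*bp)
{
	if (!xrep_buf_verify_struct(bp, &xfs_sb_buf_ops))
		return -EFSCORRUPTED;
	return 0;
}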