Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_btree_staging.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_icache.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_alloc_btree.h"
20 : #include "xfs_ialloc.h"
21 : #include "xfs_ialloc_btree.h"
22 : #include "xfs_refcount_btree.h"
23 : #include "xfs_rmap.h"
24 : #include "xfs_rmap_btree.h"
25 : #include "xfs_log.h"
26 : #include "xfs_trans_priv.h"
27 : #include "xfs_da_format.h"
28 : #include "xfs_da_btree.h"
29 : #include "xfs_dir2_priv.h"
30 : #include "xfs_dir2.h"
31 : #include "xfs_attr.h"
32 : #include "xfs_reflink.h"
33 : #include "xfs_ag.h"
34 : #include "xfs_error.h"
35 : #include "xfs_quota.h"
36 : #include "xfs_swapext.h"
37 : #include "xfs_rtbitmap.h"
38 : #include "xfs_rtgroup.h"
39 : #include "xfs_rtrmap_btree.h"
40 : #include "xfs_bmap_util.h"
41 : #include "xfs_rtrefcount_btree.h"
42 : #include "scrub/scrub.h"
43 : #include "scrub/common.h"
44 : #include "scrub/trace.h"
45 : #include "scrub/repair.h"
46 : #include "scrub/health.h"
47 :
48 : /* Common code for the metadata scrubbers. */
49 :
50 : /*
51 : * Handling operational errors.
52 : *
53 : * The *_process_error() family of functions are used to process error return
54 : * codes from functions called as part of a scrub operation.
55 : *
56 : * If there's no error, we return true to tell the caller that it's ok
57 : * to move on to the next check in its list.
58 : *
59 : * For non-verifier errors (e.g. ENOMEM) we return false to tell the
60 : * caller that something bad happened, and we preserve *error so that
61 : * the caller can return the *error up the stack to userspace.
62 : *
63 : * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
64 : * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words,
65 : * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
66 : * not via return codes. We return false to tell the caller that
67 : * something bad happened. Since the error has been cleared, the caller
68 : * will (presumably) return that zero and scrubbing will move on to
69 : * whatever's next.
70 : *
71 : * ftrace can be used to record the precise metadata location and the
72 : * approximate code location of the failed operation.
73 : */
74 :
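/*
 * Usage sketch (hypothetical fragment, not part of this file): the boolean
 * result gates whether the scrubber keeps going, while *error carries any
 * fatal errno back to userspace.  Verifier failures become OFLAG_CORRUPT
 * plus a zero return code, so the caller exits cleanly.
 */
static int
xchk_example_check_agf(
	struct xfs_scrub	*sc)
{
	struct xfs_buf		*agf_bp;
	int			error;

	error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp);
	if (!xchk_process_error(sc, sc->sa.pag->pag_agno, 0, &error))
		return error;	/* zero for verifier errors, errno otherwise */

	/* ...actual AGF checks would go here... */
	xfs_trans_brelse(sc->tp, agf_bp);
	return 0;
}
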
75 : /* Check for operational errors. */
76 : static bool
77 207329177 : __xchk_process_error(
78 : struct xfs_scrub *sc,
79 : xfs_agnumber_t agno,
80 : xfs_agblock_t bno,
81 : int *error,
82 : __u32 errflag,
83 : void *ret_ip)
84 : {
85 207329177 : switch (*error) {
86 : case 0:
87 : return true;
88 87082 : case -EDEADLOCK:
89 : case -ECHRNG:
90 : /* Used to restart an op with deadlock avoidance. */
91 174164 : trace_xchk_deadlock_retry(
92 87082 : sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
93 : sc->sm, *error);
94 87082 : break;
95 122 : case -ECANCELED:
96 : /*
97 : * ECANCELED here means that the caller set one of the scrub
98 : * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
99 : * quickly. Set error to zero and do not continue.
100 : */
101 122 : trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
102 122 : *error = 0;
103 122 : break;
104 99 : case -EFSBADCRC:
105 : case -EFSCORRUPTED:
106 : /* Note the badness but don't abort. */
107 99 : sc->sm->sm_flags |= errflag;
108 99 : xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d errflag 0x%x ret_ip %pS",
109 99 : xchk_type_string(sc->sm->sm_type),
110 : agno,
111 : bno,
112 : *error,
113 : errflag,
114 : ret_ip);
115 99 : *error = 0;
116 136 : fallthrough;
117 136 : default:
118 136 : if (*error)
119 37 : xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
120 37 : xchk_type_string(sc->sm->sm_type),
121 : agno,
122 : bno,
123 : *error,
124 : ret_ip);
125 136 : trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
126 136 : break;
127 : }
128 : return false;
129 : }
130 :
131 : bool
132 6626750 : xchk_process_error(
133 : struct xfs_scrub *sc,
134 : xfs_agnumber_t agno,
135 : xfs_agblock_t bno,
136 : int *error)
137 : {
138 6626750 : return __xchk_process_error(sc, agno, bno, error,
139 : XFS_SCRUB_OFLAG_CORRUPT, __return_address);
140 : }
141 :
142 : bool
143 49807 : xchk_process_rt_error(
144 : struct xfs_scrub *sc,
145 : xfs_rgnumber_t rgno,
146 : xfs_rgblock_t rgbno,
147 : int *error)
148 : {
149 49807 : return __xchk_process_error(sc, rgno, rgbno, error,
150 : XFS_SCRUB_OFLAG_CORRUPT, __return_address);
151 : }
152 :
153 : bool
154 201005482 : xchk_xref_process_error(
155 : struct xfs_scrub *sc,
156 : xfs_agnumber_t agno,
157 : xfs_agblock_t bno,
158 : int *error)
159 : {
160 201005482 : return __xchk_process_error(sc, agno, bno, error,
161 : XFS_SCRUB_OFLAG_XFAIL, __return_address);
162 : }
163 :
164 : bool
165 0 : xchk_xref_process_rt_error(
166 : struct xfs_scrub *sc,
167 : xfs_rgnumber_t rgno,
168 : xfs_rgblock_t rgbno,
169 : int *error)
170 : {
171 0 : return __xchk_process_error(sc, rgno, rgbno, error,
172 : XFS_SCRUB_OFLAG_XFAIL, __return_address);
173 : }
174 :
175 : /* Check for operational errors for a file offset. */
176 : static bool
177 1779623342 : __xchk_fblock_process_error(
178 : struct xfs_scrub *sc,
179 : int whichfork,
180 : xfs_fileoff_t offset,
181 : int *error,
182 : __u32 errflag,
183 : void *ret_ip)
184 : {
185 1779623342 : switch (*error) {
186 : case 0:
187 : return true;
188 0 : case -EDEADLOCK:
189 : case -ECHRNG:
190 : /* Used to restart an op with deadlock avoidance. */
191 0 : trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
192 0 : break;
193 11 : case -ECANCELED:
194 : /*
195 : * ECANCELED here means that the caller set one of the scrub
196 : * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
197 : * quickly. Set error to zero and do not continue.
198 : */
199 11 : trace_xchk_file_op_error(sc, whichfork, offset, *error,
200 : ret_ip);
201 11 : *error = 0;
202 11 : break;
203 0 : case -EFSBADCRC:
204 : case -EFSCORRUPTED:
205 : /* Note the badness but don't abort. */
206 0 : sc->sm->sm_flags |= errflag;
207 0 : xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d errflag 0x%x ret_ip %pS",
208 0 : sc->ip->i_ino,
209 : whichfork,
210 0 : xchk_type_string(sc->sm->sm_type),
211 : offset,
212 : *error,
213 : errflag,
214 : ret_ip);
215 0 : *error = 0;
216 1 : fallthrough;
217 1 : default:
218 1 : if (*error)
219 2 : xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu error %d ret_ip %pS",
220 1 : sc->ip->i_ino,
221 : whichfork,
222 1 : xchk_type_string(sc->sm->sm_type),
223 : offset,
224 : *error,
225 : ret_ip);
226 1 : trace_xchk_file_op_error(sc, whichfork, offset, *error,
227 : ret_ip);
228 1 : break;
229 : }
230 : return false;
231 : }
232 :
233 : bool
234 1265269190 : xchk_fblock_process_error(
235 : struct xfs_scrub *sc,
236 : int whichfork,
237 : xfs_fileoff_t offset,
238 : int *error)
239 : {
240 1265936613 : return __xchk_fblock_process_error(sc, whichfork, offset, error,
241 : XFS_SCRUB_OFLAG_CORRUPT, __return_address);
242 : }
243 :
244 : bool
245 515328155 : xchk_fblock_xref_process_error(
246 : struct xfs_scrub *sc,
247 : int whichfork,
248 : xfs_fileoff_t offset,
249 : int *error)
250 : {
251 515328155 : return __xchk_fblock_process_error(sc, whichfork, offset, error,
252 : XFS_SCRUB_OFLAG_XFAIL, __return_address);
253 : }
254 :
255 : /*
256 : * Handling scrub corruption/optimization/warning checks.
257 : *
258 : * The *_set_{corrupt,preen,warning}() family of functions are used to
259 : * record the presence of metadata that is incorrect (corrupt), could be
260 : * optimized somehow (preen), or should be flagged for administrative
261 : * review but is not incorrect (warn).
262 : *
263 : * ftrace can be used to record the precise metadata location and
264 : * approximate code location of the failed check.
265 : */
266 :
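/*
 * Usage sketch (hypothetical fragment): a file scrubber that spots an
 * overlapping mapping flags the fork offset and keeps scanning rather than
 * returning an error; userspace sees OFLAG_CORRUPT in sm_flags.
 */
static void
xchk_example_check_mapping(
	struct xfs_scrub		*sc,
	const struct xfs_bmbt_irec	*prev,
	const struct xfs_bmbt_irec	*irec)
{
	if (irec->br_startoff < prev->br_startoff + prev->br_blockcount)
		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, irec->br_startoff);
}
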
267 : /* Record a block which could be optimized. */
268 : void
269 651062 : xchk_block_set_preen(
270 : struct xfs_scrub *sc,
271 : struct xfs_buf *bp)
272 : {
273 651062 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
274 651062 : trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
275 650997 : }
276 :
277 : /*
278 : * Record an inode which could be optimized. The trace data will
279 : * include the inode number and the approximate code location of the
280 : * check that found something to preen.
281 : */
282 : void
283 2383459 : xchk_ino_set_preen(
284 : struct xfs_scrub *sc,
285 : xfs_ino_t ino)
286 : {
287 2383459 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
288 2383459 : trace_xchk_ino_preen(sc, ino, __return_address);
289 2383397 : }
290 :
291 : /* Record that something is wrong with filesystem-wide metadata. */
292 : void
293 0 : xchk_set_corrupt(
294 : struct xfs_scrub *sc)
295 : {
296 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
297 0 : xchk_whine(sc->mp, "type %s ret_ip %pS", xchk_type_string(sc->sm->sm_type),
298 : __return_address);
299 0 : trace_xchk_fs_error(sc, 0, __return_address);
300 0 : }
301 :
302 : /* Record a corrupt block. */
303 : void
304 1 : xchk_block_set_corrupt(
305 : struct xfs_scrub *sc,
306 : struct xfs_buf *bp)
307 : {
308 1 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
309 1 : trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
310 2 : xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
311 1 : xchk_type_string(sc->sm->sm_type),
312 : xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
313 : xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
314 : __return_address);
315 1 : }
316 :
317 : #ifdef CONFIG_XFS_QUOTA
318 : /* Record a corrupt quota counter. */
319 : void
320 0 : xchk_qcheck_set_corrupt(
321 : struct xfs_scrub *sc,
322 : unsigned int dqtype,
323 : xfs_dqid_t id)
324 : {
325 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
326 0 : xchk_whine(sc->mp, "type %s dqtype %u id %u ret_ip %pS",
327 0 : xchk_type_string(sc->sm->sm_type), dqtype, id, __return_address);
328 0 : trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
329 0 : }
330 : #endif /* CONFIG_XFS_QUOTA */
331 :
332 : /* Record a corruption while cross-referencing. */
333 : void
334 0 : xchk_block_xref_set_corrupt(
335 : struct xfs_scrub *sc,
336 : struct xfs_buf *bp)
337 : {
338 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
339 0 : trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
340 0 : xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
341 0 : xchk_type_string(sc->sm->sm_type),
342 : xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
343 : xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
344 : __return_address);
345 0 : }
346 :
347 : /*
348 : * Record a corrupt inode. The trace data will include the inode
349 : * number and the approximate code location of the failed check; no
350 : * buffer is passed to this function.
351 : */
352 : void
353 0 : xchk_ino_set_corrupt(
354 : struct xfs_scrub *sc,
355 : xfs_ino_t ino)
356 : {
357 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
358 0 : xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
359 0 : ino, xchk_type_string(sc->sm->sm_type), __return_address);
360 0 : trace_xchk_ino_error(sc, ino, __return_address);
361 0 : }
362 :
363 : /* Record a corruption while cross-referencing with an inode. */
364 : void
365 0 : xchk_ino_xref_set_corrupt(
366 : struct xfs_scrub *sc,
367 : xfs_ino_t ino)
368 : {
369 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
370 0 : xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
371 0 : ino, xchk_type_string(sc->sm->sm_type), __return_address);
372 0 : trace_xchk_ino_error(sc, ino, __return_address);
373 0 : }
374 :
375 : /* Record corruption in a block indexed by a file fork. */
376 : void
377 23 : xchk_fblock_set_corrupt(
378 : struct xfs_scrub *sc,
379 : int whichfork,
380 : xfs_fileoff_t offset)
381 : {
382 23 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
383 23 : xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
384 23 : sc->ip->i_ino,
385 : whichfork,
386 23 : xchk_type_string(sc->sm->sm_type),
387 : offset,
388 : __return_address);
389 23 : trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
390 23 : }
391 :
392 : /* Record a corruption while cross-referencing a fork block. */
393 : void
394 10 : xchk_fblock_xref_set_corrupt(
395 : struct xfs_scrub *sc,
396 : int whichfork,
397 : xfs_fileoff_t offset)
398 : {
399 10 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
400 10 : xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
401 10 : sc->ip->i_ino,
402 : whichfork,
403 10 : xchk_type_string(sc->sm->sm_type),
404 : offset,
405 : __return_address);
406 10 : trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
407 10 : }
408 :
409 : /*
410 : * Warn about inodes that need administrative review but are not
411 : * incorrect.
412 : */
413 : void
414 0 : xchk_ino_set_warning(
415 : struct xfs_scrub *sc,
416 : xfs_ino_t ino)
417 : {
418 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
419 0 : xchk_whine(sc->mp, "ino 0x%llx type %s ret_ip %pS",
420 0 : ino, xchk_type_string(sc->sm->sm_type), __return_address);
421 0 : trace_xchk_ino_warning(sc, ino, __return_address);
422 0 : }
423 :
424 : /* Warn about a block indexed by a file fork that needs review. */
425 : void
426 453 : xchk_fblock_set_warning(
427 : struct xfs_scrub *sc,
428 : int whichfork,
429 : xfs_fileoff_t offset)
430 : {
431 453 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
432 453 : xchk_whine(sc->mp, "ino 0x%llx fork %d type %s offset %llu ret_ip %pS",
433 452 : sc->ip->i_ino,
434 : whichfork,
435 453 : xchk_type_string(sc->sm->sm_type),
436 : offset,
437 : __return_address);
438 453 : trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
439 453 : }
440 :
441 : /* Signal an incomplete scrub. */
442 : void
443 261 : xchk_set_incomplete(
444 : struct xfs_scrub *sc)
445 : {
446 261 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
447 261 : trace_xchk_incomplete(sc, __return_address);
448 261 : }
449 :
450 : /*
451 : * rmap scrubbing -- compute the number of blocks with a given owner,
452 : * at least according to the reverse mapping data.
453 : */
454 :
455 : struct xchk_rmap_ownedby_info {
456 : const struct xfs_owner_info *oinfo;
457 : xfs_filblks_t *blocks;
458 : };
459 :
460 : STATIC int
461 11074352206 : xchk_count_rmap_ownedby_irec(
462 : struct xfs_btree_cur *cur,
463 : const struct xfs_rmap_irec *rec,
464 : void *priv)
465 : {
466 11074352206 : struct xchk_rmap_ownedby_info *sroi = priv;
467 11074352206 : bool irec_attr;
468 11074352206 : bool oinfo_attr;
469 :
470 11074352206 : irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
471 11074352206 : oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
472 :
473 11074352206 : if (rec->rm_owner != sroi->oinfo->oi_owner)
474 : return 0;
475 :
476 61476015 : if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
477 61476015 : (*sroi->blocks) += rec->rm_blockcount;
478 :
479 : return 0;
480 : }
481 :
482 : /*
483 : * Calculate the number of blocks the rmap thinks are owned by something.
484 : * The caller should pass us an rmapbt cursor.
485 : */
486 : int
487 2328012 : xchk_count_rmap_ownedby_ag(
488 : struct xfs_scrub *sc,
489 : struct xfs_btree_cur *cur,
490 : const struct xfs_owner_info *oinfo,
491 : xfs_filblks_t *blocks)
492 : {
493 2328012 : struct xchk_rmap_ownedby_info sroi = {
494 : .oinfo = oinfo,
495 : .blocks = blocks,
496 : };
497 :
498 2328012 : *blocks = 0;
499 2328012 : return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
500 : &sroi);
501 : }
502 :
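/*
 * Usage sketch (hypothetical): ask the rmapbt how many blocks the inode
 * btrees own so that the result can be compared with other metadata.
 * XFS_RMAP_OINFO_INOBT is the static owner info used for inobt/finobt
 * blocks in this tree.
 */
static int
xchk_example_count_inobt_blocks(
	struct xfs_scrub	*sc,
	xfs_filblks_t		*blocks)
{
	if (!sc->sa.rmap_cur)
		return -ENOENT;	/* no rmapbt cursor; nothing to count */

	return xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_INOBT, blocks);
}
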
503 : /*
504 : * AG scrubbing
505 : *
506 : * These helpers facilitate locking an allocation group's header
507 : * buffers, setting up cursors for all btrees that are present, and
508 : * cleaning everything up once we're through.
509 : */
510 :
511 : /* Decide if we want to return an AG header read failure. */
512 : static inline bool
513 : want_ag_read_header_failure(
514 : struct xfs_scrub *sc,
515 : unsigned int type)
516 : {
517 : /* Return all AG header read failures when scanning btrees. */
518 0 : if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
519 0 : sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
520 : sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
521 : return true;
522 : /*
523 : * If we're scanning a given type of AG header, we only want to
524 : * see read failures from that specific header. We'd like the
525 : * other headers to cross-check them, but this isn't required.
526 : */
527 0 : if (sc->sm->sm_type == type)
528 : return true;
529 : return false;
530 : }
531 :
532 : /*
533 : * Grab the AG header buffers for the attached perag structure.
534 : *
535 : * The headers should be released by xchk_ag_free, but as a failsafe we attach
536 : * all the buffers we grab to the scrub transaction so they'll all be freed
537 : * when we cancel it.
538 : */
539 : static inline int
540 729073881 : xchk_perag_read_headers(
541 : struct xfs_scrub *sc,
542 : struct xchk_ag *sa)
543 : {
544 729073881 : int error;
545 :
546 729073881 : error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp);
547 729812742 : if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
548 : return error;
549 :
550 729812742 : error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
551 729879452 : if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
552 0 : return error;
553 :
554 : return 0;
555 : }
556 :
557 : /*
558 : * Grab the AG headers for the attached perag structure and wait for pending
559 : * intents to drain.
560 : */
561 : int
562 729368423 : xchk_perag_drain_and_lock(
563 : struct xfs_scrub *sc)
564 : {
565 729368423 : struct xchk_ag *sa = &sc->sa;
566 729368423 : int error = 0;
567 :
568 729368423 : ASSERT(sa->pag != NULL);
569 729368423 : ASSERT(sa->agi_bp == NULL);
570 729368423 : ASSERT(sa->agf_bp == NULL);
571 :
572 729423278 : do {
573 729423278 : if (xchk_should_terminate(sc, &error))
574 14 : return error;
575 :
576 729074034 : error = xchk_perag_read_headers(sc, sa);
577 729827550 : if (error)
578 0 : return error;
579 :
580 : /*
581 : * If we've grabbed an inode for scrubbing then we assume that
582 : * holding its ILOCK will suffice to coordinate with any intent
583 : * chains involving this inode.
584 : */
585 729827550 : if (sc->ip)
586 : return 0;
587 :
588 : /*
589 : * Decide if this AG is quiet enough for all metadata to be
590 : * consistent with each other. XFS allows the AG header buffer
591 : * locks to cycle across transaction rolls while processing
592 : * chains of deferred ops, which means that there could be
593 : * other threads in the middle of processing a chain of
594 : * deferred ops. For regular operations we are careful about
595 : * ordering operations to prevent collisions between threads
596 : * (which is why we don't need a per-AG lock), but scrub and
597 : * repair have to serialize against chained operations.
598 : *
599 : * We just locked all the AG headers buffers; now take a look
600 : * to see if there are any intents in progress. If there are,
601 : * drop the AG headers and wait for the intents to drain.
602 : * Since we hold all the AG header locks for the duration of
603 : * the scrub, this is the only time we have to sample the
604 : * intents counter; any threads increasing it after this point
605 : * can't possibly be in the middle of a chain of AG metadata
606 : * updates.
607 : *
608 : * Obviously, this should be slanted against scrub and in favor
609 : * of runtime threads.
610 : */
611 6266352 : if (!xfs_perag_intent_busy(sa->pag))
612 : return 0;
613 :
614 180850 : if (sa->agf_bp) {
615 180850 : xfs_trans_brelse(sc->tp, sa->agf_bp);
616 180850 : sa->agf_bp = NULL;
617 : }
618 :
619 180850 : if (sa->agi_bp) {
620 180850 : xfs_trans_brelse(sc->tp, sa->agi_bp);
621 180850 : sa->agi_bp = NULL;
622 : }
623 :
624 180850 : if (!(sc->flags & XCHK_FSGATES_DRAIN))
625 : return -ECHRNG;
626 54855 : error = xfs_perag_intent_drain(sa->pag);
627 54855 : if (error == -ERESTARTSYS)
628 0 : error = -EINTR;
629 54855 : } while (!error);
630 :
631 : return error;
632 : }
633 :
634 : /*
635 : * Grab the per-AG structure, grab all AG header buffers, and wait until there
636 : * aren't any pending intents. Returns -ENOENT if we can't grab the perag
637 : * structure.
638 : */
639 : int
640 728552513 : xchk_ag_read_headers(
641 : struct xfs_scrub *sc,
642 : xfs_agnumber_t agno,
643 : struct xchk_ag *sa)
644 : {
645 728552513 : struct xfs_mount *mp = sc->mp;
646 :
647 728552513 : ASSERT(!sa->pag);
648 728552513 : sa->pag = xfs_perag_get(mp, agno);
649 729572043 : if (!sa->pag)
650 : return -ENOENT;
651 :
652 729572043 : return xchk_perag_drain_and_lock(sc);
653 : }
654 :
655 : /* Release all the AG btree cursors. */
656 : void
657 1640658196 : xchk_ag_btcur_free(
658 : struct xchk_ag *sa)
659 : {
660 1640658196 : if (sa->refc_cur)
661 742847587 : xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
662 1641019222 : if (sa->rmap_cur)
663 743355475 : xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
664 1640750472 : if (sa->fino_cur)
665 752084974 : xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
666 1640935287 : if (sa->ino_cur)
667 752224654 : xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
668 1640884926 : if (sa->cnt_cur)
669 752187642 : xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
670 1640840174 : if (sa->bno_cur)
671 752158524 : xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
672 :
673 1640892549 : sa->refc_cur = NULL;
674 1640892549 : sa->rmap_cur = NULL;
675 1640892549 : sa->fino_cur = NULL;
676 1640892549 : sa->ino_cur = NULL;
677 1640892549 : sa->bno_cur = NULL;
678 1640892549 : sa->cnt_cur = NULL;
679 1640892549 : }
680 :
681 : /* Initialize all the btree cursors for an AG. */
682 : void
683 729365233 : xchk_ag_btcur_init(
684 : struct xfs_scrub *sc,
685 : struct xchk_ag *sa)
686 : {
687 729365233 : struct xfs_mount *mp = sc->mp;
688 :
689 1458742531 : if (sa->agf_bp &&
690 729507161 : xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
691 : /* Set up a bnobt cursor for cross-referencing. */
692 729359871 : sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
693 : sa->pag, XFS_BTNUM_BNO);
694 : }
695 :
696 1459353383 : if (sa->agf_bp &&
697 729622531 : xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
698 : /* Set up a cntbt cursor for cross-referencing. */
699 729801082 : sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
700 : sa->pag, XFS_BTNUM_CNT);
701 : }
702 :
703 : /* Set up an inobt cursor for cross-referencing. */
704 1459572053 : if (sa->agi_bp &&
705 729903875 : xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
706 729753856 : sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
707 : XFS_BTNUM_INO);
708 : }
709 :
710 : /* Set up a finobt cursor for cross-referencing. */
711 1459494526 : if (sa->agi_bp && xfs_has_finobt(mp) &&
712 729647572 : xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
713 729806992 : sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
714 : XFS_BTNUM_FINO);
715 : }
716 :
717 : /* Set up a rmapbt cursor for cross-referencing. */
718 1450962667 : if (sa->agf_bp && xfs_has_rmapbt(mp) &&
719 721171731 : xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
720 720994238 : sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
721 : sa->pag);
722 : }
723 :
724 : /* Set up a refcountbt cursor for cross-referencing. */
725 1450754787 : if (sa->agf_bp && xfs_has_reflink(mp) &&
726 720973697 : xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
727 720911649 : sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
728 : sa->agf_bp, sa->pag);
729 : }
730 729746930 : }
731 :
732 : /* Release the AG header context and btree cursors. */
733 : void
734 1621471647 : xchk_ag_free(
735 : struct xfs_scrub *sc,
736 : struct xchk_ag *sa)
737 : {
738 1621471647 : xchk_ag_btcur_free(sa);
739 1621177475 : xrep_reset_perag_resv(sc);
740 1621627207 : if (sa->agf_bp) {
741 751643062 : xfs_trans_brelse(sc->tp, sa->agf_bp);
742 751904920 : sa->agf_bp = NULL;
743 : }
744 1621889065 : if (sa->agi_bp) {
745 751903772 : xfs_trans_brelse(sc->tp, sa->agi_bp);
746 751925644 : sa->agi_bp = NULL;
747 : }
748 1621910937 : if (sa->pag) {
749 752051487 : xfs_perag_put(sa->pag);
750 751957269 : sa->pag = NULL;
751 : }
752 1621816719 : }
753 :
754 : /*
755 : * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
756 : * order. Locking order requires us to get the AGI before the AGF. We use the
757 : * transaction to avoid deadlocking on crosslinked metadata buffers; either the
758 : * caller passes one in (bmap scrub) or we have to create a transaction
759 : * ourselves. Returns ENOENT if the perag struct cannot be grabbed.
760 : */
761 : int
762 726052184 : xchk_ag_init(
763 : struct xfs_scrub *sc,
764 : xfs_agnumber_t agno,
765 : struct xchk_ag *sa)
766 : {
767 726052184 : int error;
768 :
769 726052184 : error = xchk_ag_read_headers(sc, agno, sa);
770 726933463 : if (error)
771 : return error;
772 :
773 726877913 : xchk_ag_btcur_init(sc, sa);
774 726877913 : return 0;
775 : }
776 :
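/*
 * Lifecycle sketch (hypothetical caller): set up the AG context, use the
 * cursors stashed in sc->sa, then let xchk_ag_free release the headers
 * and cursors.
 */
static int
xchk_example_scan_ag(
	struct xfs_scrub	*sc,
	xfs_agnumber_t		agno)
{
	int			error;

	error = xchk_ag_init(sc, agno, &sc->sa);
	if (error)
		return error;	/* including -ENOENT for a nonexistent AG */

	/* ...use sc->sa.bno_cur, sc->sa.rmap_cur, etc. here... */

	xchk_ag_free(sc, &sc->sa);
	return 0;
}
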
777 : /*
778 : * For scrubbing a realtime file, grab all the in-core resources we'll need to
779 : * check the realtime metadata, which means taking the ILOCK of the realtime
780 : * metadata inodes. Callers must not join these inodes to the transaction
781 : * with non-zero lockflags or concurrency problems will result. The
782 : * @rtlock_flags argument takes XCHK_RTLOCK_* flags because scrub has somewhat
783 : * unusual locking requirements.
784 : */
785 : void
786 3555924 : xchk_rt_init(
787 : struct xfs_scrub *sc,
788 : struct xchk_rt *sr,
789 : unsigned int rtlock_flags)
790 : {
791 3555924 : ASSERT(!(rtlock_flags & ~XCHK_RTLOCK_ALL));
792 3555924 : ASSERT(hweight32(rtlock_flags & (XCHK_RTLOCK_BITMAP |
793 : XCHK_RTLOCK_BITMAP_SHARED)) < 2);
794 3555924 : ASSERT(hweight32(rtlock_flags & (XCHK_RTLOCK_SUMMARY |
795 : XCHK_RTLOCK_SUMMARY_SHARED)) < 2);
796 3555924 : ASSERT(sr->rtg == NULL);
797 :
798 3555924 : if (rtlock_flags & XCHK_RTLOCK_BITMAP)
799 48751 : xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_EXCL);
800 3507173 : else if (rtlock_flags & XCHK_RTLOCK_BITMAP_SHARED)
801 3507512 : xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED);
802 :
803 3569175 : if (rtlock_flags & XCHK_RTLOCK_SUMMARY)
804 122715 : xfs_ilock(sc->mp->m_rsumip, XFS_ILOCK_EXCL);
805 3446460 : else if (rtlock_flags & XCHK_RTLOCK_SUMMARY_SHARED)
806 0 : xfs_ilock(sc->mp->m_rsumip, XFS_ILOCK_SHARED);
807 :
808 3569178 : sr->rtlock_flags = rtlock_flags;
809 3569178 : }
810 :
811 : /*
812 : * Unlock the realtime metadata inodes. This must be done /after/ committing
813 : * (or cancelling) the scrub transaction.
814 : */
815 : void
816 878145831 : xchk_rt_unlock(
817 : struct xfs_scrub *sc,
818 : struct xchk_rt *sr)
819 : {
820 878145831 : ASSERT(sr->rtg == NULL);
821 :
822 878145831 : if (!sr->rtlock_flags)
823 : return;
824 :
825 3574510 : if (sr->rtlock_flags & XCHK_RTLOCK_SUMMARY)
826 122718 : xfs_iunlock(sc->mp->m_rsumip, XFS_ILOCK_EXCL);
827 : else if (sr->rtlock_flags & XCHK_RTLOCK_SUMMARY_SHARED)
828 : xfs_iunlock(sc->mp->m_rsumip, XFS_ILOCK_SHARED);
829 :
830 3574510 : if (sr->rtlock_flags & XCHK_RTLOCK_BITMAP)
831 48751 : xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_EXCL);
832 3525759 : else if (sr->rtlock_flags & XCHK_RTLOCK_BITMAP_SHARED)
833 3525500 : xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED);
834 :
835 3572296 : sr->rtlock_flags = 0;
836 : }
837 :
838 : #ifdef CONFIG_XFS_RT
839 : /* Lock all the rt group metadata inode ILOCKs and wait for intents. */
840 : int
841 183341507 : xchk_rtgroup_drain_and_lock(
842 : struct xfs_scrub *sc,
843 : struct xchk_rt *sr,
844 : unsigned int rtglock_flags)
845 : {
846 183341507 : int error = 0;
847 :
848 183341507 : ASSERT(sr->rtg != NULL);
849 :
850 : /*
851 : * If we're /only/ locking the rtbitmap in shared mode, then we're
852 : * obviously not trying to compare records in two metadata inodes.
853 : * There's no need to drain intents here because the caller (most
854 : * likely the rgsuper scanner) doesn't need that level of consistency.
855 : */
856 183341507 : if (rtglock_flags == XFS_RTGLOCK_BITMAP_SHARED) {
857 61088 : xfs_rtgroup_lock(NULL, sr->rtg, rtglock_flags);
858 60979 : sr->rtlock_flags = rtglock_flags;
859 60979 : return 0;
860 : }
861 :
862 183311505 : do {
863 183311505 : if (xchk_should_terminate(sc, &error))
864 0 : return error;
865 :
866 183154856 : xfs_rtgroup_lock(NULL, sr->rtg, rtglock_flags);
867 :
868 : /*
869 : * If we've grabbed a non-metadata file for scrubbing, we
870 : * assume that holding its ILOCK will suffice to coordinate
871 : * with any rt intent chains involving this inode.
872 : */
873 183304839 : if (sc->ip && !xfs_is_metadata_inode(sc->ip)) {
874 182694889 : sr->rtlock_flags = rtglock_flags;
875 182694889 : return 0;
876 : }
877 :
878 : /*
879 : * Decide if the rt group is quiet enough for all metadata to
880 : * be consistent with each other. Regular file IO doesn't get
881 : * to lock all the rt inodes at the same time, which means that
882 : * there could be other threads in the middle of processing a
883 : * chain of deferred ops.
884 : *
885 : * We just locked all the metadata inodes for this rt group;
886 : * now take a look to see if there are any intents in progress.
887 : * If there are, drop the rt group inode locks and wait for the
888 : * intents to drain. Since we hold the rt group inode locks
889 : * for the duration of the scrub, this is the only time we have
890 : * to sample the intents counter; any threads increasing it
891 : * after this point can't possibly be in the middle of a chain
892 : * of rt metadata updates.
893 : *
894 : * Obviously, this should be slanted against scrub and in favor
895 : * of runtime threads.
896 : */
897 609950 : if (!xfs_rtgroup_intent_busy(sr->rtg)) {
898 517277 : sr->rtlock_flags = rtglock_flags;
899 517277 : return 0;
900 : }
901 :
902 68671 : xfs_rtgroup_unlock(sr->rtg, rtglock_flags);
903 :
904 68671 : if (!(sc->flags & XCHK_FSGATES_DRAIN))
905 : return -ECHRNG;
906 31086 : error = xfs_rtgroup_intent_drain(sr->rtg);
907 31086 : if (error == -ERESTARTSYS)
908 0 : error = -EINTR;
909 31086 : } while (!error);
910 :
911 : return error;
912 : }
913 :
914 : /*
915 : * For scrubbing a realtime group, grab all the in-core resources we'll need to
916 : * check the metadata, which means taking the ILOCK of the realtime group's
917 : * metadata inodes and draining any running intent chains. Callers must not
918 : * join these inodes to the transaction with non-zero lockflags or concurrency
919 : * problems will result. The @rtglock_flags argument takes XFS_RTGLOCK_*
920 : * flags.
921 : */
922 : int
923 183068687 : xchk_rtgroup_init(
924 : struct xfs_scrub *sc,
925 : xfs_rgnumber_t rgno,
926 : struct xchk_rt *sr,
927 : unsigned int rtglock_flags)
928 : {
929 183068687 : int error;
930 :
931 183068687 : ASSERT(sr->rtg == NULL);
932 183068687 : ASSERT(sr->rtlock_flags == 0);
933 :
934 183068687 : sr->rtg = xfs_rtgroup_get(sc->mp, rgno);
935 183380105 : if (!sr->rtg)
936 : return -ENOENT;
937 :
938 183380105 : error = xchk_rtgroup_drain_and_lock(sc, sr, rtglock_flags);
939 183274809 : if (error)
940 : return error;
941 :
942 183237292 : if (xfs_has_rtrmapbt(sc->mp) && (rtglock_flags & XFS_RTGLOCK_RMAP))
943 183322308 : sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp,
944 183177900 : sr->rtg, sr->rtg->rtg_rmapip);
945 :
946 183381700 : if (xfs_has_rtreflink(sc->mp) && (rtglock_flags & XFS_RTGLOCK_REFCOUNT))
947 183167895 : sr->refc_cur = xfs_rtrefcountbt_init_cursor(sc->mp, sc->tp,
948 183291839 : sr->rtg, sr->rtg->rtg_refcountip);
949 :
950 : return 0;
951 : }
952 :
953 : /*
954 : * Free all the btree cursors and other incore data relating to the realtime
955 : * group. This has to be done /before/ committing (or cancelling) the scrub
956 : * transaction.
957 : */
958 : void
959 1082605891 : xchk_rtgroup_btcur_free(
960 : struct xchk_rt *sr)
961 : {
962 1082605891 : if (sr->rmap_cur)
963 188513132 : xfs_btree_del_cursor(sr->rmap_cur, XFS_BTREE_ERROR);
964 1082722880 : if (sr->refc_cur)
965 188721744 : xfs_btree_del_cursor(sr->refc_cur, XFS_BTREE_ERROR);
966 :
967 1082751004 : sr->refc_cur = NULL;
968 1082751004 : sr->rmap_cur = NULL;
969 1082751004 : }
970 :
971 : /*
972 : * Unlock the realtime group. This must be done /after/ committing (or
973 : * cancelling) the scrub transaction.
974 : */
975 : void
976 188731248 : xchk_rtgroup_unlock(
977 : struct xfs_scrub *sc,
978 : struct xchk_rt *sr)
979 : {
980 188731248 : ASSERT(sr->rtg != NULL);
981 :
982 188731248 : if (sr->rtlock_flags) {
983 188695633 : xfs_rtgroup_unlock(sr->rtg, sr->rtlock_flags);
984 188701710 : sr->rtlock_flags = 0;
985 : }
986 188737325 : }
987 :
988 : /*
989 : * Unlock the realtime group and release its resources. This must be done
990 : * /after/ committing (or cancelling) the scrub transaction.
991 : */
992 : void
993 188780578 : xchk_rtgroup_free(
994 : struct xfs_scrub *sc,
995 : struct xchk_rt *sr)
996 : {
997 188780578 : ASSERT(sr->rtg != NULL);
998 :
999 188780578 : xchk_rtgroup_unlock(sc, sr);
1000 :
1001 188737769 : xfs_rtgroup_put(sr->rtg);
1002 188871347 : sr->rtg = NULL;
1003 188871347 : }
1004 : #endif /* CONFIG_XFS_RT */
1005 :
1006 : /* Per-scrubber setup functions */
1007 :
1008 : void
1009 229022896 : xchk_trans_cancel(
1010 : struct xfs_scrub *sc)
1011 : {
1012 229022896 : xfs_trans_cancel(sc->tp);
1013 229031404 : sc->tp = NULL;
1014 0 : }
1015 :
1016 : int
1017 104157788 : xchk_trans_alloc_empty(
1018 : struct xfs_scrub *sc)
1019 : {
1020 104157788 : return xfs_trans_alloc_empty(sc->mp, &sc->tp);
1021 : }
1022 :
1023 : /*
1024 : * Grab an empty transaction so that we can re-grab locked buffers if
1025 : * one of our btrees turns out to be cyclic.
1026 : *
1027 : * If we're going to repair something, we need to ask for the largest possible
1028 : * log reservation so that we can handle the worst case scenario for metadata
1029 : * updates while rebuilding a metadata item. We also need to reserve as many
1030 : * blocks in the head transaction as we think we're going to need to rebuild
1031 : * the metadata object.
1032 : */
1033 : int
1034 890190423 : xchk_trans_alloc(
1035 : struct xfs_scrub *sc,
1036 : uint resblks)
1037 : {
1038 890190423 : if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
1039 63271839 : return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
1040 : resblks, 0, 0, &sc->tp);
1041 :
1042 826918584 : return xchk_trans_alloc_empty(sc);
1043 : }
1044 :
1045 : /* Set us up with a transaction and an empty context. */
1046 : int
1047 6749675 : xchk_setup_fs(
1048 : struct xfs_scrub *sc)
1049 : {
1050 6749675 : uint resblks;
1051 :
1052 6749675 : resblks = xrep_calc_ag_resblks(sc);
1053 6751218 : return xchk_trans_alloc(sc, resblks);
1054 : }
1055 :
1056 : /* Set us up with a transaction and an empty context to repair rt metadata. */
1057 : int
1058 297423 : xchk_setup_rt(
1059 : struct xfs_scrub *sc)
1060 : {
1061 297423 : uint resblks;
1062 :
1063 297423 : resblks = xrep_calc_rtgroup_resblks(sc);
1064 297354 : return xchk_trans_alloc(sc, resblks);
1065 : }
1066 :
1067 : /* Set us up with AG headers and btree cursors. */
1068 : int
1069 2567508 : xchk_setup_ag_btree(
1070 : struct xfs_scrub *sc,
1071 : bool force_log)
1072 : {
1073 2567508 : struct xfs_mount *mp = sc->mp;
1074 2567508 : int error;
1075 :
1076 : /*
1077 : * If the caller asks us to checkpoint the log, do so. This
1078 : * expensive operation should be performed infrequently and only
1079 : * as a last resort. Any caller that sets force_log should
1080 : * document why they need to do so.
1081 : */
1082 2567508 : if (force_log) {
1083 0 : error = xchk_checkpoint_log(mp);
1084 0 : if (error)
1085 : return error;
1086 : }
1087 :
1088 2567508 : error = xchk_setup_fs(sc);
1089 2569666 : if (error)
1090 : return error;
1091 :
1092 2568257 : return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
1093 : }
1094 :
1095 : /* Push everything out of the log onto disk. */
1096 : int
1097 0 : xchk_checkpoint_log(
1098 : struct xfs_mount *mp)
1099 : {
1100 0 : int error;
1101 :
1102 0 : error = xfs_log_force(mp, XFS_LOG_SYNC);
1103 0 : if (error)
1104 : return error;
1105 0 : xfs_ail_push_all_sync(mp->m_ail);
1106 0 : return 0;
1107 : }
1108 :
1109 : /* Verify that an inode is allocated ondisk, then return its cached inode. */
1110 : int
1111 2374125366 : xchk_iget(
1112 : struct xfs_scrub *sc,
1113 : xfs_ino_t inum,
1114 : struct xfs_inode **ipp)
1115 : {
1116 2374125366 : return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
1117 : }
1118 :
1119 : /*
1120 : * Try to grab an inode in a manner that avoids races with physical inode
1121 : * allocation. If we can't, return the locked AGI buffer so that the caller
1122 : * can single-step the loading process to see where things went wrong.
1123 : * Callers must have a valid scrub transaction.
1124 : *
1125 : * If the iget succeeds, return 0, a NULL AGI, and the inode.
1126 : *
1127 : * If the iget fails, return the error, the locked AGI, and a NULL inode. This
1128 : * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
1129 : * no longer allocated; or any other corruption or runtime error.
1130 : *
1131 : * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
1132 : *
1133 : * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
1134 : */
1135 : int
1136 431424 : xchk_iget_agi(
1137 : struct xfs_scrub *sc,
1138 : xfs_ino_t inum,
1139 : struct xfs_buf **agi_bpp,
1140 : struct xfs_inode **ipp)
1141 : {
1142 431424 : struct xfs_mount *mp = sc->mp;
1143 431424 : struct xfs_trans *tp = sc->tp;
1144 431424 : struct xfs_perag *pag;
1145 431424 : int error;
1146 :
1147 431424 : ASSERT(sc->tp != NULL);
1148 :
1149 431424 : again:
1150 431668 : *agi_bpp = NULL;
1151 431668 : *ipp = NULL;
1152 431668 : error = 0;
1153 :
1154 431668 : if (xchk_should_terminate(sc, &error))
1155 1 : return error;
1156 :
1157 : /*
1158 : * Attach the AGI buffer to the scrub transaction to avoid deadlocks
1159 : * in the iget cache miss path.
1160 : */
1161 431667 : pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
1162 431667 : error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
1163 431667 : xfs_perag_put(pag);
1164 431667 : if (error)
1165 0 : return error;
1166 :
1167 431667 : error = xfs_iget(mp, tp, inum,
1168 : XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
1169 431667 : if (error == -EAGAIN) {
1170 : /*
1171 : * The inode may be in core but temporarily unavailable and may
1172 : * require the AGI buffer before it can be returned. Drop the
1173 : * AGI buffer and retry the lookup.
1174 : *
1175 : * Incore lookup will fail with EAGAIN on a cache hit if the
1176 : * inode is queued to the inactivation list. The inactivation
1177 : * worker may remove the inode from the unlinked list and hence
1178 : * needs the AGI.
1179 : *
1180 : * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
1181 : * to allow inodegc to make progress and move the inode to
1182 : * IRECLAIMABLE state where xfs_iget will be able to return it
1183 : * again if it can lock the inode.
1184 : */
1185 244 : xfs_trans_brelse(tp, *agi_bpp);
1186 244 : delay(1);
1187 244 : goto again;
1188 : }
1189 431423 : if (error)
1190 : return error;
1191 :
1192 : /* We got the inode, so we can release the AGI. */
1193 420082 : ASSERT(*ipp != NULL);
1194 420082 : xfs_trans_brelse(tp, *agi_bpp);
1195 420082 : *agi_bpp = NULL;
1196 420082 : return 0;
1197 : }
1198 :
1199 : #ifdef CONFIG_XFS_QUOTA
1200 : /*
1201 : * Try to attach dquots to this inode if we think we might want to repair it.
1202 : * Callers must not hold any ILOCKs. If the dquots are broken and cannot be
1203 : * attached, a quotacheck will be scheduled.
1204 : */
1205 : int
1206 857641606 : xchk_ino_dqattach(
1207 : struct xfs_scrub *sc)
1208 : {
1209 857641606 : ASSERT(sc->tp != NULL);
1210 857641606 : ASSERT(sc->ip != NULL);
1211 :
1212 1715283212 : if (!xchk_could_repair(sc))
1213 : return 0;
1214 :
1215 17315708 : return xrep_ino_dqattach(sc);
1216 : }
1217 : #endif
1218 :
1219 : /* Install an inode that we opened by handle for scrubbing. */
1220 : int
1221 516755875 : xchk_install_handle_inode(
1222 : struct xfs_scrub *sc,
1223 : struct xfs_inode *ip)
1224 : {
1225 : /*
1226 : * Only the directories in the metadata directory tree can be scrubbed
1227 : * by handle -- files must be checked through an explicit scrub type.
1228 : */
1229 516755875 : if ((xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) ||
1230 516143835 : VFS_I(ip)->i_generation != sc->sm->sm_gen) {
1231 1467633 : xchk_irele(sc, ip);
1232 1467633 : return -ENOENT;
1233 : }
1234 :
1235 515288242 : sc->ip = ip;
1236 515288242 : return 0;
1237 : }
1238 :
1239 : /*
1240 : * Install an already-referenced inode for scrubbing. Get our own reference to
1241 : * the inode to make disposal simpler. The inode must not be in I_FREEING or
1242 : * I_WILL_FREE state!
1243 : */
1244 : int
1245 342024388 : xchk_install_live_inode(
1246 : struct xfs_scrub *sc,
1247 : struct xfs_inode *ip)
1248 : {
1249 342024388 : if (!igrab(VFS_I(ip))) {
1250 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
1251 0 : return -EFSCORRUPTED;
1252 : }
1253 :
1254 343336834 : sc->ip = ip;
1255 343336834 : return 0;
1256 : }
1257 :
1258 : /*
1259 : * In preparation to scrub metadata structures that hang off of an inode,
1260 : * grab either the inode referenced in the scrub control structure or the
1261 : * inode passed in. If the inumber does not reference an allocated inode
1262 : * record, the function returns ENOENT to end the scrub early. The inode
1263 : * is not locked.
1264 : */
1265 : int
1266 724339465 : xchk_iget_for_scrubbing(
1267 : struct xfs_scrub *sc)
1268 : {
1269 724339465 : struct xfs_imap imap;
1270 724339465 : struct xfs_mount *mp = sc->mp;
1271 724339465 : struct xfs_perag *pag;
1272 724339465 : struct xfs_buf *agi_bp;
1273 724339465 : struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
1274 724339465 : struct xfs_inode *ip = NULL;
1275 724339465 : xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
1276 724339465 : int error;
1277 :
1278 724339465 : ASSERT(sc->tp == NULL);
1279 :
1280 : /* We want to scan the inode we already had opened. */
1281 724339465 : if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
1282 284632299 : return xchk_install_live_inode(sc, ip_in);
1283 :
1284 : /* Reject internal metadata files and obviously bad inode numbers. */
1285 439707166 : if (xfs_internal_inum(mp, sc->sm->sm_ino))
1286 : return -ENOENT;
1287 439288611 : if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
1288 : return -ENOENT;
1289 :
1290 : /* Try a regular untrusted iget. */
1291 439070172 : error = xchk_iget(sc, sc->sm->sm_ino, &ip);
1292 439645549 : if (!error)
1293 437118017 : return xchk_install_handle_inode(sc, ip);
1294 2527532 : if (error == -ENOENT)
1295 : return error;
1296 8960 : if (error != -EINVAL)
1297 0 : goto out_error;
1298 :
1299 : /*
1300 : * EINVAL with IGET_UNTRUSTED probably means one of several things:
1301 : * userspace gave us an inode number that doesn't correspond to fs
1302 : * space; the inode btree lacks a record for this inode; or there is a
1303 : * record, and it says this inode is free.
1304 : *
1305 : * We want to look up this inode in the inobt to distinguish two
1306 : * scenarios: (1) the inobt says the inode is free, in which case
1307 : * there's nothing to do; and (2) the inobt says the inode is
1308 : * allocated, but loading it failed due to corruption.
1309 : *
1310 : * Allocate a transaction and grab the AGI to prevent inobt activity
1311 : * in this AG. Retry the iget in case someone allocated a new inode
1312 : * after the first iget failed.
1313 : */
1314 8960 : error = xchk_trans_alloc(sc, 0);
1315 8960 : if (error)
1316 0 : goto out_error;
1317 :
1318 8960 : error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
1319 8960 : if (error == 0) {
1320 : /* Actually got the inode, so install it. */
1321 0 : xchk_trans_cancel(sc);
1322 0 : return xchk_install_handle_inode(sc, ip);
1323 : }
1324 8960 : if (error == -ENOENT)
1325 0 : goto out_gone;
1326 8960 : if (error != -EINVAL)
1327 0 : goto out_cancel;
1328 :
1329 : /* Ensure that we have protected against inode allocation/freeing. */
1330 8960 : if (agi_bp == NULL) {
1331 0 : ASSERT(agi_bp != NULL);
1332 0 : error = -ECANCELED;
1333 0 : goto out_cancel;
1334 : }
1335 :
1336 : /*
1337 : * Untrusted iget failed a second time. Let's try an inobt lookup.
1338 : * If the inobt thinks this inode neither exists inside the
1339 : * filesystem nor is allocated, return ENOENT to signal that the check
1340 : * can be skipped.
1341 : *
1342 : * If the lookup returns corruption, we'll mark this inode corrupt and
1343 : * exit to userspace. There's little chance of fixing anything until
1344 : * the inobt is straightened out, but there's nothing we can do here.
1345 : *
1346 : * If the lookup encounters any other error, exit to userspace.
1347 : *
1348 : * If the lookup succeeds, something else must be very wrong in the fs
1349 : * such that setting up the incore inode failed in some strange way.
1350 : * Treat those as corruptions.
1351 : */
1352 8960 : pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
1353 8960 : if (!pag) {
1354 0 : error = -EFSCORRUPTED;
1355 0 : goto out_cancel;
1356 : }
1357 :
1358 8960 : error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
1359 : XFS_IGET_UNTRUSTED);
1360 8960 : xfs_perag_put(pag);
1361 8960 : if (error == -EINVAL || error == -ENOENT)
1362 8960 : goto out_gone;
1363 0 : if (!error)
1364 0 : error = -EFSCORRUPTED;
1365 :
1366 0 : out_cancel:
1367 0 : xchk_trans_cancel(sc);
1368 0 : out_error:
1369 0 : xchk_whine(mp, "type %s agno 0x%x agbno 0x%x error %d ret_ip %pS",
1370 0 : xchk_type_string(sc->sm->sm_type), agno,
1371 0 : XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), error,
1372 : __return_address);
1373 0 : trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
1374 : error, __return_address);
1375 0 : return error;
1376 8960 : out_gone:
1377 : /* The file is gone, so there's nothing to check. */
1378 8960 : xchk_trans_cancel(sc);
1379 8960 : return -ENOENT;
1380 : }
1381 :
1382 : /* Release an inode, possibly dropping it in the process. */
1383 : void
1384 57332560472 : xchk_irele(
1385 : struct xfs_scrub *sc,
1386 : struct xfs_inode *ip)
1387 : {
1388 57332560472 : if (current->journal_info != NULL) {
1389 56359028959 : ASSERT(current->journal_info == sc->tp);
1390 :
1391 : /*
1392 : * If we are in a transaction, we /cannot/ drop the inode
1393 : * ourselves, because the VFS will trigger writeback, which
1394 : * can require a transaction. Clear DONTCACHE to force the
1395 : * inode to the LRU, where someone else can take care of
1396 : * dropping it.
1397 : *
1398 : * Note that when we grabbed our reference to the inode, it
1399 : * could have had an active ref and DONTCACHE set if a sysadmin
1400 : * is trying to coerce a change in file access mode. icache
1401 : * hits do not clear DONTCACHE, so we must do it here.
1402 : */
1403 56359028959 : spin_lock(&VFS_I(ip)->i_lock);
1404 57012970241 : VFS_I(ip)->i_state &= ~I_DONTCACHE;
1405 57012970241 : spin_unlock(&VFS_I(ip)->i_lock);
1406 973531513 : } else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
1407 : /*
1408 : * If this is the last reference to the inode and the caller
1409 : * permits it, set DONTCACHE to avoid thrashing.
1410 : */
1411 10987380 : d_mark_dontcache(VFS_I(ip));
1412 : }
1413 :
1414 58085567107 : xfs_irele(ip);
1415 58114813642 : }
1416 :
1417 : /*
1418 : * Set us up to scrub metadata mapped by a file's fork. Callers must not use
1419 : * this to operate on user-accessible regular file data because the MMAPLOCK is
1420 : * not taken.
1421 : */
1422 : int
1423 319764894 : xchk_setup_inode_contents(
1424 : struct xfs_scrub *sc,
1425 : unsigned int resblks)
1426 : {
1427 319764894 : int error;
1428 :
1429 319764894 : error = xchk_iget_for_scrubbing(sc);
1430 321070918 : if (error)
1431 : return error;
1432 :
1433 : /* Lock the inode so the VFS cannot touch this file. */
1434 319209144 : xchk_ilock(sc, XFS_IOLOCK_EXCL);
1435 :
1436 318801937 : error = xchk_trans_alloc(sc, resblks);
1437 319250588 : if (error)
1438 0 : goto out;
1439 :
1440 319250588 : error = xchk_ino_dqattach(sc);
1441 319218517 : if (error)
1442 0 : goto out;
1443 :
1444 319218517 : xchk_ilock(sc, XFS_ILOCK_EXCL);
1445 : out:
1446 : /* scrub teardown will unlock and release the inode for us */
1447 : return error;
1448 : }
1449 :
1450 : void
1451 1227505202 : xchk_ilock(
1452 : struct xfs_scrub *sc,
1453 : unsigned int ilock_flags)
1454 : {
1455 1865932863 : xfs_ilock(sc->ip, ilock_flags);
1456 318801937 : sc->ilock_flags |= ilock_flags;
1457 319237213 : }
1458 :
1459 : bool
1460 99822503 : xchk_ilock_nowait(
1461 : struct xfs_scrub *sc,
1462 : unsigned int ilock_flags)
1463 : {
1464 99822503 : if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
1465 99818173 : sc->ilock_flags |= ilock_flags;
1466 99818173 : return true;
1467 : }
1468 :
1469 : return false;
1470 : }
1471 :
1472 : void
1473 1088532222 : xchk_iunlock(
1474 : struct xfs_scrub *sc,
1475 : unsigned int ilock_flags)
1476 : {
1477 1088532222 : sc->ilock_flags &= ~ilock_flags;
1478 1088532222 : xfs_iunlock(sc->ip, ilock_flags);
1479 1089697421 : }
1480 :
1481 : /*
1482 : * Predicate that decides if we need to evaluate the cross-reference check.
1483 : * If there was an error accessing the cross-reference btree, just delete
1484 : * the cursor and skip the check.
1485 : */
1486 : bool
1487 18326348128 : xchk_should_check_xref(
1488 : struct xfs_scrub *sc,
1489 : int *error,
1490 : struct xfs_btree_cur **curpp)
1491 : {
1492 : /* No point in xref if we already know we're corrupt. */
1493 18326348128 : if (xchk_skip_xref(sc->sm))
1494 : return false;
1495 :
1496 18326348128 : if (*error == 0)
1497 : return true;
1498 :
1499 2 : if (curpp) {
1500 : /* If we've already given up on xref, just bail out. */
1501 2 : if (!*curpp)
1502 : return false;
1503 :
1504 : /* xref error, delete cursor and bail out. */
1505 2 : xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
1506 2 : *curpp = NULL;
1507 : }
1508 :
1509 2 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
1510 4 : xchk_whine(sc->mp, "type %s xref error %d ret_ip %pS",
1511 2 : xchk_type_string(sc->sm->sm_type),
1512 : *error,
1513 : __return_address);
1514 2 : trace_xchk_xref_error(sc, *error, __return_address);
1515 :
1516 : /*
1517 : * Errors encountered during cross-referencing with another
1518 : * data structure should not cause this scrubber to abort.
1519 : */
1520 2 : *error = 0;
1521 2 : return false;
1522 : }
1523 :
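/*
 * Cross-referencing sketch (hypothetical fragment, assuming the
 * xfs_alloc_has_records/xbtree_recpacking helpers from this tree): any
 * free-space record overlapping an extent that should be in use is a
 * cross-referencing corruption, and query errors are downgraded to
 * OFLAG_XFAIL instead of aborting the scrub.
 */
static void
xchk_example_xref_is_used_space(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	enum xbtree_recpacking	outcome;
	int			error;

	error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.bno_cur))
		return;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		xchk_block_xref_set_corrupt(sc, sc->sa.agf_bp);
}
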
1524 : /* Run the structure verifiers on in-memory buffers to detect bad memory. */
1525 : void
1526 132626399 : xchk_buffer_recheck(
1527 : struct xfs_scrub *sc,
1528 : struct xfs_buf *bp)
1529 : {
1530 132626399 : xfs_failaddr_t fa;
1531 :
1532 132626399 : if (bp->b_ops == NULL) {
1533 0 : xchk_block_set_corrupt(sc, bp);
1534 0 : return;
1535 : }
1536 132626399 : if (bp->b_ops->verify_struct == NULL) {
1537 0 : xchk_set_incomplete(sc);
1538 0 : return;
1539 : }
1540 132626399 : fa = bp->b_ops->verify_struct(bp);
1541 132790710 : if (!fa)
1542 : return;
1543 0 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
1544 0 : trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
1545 0 : xchk_whine(sc->mp, "type %s agno 0x%x agbno 0x%x ret_ip %pS",
1546 0 : xchk_type_string(sc->sm->sm_type),
1547 : xfs_daddr_to_agno(sc->mp, xfs_buf_daddr(bp)),
1548 : xfs_daddr_to_agbno(sc->mp, xfs_buf_daddr(bp)),
1549 : fa);
1550 : }
1551 :
1552 : static inline int
1553 1358314 : xchk_metadata_inode_subtype(
1554 : struct xfs_scrub *sc,
1555 : unsigned int scrub_type)
1556 : {
1557 1358314 : __u32 smtype = sc->sm->sm_type;
1558 1358314 : int error;
1559 :
1560 1358314 : sc->sm->sm_type = scrub_type;
1561 :
1562 1358314 : switch (scrub_type) {
1563 679038 : case XFS_SCRUB_TYPE_INODE:
1564 679038 : error = xchk_inode(sc);
1565 679038 : break;
1566 679276 : case XFS_SCRUB_TYPE_BMBTD:
1567 679276 : error = xchk_bmap_data(sc);
1568 679276 : break;
1569 0 : default:
1570 0 : ASSERT(0);
1571 0 : error = -EFSCORRUPTED;
1572 0 : break;
1573 : }
1574 :
1575 1358552 : sc->sm->sm_type = smtype;
1576 1358552 : return error;
1577 : }
1578 :
1579 : /*
1580 : * Scrub the attr/data forks of a metadata inode. The metadata inode must be
1581 : * pointed to by sc->ip and the ILOCK must be held.
1582 : */
1583 : int
1584 679102 : xchk_metadata_inode_forks(
1585 : struct xfs_scrub *sc)
1586 : {
1587 679102 : bool shared;
1588 679102 : int error;
1589 :
1590 679102 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
1591 : return 0;
1592 :
1593 : /* Check the inode record. */
1594 679055 : error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
1595 679276 : if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
1596 : return error;
1597 :
1598 : /* Metadata inodes don't live on the rt device. */
1599 679276 : if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
1600 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1601 0 : return 0;
1602 : }
1603 :
1604 : /* They should never participate in reflink. */
1605 679276 : if (xfs_is_reflink_inode(sc->ip)) {
1606 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1607 0 : return 0;
1608 : }
1609 :
1610 : /*
1611 : * Metadata files can only have extended attributes if parent pointers
1612 : * and the metadata directory tree are enabled.
1613 : */
1614 679276 : if (xfs_inode_hasattr(sc->ip) &&
1615 666826 : !(xfs_has_parent(sc->mp) && xfs_has_metadir(sc->mp))) {
1616 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1617 0 : return 0;
1618 : }
1619 :
1620 : /* Invoke the data fork scrubber. */
1621 679276 : error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
1622 679276 : if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
1623 : return error;
1624 :
1625 : /* Look for incorrect shared blocks. */
1626 679274 : if (xfs_has_reflink(sc->mp)) {
1627 667423 : error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
1628 : &shared);
1629 1334846 : if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
1630 : &error))
1631 0 : return error;
1632 667423 : if (shared)
1633 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
1634 : }
1635 :
1636 : return 0;
1637 : }
1638 :
1639 : /*
1640 : * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
1641 : * operation. Callers must not hold any locks that intersect with the CPU
1642 : * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
1643 : * to change kernel code.
1644 : */
1645 : void
1646 33529984 : xchk_fsgates_enable(
1647 : struct xfs_scrub *sc,
1648 : unsigned int scrub_fsgates)
1649 : {
1650 33529984 : ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
1651 33529984 : ASSERT(!(sc->flags & scrub_fsgates));
1652 :
1653 33529984 : trace_xchk_fsgates_enable(sc, scrub_fsgates);
1654 :
1655 33511884 : if (scrub_fsgates & XCHK_FSGATES_DRAIN)
1656 173576 : xfs_defer_drain_wait_enable();
1657 :
1658 33511884 : if (scrub_fsgates & XCHK_FSGATES_QUOTA)
1659 34637 : xfs_dqtrx_hook_enable();
1660 :
1661 33511884 : if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
1662 33261274 : xfs_dir_hook_enable();
1663 :
1664 33563999 : if (scrub_fsgates & XCHK_FSGATES_RMAP)
1665 44084 : xfs_rmap_hook_enable();
1666 :
1667 33564103 : sc->flags |= scrub_fsgates;
1668 33564103 : }
1669 :
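/*
 * Usage sketch (hypothetical setup function): enable the intent-drain gate
 * only on a retry (XCHK_TRY_HARDER), so the common case never pays the
 * hook overhead.  A first pass that races with intent chains returns
 * -ECHRNG from the drain paths above, prompting the scrub core to retry
 * with the gate enabled.
 */
static int
xchk_example_setup(
	struct xfs_scrub	*sc)
{
	if (sc->flags & XCHK_TRY_HARDER)
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	return xchk_setup_fs(sc);
}
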
1670 : /*
1671 : * Decide if this is a cached inode that's also allocated. The caller
1672 : * must hold a reference to an AG and the AGI buffer lock to prevent inodes
1673 : * from being allocated or freed.
1674 : *
1675 : * Look up an inode by number in the given file system. If the inode number
1676 : * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA.
1677 : * If the inode is being reclaimed, return -ENODATA because we know the inode
1678 : * cache cannot be updating the ondisk metadata.
1679 : *
1680 : * Otherwise, the incore inode is the one we want, and it is either live,
1681 : * somewhere in the inactivation machinery, or reclaimable. The inode is
1682 : * allocated if i_mode is nonzero. In all three cases, the cached inode will
1683 : * be more up to date than the ondisk inode buffer, so we must use the incore
1684 : * i_mode.
1685 : */
1686 : int
1687 3259448411 : xchk_inode_is_allocated(
1688 : struct xfs_scrub *sc,
1689 : xfs_agino_t agino,
1690 : bool *inuse)
1691 : {
1692 3259448411 : struct xfs_mount *mp = sc->mp;
1693 3259448411 : struct xfs_perag *pag = sc->sa.pag;
1694 3259448411 : xfs_ino_t ino;
1695 3259448411 : struct xfs_inode *ip;
1696 3259448411 : int error;
1697 :
1698 : /* caller must hold perag reference */
1699 3259448411 : if (pag == NULL) {
1700 0 : ASSERT(pag != NULL);
1701 0 : return -EINVAL;
1702 : }
1703 :
1704 : /* caller must have AGI buffer */
1705 3259448411 : if (sc->sa.agi_bp == NULL) {
1706 0 : ASSERT(sc->sa.agi_bp != NULL);
1707 0 : return -EINVAL;
1708 : }
1709 :
1710 : /* reject inode numbers outside existing AGs */
1711 3259448411 : ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
1712 3259448411 : if (!xfs_verify_ino(mp, ino))
1713 : return -EINVAL;
1714 :
1715 3259355645 : error = -ENODATA;
1716 3259355645 : rcu_read_lock();
1717 3259270840 : ip = radix_tree_lookup(&pag->pag_ici_root, agino);
1718 3259312622 : if (!ip) {
1719 : /* cache miss */
1720 65196048 : goto out_rcu;
1721 : }
1722 :
1723 : /*
1724 : * If the inode number doesn't match, the incore inode got reused
1725 : * during an RCU grace period and the radix tree hasn't been updated.
1726 : * This isn't the inode we want.
1727 : */
1728 3194116574 : spin_lock(&ip->i_flags_lock);
1729 3194931549 : if (ip->i_ino != ino)
1730 0 : goto out_skip;
1731 :
1732 3194931549 : trace_xchk_inode_is_allocated(ip);
1733 :
1734 : /*
1735 : * We have an incore inode that matches the inode we want, and the
1736 : * caller holds the perag structure and the AGI buffer. Let's check
1737 : * our assumptions below:
1738 : */
1739 :
1740 : #ifdef DEBUG
1741 : /*
1742 : * (1) If the incore inode is live (i.e. referenced from the dcache),
1743 : * it will not be INEW, nor will it be in the inactivation or reclaim
1744 : * machinery. The ondisk inode had better be allocated. This is the
1745 : * most trivial case.
1746 : */
1747 3194420289 : if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
1748 : XFS_INACTIVATING))) {
1749 : /* live inode */
1750 3192349572 : ASSERT(VFS_I(ip)->i_mode != 0);
1751 : }
1752 :
1753 : /*
1754 : * If the incore inode is INEW, there are several possibilities:
1755 : *
1756 : * (2) For a file that is being created, note that we allocate the
1757 : * ondisk inode before allocating, initializing, and adding the incore
1758 : * inode to the radix tree.
1759 : *
1760 : * (3) If the incore inode is being recycled, the inode has to be
1761 : * allocated because we don't allow freed inodes to be recycled.
1762 : * Recycling doesn't touch i_mode.
1763 : */
1764 3194420289 : if (ip->i_flags & XFS_INEW) {
1765 : /* created on disk already or recycling */
1766 747 : ASSERT(VFS_I(ip)->i_mode != 0);
1767 : }
1768 :
1769 : /*
1770 : * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
1771 : * inactivation has not started (!INACTIVATING), it is still allocated.
1772 : */
1773 3194420289 : if ((ip->i_flags & XFS_NEED_INACTIVE) &&
1774 : !(ip->i_flags & XFS_INACTIVATING)) {
1775 : /* definitely before difree */
1776 19885 : ASSERT(VFS_I(ip)->i_mode != 0);
1777 : }
1778 : #endif
1779 :
1780 : /*
1781 : * If the incore inode is undergoing inactivation (INACTIVATING), there
1782 : * are two possibilities:
1783 : *
1784 : * (5) It is before the point where it would get freed ondisk, in which
1785 : * case i_mode is still nonzero.
1786 : *
1787 : * (6) It has already been freed, in which case i_mode is zero.
1788 : *
1789 : * We don't take the ILOCK here, but difree and dialloc update the AGI,
1790 : * and we've taken the AGI buffer lock, which prevents that from
1791 : * happening.
1792 : */
1793 :
1794 : /*
1795 : * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
1796 : * reclaim (IRECLAIMABLE) could be allocated or free. i_mode still
1797 : * reflects the ondisk state.
1798 : */
1799 :
1800 : /*
1801 : * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
1802 : * the flush code uses i_mode to format the ondisk inode.
1803 : */
1804 :
1805 : /*
1806 : * (9) If the inode is in IRECLAIM and was reachable via the radix
1807 : * tree, it still has the same i_mode as it did before it entered
1808 : * reclaim. The inode object is still alive because we hold the RCU
1809 : * read lock.
1810 : */
1811 :
1812 3194420289 : *inuse = VFS_I(ip)->i_mode != 0;
1813 3194420289 : error = 0;
1814 :
1815 3194420289 : out_skip:
1816 3194420289 : spin_unlock(&ip->i_flags_lock);
1817 3260062827 : out_rcu:
1818 3260062827 : rcu_read_unlock();
1819 3260062827 : return error;
1820 : }
1821 :
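/*
 * Caller sketch (hypothetical inobt scrubber fragment): compare the incore
 * allocation state against the ondisk free bit, skipping uncached inodes
 * (-ENODATA means the caller must read the disk) and flagging a mismatch
 * as cross-referencing corruption.
 */
static int
xchk_example_check_agino(
	struct xfs_scrub	*sc,
	xfs_agino_t		agino,
	bool			ondisk_free)
{
	bool			inuse;
	int			error;

	error = xchk_inode_is_allocated(sc, agino, &inuse);
	if (error == -ENODATA)
		return 0;	/* not cached; check the inode cluster instead */
	if (error)
		return error;

	if (inuse == ondisk_free)	/* incore and ondisk states disagree */
		xchk_block_xref_set_corrupt(sc, sc->sa.agi_bp);
	return 0;
}
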
1822 : /* Count the blocks used by a file, even if it's a metadata inode. */
1823 : int
1824 275276849 : xchk_inode_count_blocks(
1825 : struct xfs_scrub *sc,
1826 : int whichfork,
1827 : xfs_extnum_t *nextents,
1828 : xfs_filblks_t *count)
1829 : {
1830 275276849 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
1831 275210735 : struct xfs_btree_cur *cur;
1832 275210735 : xfs_extlen_t btblocks;
1833 275210735 : int error;
1834 :
1835 275210735 : if (!ifp) {
1836 4982619 : *nextents = 0;
1837 4982619 : *count = 0;
1838 4982619 : return 0;
1839 : }
1840 :
1841 270228116 : switch (ifp->if_format) {
1842 124702 : case XFS_DINODE_FMT_RMAP:
1843 124702 : if (!sc->sr.rtg) {
1844 0 : ASSERT(0);
1845 0 : return -EFSCORRUPTED;
1846 : }
1847 124702 : cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
1848 : sc->ip);
1849 124702 : goto meta_btree;
1850 224235 : case XFS_DINODE_FMT_REFCOUNT:
1851 224235 : if (!sc->sr.rtg) {
1852 0 : ASSERT(0);
1853 0 : return -EFSCORRUPTED;
1854 : }
1855 224235 : cur = xfs_rtrefcountbt_init_cursor(sc->mp, sc->tp, sc->sr.rtg,
1856 : sc->ip);
1857 224235 : goto meta_btree;
1858 : }
1859 :
1860 269879179 : return xfs_bmap_count_blocks(sc->tp, sc->ip, whichfork, nextents,
1861 : count);
1862 348937 : meta_btree:
1863 348937 : error = xfs_btree_count_blocks(cur, &btblocks);
1864 348937 : xfs_btree_del_cursor(cur, error);
1865 348937 : if (error)
1866 : return error;
1867 :
1868 348937 : *nextents = 0;
1869 348937 : *count = btblocks - 1;
1870 348937 : return 0;
1871 : }
1872 :
1873 : /* Complain about failures... */
1874 : void
1875 627 : xchk_whine(
1876 : const struct xfs_mount *mp,
1877 : const char *fmt,
1878 : ...)
1879 : {
1880 627 : struct va_format vaf;
1881 627 : va_list args;
1882 :
1883 627 : va_start(args, fmt);
1884 :
1885 627 : vaf.fmt = fmt;
1886 627 : vaf.va = &args;
1887 :
1888 627 : printk(KERN_INFO "XFS (%s) %pS: %pV\n", mp->m_super->s_id,
1889 : __return_address, &vaf);
1890 628 : va_end(args);
1891 :
1892 628 : if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
1893 0 : xfs_stack_trace();
1894 628 : }