Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_quota.h"
16 : #include "xfs_qm.h"
17 : #include "xfs_scrub.h"
18 : #include "xfs_btree.h"
19 : #include "xfs_btree_staging.h"
20 : #include "xfs_buf_xfile.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_xchgrange.h"
23 : #include "xfs_swapext.h"
24 : #include "xfs_da_format.h"
25 : #include "xfs_da_btree.h"
26 : #include "xfs_xattr.h"
27 : #include "scrub/scrub.h"
28 : #include "scrub/common.h"
29 : #include "scrub/trace.h"
30 : #include "scrub/repair.h"
31 : #include "scrub/health.h"
32 : #include "scrub/stats.h"
33 : #include "scrub/xfile.h"
34 : #include "scrub/tempfile.h"
35 : #include "scrub/orphanage.h"
36 :
37 : /*
38 : * Online Scrub and Repair
39 : *
40 : * Traditionally, XFS (the kernel driver) did not know how to check or
41 : * repair on-disk data structures. That task was left to the xfs_check
42 : * and xfs_repair tools, both of which require taking the filesystem
43 : * offline for a thorough but time consuming examination. Online
44 : * scrub & repair, on the other hand, enables us to check the metadata
45 : * for obvious errors while carefully stepping around the filesystem's
46 : * ongoing operations, locking rules, etc.
47 : *
48 : * Given that most XFS metadata consist of records stored in a btree,
49 : * most of the checking functions iterate the btree blocks themselves
50 : * looking for irregularities. When a record block is encountered, each
51 : * record can be checked for obviously bad values. Record values can
52 : * also be cross-referenced against other btrees to look for potential
53 : * misunderstandings between pieces of metadata.
54 : *
55 : * It is expected that the checkers responsible for per-AG metadata
56 : * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
57 : * metadata structure, and perform any relevant cross-referencing before
58 : * unlocking the AG and returning the results to userspace. These
59 : * scrubbers must not keep an AG locked for too long to avoid tying up
60 : * the block and inode allocators.
61 : *
62 : * Block maps and b-trees rooted in an inode present a special challenge
63 : * because they can involve extents from any AG. The general scrubber
64 : * structure of lock -> check -> xref -> unlock still holds, but AG
65 : * locking order rules /must/ be obeyed to avoid deadlocks. The
66 : * ordering rule, of course, is that we must lock in increasing AG
67 : * order. Helper functions are provided to track which AG headers we've
68 : * already locked. If we detect an imminent locking order violation, we
69 : * can signal a potential deadlock, in which case the scrubber can jump
70 : * out to the top level, lock all the AGs in order, and retry the scrub.
71 : *
72 : * For file data (directories, extended attributes, symlinks) scrub, we
73 : * can simply lock the inode and walk the data. For btree data
74 : * (directories and attributes) we follow the same btree-scrubbing
75 : * strategy outlined previously to check the records.
76 : *
77 : * We use a bit of trickery with transactions to avoid buffer deadlocks
78 : * if there is a cycle in the metadata. The basic problem is that
79 : * travelling down a btree involves locking the current buffer at each
80 : * tree level. If a pointer should somehow point back to a buffer that
81 : * we've already examined, we will deadlock due to the second buffer
82 : * locking attempt. Note however that grabbing a buffer in transaction
83 : * context links the locked buffer to the transaction. If we try to
84 : * re-grab the buffer in the context of the same transaction, we avoid
85 : * the second lock attempt and continue. Between the verifier and the
86 : * scrubber, something will notice that something is amiss and report
87 : * the corruption. Therefore, each scrubber will allocate an empty
88 : * transaction, attach buffers to it, and cancel the transaction at the
89 : * end of the scrub run. Cancelling a non-dirty transaction simply
90 : * unlocks the buffers.
91 : *
92 : * There are four pieces of data that scrub can communicate to
93 : * userspace. The first is the error code (errno), which can be used to
94 : * communicate operational errors in performing the scrub. There are
95 : * also three flags that can be set in the scrub context. If the data
96 : * structure itself is corrupt, the CORRUPT flag will be set. If
97 : * the metadata is correct but otherwise suboptimal, the PREEN flag
98 : * will be set.
99 : *
100 : * We perform secondary validation of filesystem metadata by
101 : * cross-referencing every record with all other available metadata.
102 : * For example, for block mapping extents, we verify that there are no
103 : * records in the free space and inode btrees corresponding to that
104 : * space extent and that there is a corresponding entry in the reverse
105 : * mapping btree. Inconsistent metadata is noted by setting the
106 : * XCORRUPT flag; btree query function errors are noted by setting the
107 : * XFAIL flag and deleting the cursor to prevent further attempts to
108 : * cross-reference with a defective btree.
109 : *
110 : * If a piece of metadata proves corrupt or suboptimal, the userspace
111 : * program can ask the kernel to apply some tender loving care (TLC) to
112 : * the metadata object by setting the REPAIR flag and re-calling the
113 : * scrub ioctl. "Corruption" is defined by metadata violating the
114 : * on-disk specification; operations cannot continue if the violation is
115 : * left untreated. It is possible for XFS to continue if an object is
116 : * "suboptimal", however performance may be degraded. Repairs are
117 : * usually performed by rebuilding the metadata entirely out of
118 : * redundant metadata. Optimizing, on the other hand, can sometimes be
119 : * done without rebuilding entire structures.
120 : *
121 : * Generally speaking, the repair code has the following code structure:
122 : * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
123 : * The first check helps us figure out if we need to rebuild or simply
124 : * optimize the structure so that the rebuild knows what to do. The
125 : * second check evaluates the completeness of the repair; that is what
126 : * is reported to userspace.
127 : *
128 : * A quick note on symbol prefixes:
129 : * - "xfs_" are general XFS symbols.
130 : * - "xchk_" are symbols related to metadata checking.
131 : * - "xrep_" are symbols related to metadata repair.
132 : * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS.
133 : */
134 :
/*
 * Scrub probe -- the no-op scrubber.  xfs_scrub (userspace) calls this to
 * find out whether the running kernel is willing to scrub or repair the
 * metadata of a given mountpoint.  By the time we get here the ioctl inputs
 * have been validated and the filesystem has been prepared for a scrub (or
 * repair) operation, so there is nothing left to examine and we return to
 * userspace straight away.  The errno and the state of the returned
 * structure tell userspace, in broad terms, whether scrub/repair are
 * supported.
 *
 * Returns 0, or the error filled in by xchk_should_terminate() when that
 * helper says we must stop.
 */
static int
xchk_probe(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	return xchk_should_terminate(sc, &error) ? error : 0;
}
156 :
157 : /* Scrub setup and teardown */
158 :
159 : #define FSGATES_MASK (XCHK_FSGATES_ALL | XREP_FSGATES_ALL)
160 : static inline void
161 1104864345 : xchk_fsgates_disable(
162 : struct xfs_scrub *sc)
163 : {
164 1104864345 : if (!(sc->flags & FSGATES_MASK))
165 : return;
166 :
167 47813603 : trace_xchk_fsgates_disable(sc, sc->flags & FSGATES_MASK);
168 :
169 47793592 : if (sc->flags & XCHK_FSGATES_DRAIN)
170 122908 : xfs_drain_wait_disable();
171 :
172 47793592 : if (sc->flags & XCHK_FSGATES_QUOTA)
173 18677 : xfs_dqtrx_hook_disable();
174 :
175 47793592 : if (sc->flags & XCHK_FSGATES_DIRENTS)
176 47596449 : xfs_dir_hook_disable();
177 :
178 48218375 : if (sc->flags & XCHK_FSGATES_RMAP)
179 15770 : xfs_rmap_hook_disable();
180 :
181 48218451 : if (sc->flags & XREP_FSGATES_ATOMIC_XCHG)
182 4572104 : xfs_xchg_range_rele_log_assist(sc->mp);
183 :
184 48219744 : if (sc->flags & XREP_FSGATES_LARP)
185 11331520 : xfs_attr_rele_log_assist(sc->mp);
186 :
187 48220124 : sc->flags &= ~FSGATES_MASK;
188 : }
189 : #undef FSGATES_MASK
190 :
191 : /* Free all the resources and finish the transactions. */
192 : STATIC int
193 1105097879 : xchk_teardown(
194 : struct xfs_scrub *sc,
195 : int error)
196 : {
197 1105097879 : xchk_ag_free(sc, &sc->sa);
198 1105351888 : if (sc->tp) {
199 1092795285 : if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
200 43195202 : error = xfs_trans_commit(sc->tp);
201 : else
202 1049600083 : xfs_trans_cancel(sc->tp);
203 1093290182 : sc->tp = NULL;
204 : }
205 1105846785 : if (sc->ip) {
206 1088468412 : if (sc->ilock_flags)
207 1088311922 : xchk_iunlock(sc, sc->ilock_flags);
208 1088159393 : xchk_irele(sc, sc->ip);
209 1089122053 : sc->ip = NULL;
210 : }
211 1106500426 : if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
212 77579510 : sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
213 77579510 : mnt_drop_write_file(sc->file);
214 : }
215 1106691353 : if (sc->xfile_buftarg) {
216 46991 : xfile_free_buftarg(sc->xfile_buftarg);
217 46970 : sc->xfile_buftarg = NULL;
218 : }
219 1106691332 : if (sc->xfile) {
220 38028 : xfile_destroy(sc->xfile);
221 38028 : sc->xfile = NULL;
222 : }
223 1106691332 : if (sc->buf) {
224 218952990 : if (sc->buf_cleanup)
225 187656444 : sc->buf_cleanup(sc->buf);
226 218535629 : kvfree(sc->buf);
227 218677642 : sc->buf_cleanup = NULL;
228 218677642 : sc->buf = NULL;
229 : }
230 :
231 1106415984 : xrep_tempfile_rele(sc);
232 1104609473 : xrep_orphanage_rele(sc);
233 1104863356 : xchk_fsgates_disable(sc);
234 1105231842 : return error;
235 : }
236 :
237 : /* Scrubbing dispatch. */
238 :
239 : static const struct xchk_meta_ops meta_scrub_ops[] = {
240 : [XFS_SCRUB_TYPE_PROBE] = { /* ioctl presence test */
241 : .type = ST_NONE,
242 : .setup = xchk_setup_fs,
243 : .scrub = xchk_probe,
244 : .repair = xrep_probe,
245 : },
246 : [XFS_SCRUB_TYPE_SB] = { /* superblock */
247 : .type = ST_PERAG,
248 : .setup = xchk_setup_agheader,
249 : .scrub = xchk_superblock,
250 : .repair = xrep_superblock,
251 : },
252 : [XFS_SCRUB_TYPE_AGF] = { /* agf */
253 : .type = ST_PERAG,
254 : .setup = xchk_setup_agheader,
255 : .scrub = xchk_agf,
256 : .repair = xrep_agf,
257 : },
258 : [XFS_SCRUB_TYPE_AGFL]= { /* agfl */
259 : .type = ST_PERAG,
260 : .setup = xchk_setup_agheader,
261 : .scrub = xchk_agfl,
262 : .repair = xrep_agfl,
263 : },
264 : [XFS_SCRUB_TYPE_AGI] = { /* agi */
265 : .type = ST_PERAG,
266 : .setup = xchk_setup_agheader,
267 : .scrub = xchk_agi,
268 : .repair = xrep_agi,
269 : },
270 : [XFS_SCRUB_TYPE_BNOBT] = { /* bnobt */
271 : .type = ST_PERAG,
272 : .setup = xchk_setup_ag_allocbt,
273 : .scrub = xchk_bnobt,
274 : .repair = xrep_allocbt,
275 : .repair_eval = xrep_revalidate_allocbt,
276 : },
277 : [XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */
278 : .type = ST_PERAG,
279 : .setup = xchk_setup_ag_allocbt,
280 : .scrub = xchk_cntbt,
281 : .repair = xrep_allocbt,
282 : .repair_eval = xrep_revalidate_allocbt,
283 : },
284 : [XFS_SCRUB_TYPE_INOBT] = { /* inobt */
285 : .type = ST_PERAG,
286 : .setup = xchk_setup_ag_iallocbt,
287 : .scrub = xchk_inobt,
288 : .repair = xrep_iallocbt,
289 : .repair_eval = xrep_revalidate_iallocbt,
290 : },
291 : [XFS_SCRUB_TYPE_FINOBT] = { /* finobt */
292 : .type = ST_PERAG,
293 : .setup = xchk_setup_ag_iallocbt,
294 : .scrub = xchk_finobt,
295 : .has = xfs_has_finobt,
296 : .repair = xrep_iallocbt,
297 : .repair_eval = xrep_revalidate_iallocbt,
298 : },
299 : [XFS_SCRUB_TYPE_RMAPBT] = { /* rmapbt */
300 : .type = ST_PERAG,
301 : .setup = xchk_setup_ag_rmapbt,
302 : .scrub = xchk_rmapbt,
303 : .has = xfs_has_rmapbt,
304 : .repair = xrep_rmapbt,
305 : },
306 : [XFS_SCRUB_TYPE_REFCNTBT] = { /* refcountbt */
307 : .type = ST_PERAG,
308 : .setup = xchk_setup_ag_refcountbt,
309 : .scrub = xchk_refcountbt,
310 : .has = xfs_has_reflink,
311 : .repair = xrep_refcountbt,
312 : },
313 : [XFS_SCRUB_TYPE_INODE] = { /* inode record */
314 : .type = ST_INODE,
315 : .setup = xchk_setup_inode,
316 : .scrub = xchk_inode,
317 : .repair = xrep_inode,
318 : },
319 : [XFS_SCRUB_TYPE_BMBTD] = { /* inode data fork */
320 : .type = ST_INODE,
321 : .setup = xchk_setup_inode_bmap,
322 : .scrub = xchk_bmap_data,
323 : .repair = xrep_bmap_data,
324 : },
325 : [XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */
326 : .type = ST_INODE,
327 : .setup = xchk_setup_inode_bmap,
328 : .scrub = xchk_bmap_attr,
329 : .repair = xrep_bmap_attr,
330 : },
331 : [XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */
332 : .type = ST_INODE,
333 : .setup = xchk_setup_inode_bmap,
334 : .scrub = xchk_bmap_cow,
335 : .repair = xrep_bmap_cow,
336 : },
337 : [XFS_SCRUB_TYPE_DIR] = { /* directory */
338 : .type = ST_INODE,
339 : .setup = xchk_setup_directory,
340 : .scrub = xchk_directory,
341 : .repair = xrep_directory,
342 : },
343 : [XFS_SCRUB_TYPE_XATTR] = { /* extended attributes */
344 : .type = ST_INODE,
345 : .setup = xchk_setup_xattr,
346 : .scrub = xchk_xattr,
347 : .repair = xrep_xattr,
348 : },
349 : [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
350 : .type = ST_INODE,
351 : .setup = xchk_setup_symlink,
352 : .scrub = xchk_symlink,
353 : .repair = xrep_symlink,
354 : },
355 : [XFS_SCRUB_TYPE_PARENT] = { /* parent pointers */
356 : .type = ST_INODE,
357 : .setup = xchk_setup_parent,
358 : .scrub = xchk_parent,
359 : .repair = xrep_parent,
360 : },
361 : [XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
362 : .type = ST_FS,
363 : .setup = xchk_setup_rtbitmap,
364 : .scrub = xchk_rtbitmap,
365 : .has = xfs_has_realtime,
366 : .repair = xrep_rtbitmap,
367 : },
368 : [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
369 : .type = ST_FS,
370 : .setup = xchk_setup_rtsummary,
371 : .scrub = xchk_rtsummary,
372 : .has = xfs_has_realtime,
373 : .repair = xrep_rtsummary,
374 : },
375 : [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
376 : .type = ST_FS,
377 : .setup = xchk_setup_quota,
378 : .scrub = xchk_quota,
379 : .repair = xrep_quota,
380 : },
381 : [XFS_SCRUB_TYPE_GQUOTA] = { /* group quota */
382 : .type = ST_FS,
383 : .setup = xchk_setup_quota,
384 : .scrub = xchk_quota,
385 : .repair = xrep_quota,
386 : },
387 : [XFS_SCRUB_TYPE_PQUOTA] = { /* project quota */
388 : .type = ST_FS,
389 : .setup = xchk_setup_quota,
390 : .scrub = xchk_quota,
391 : .repair = xrep_quota,
392 : },
393 : [XFS_SCRUB_TYPE_FSCOUNTERS] = { /* fs summary counters */
394 : .type = ST_FS,
395 : .setup = xchk_setup_fscounters,
396 : .scrub = xchk_fscounters,
397 : .repair = xrep_fscounters,
398 : },
399 : [XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */
400 : .type = ST_FS,
401 : .setup = xchk_setup_quotacheck,
402 : .scrub = xchk_quotacheck,
403 : .repair = xrep_quotacheck,
404 : },
405 : [XFS_SCRUB_TYPE_NLINKS] = { /* inode link counts */
406 : .type = ST_FS,
407 : .setup = xchk_setup_nlinks,
408 : .scrub = xchk_nlinks,
409 : .repair = xrep_nlinks,
410 : },
411 : [XFS_SCRUB_TYPE_HEALTHY] = { /* fs healthy; clean all reminders */
412 : .type = ST_FS,
413 : .setup = xchk_setup_fs,
414 : .scrub = xchk_health_record,
415 : .repair = xrep_notsupported,
416 : },
417 : [XFS_SCRUB_TYPE_DIRTREE] = { /* directory tree structure */
418 : .type = ST_INODE,
419 : .setup = xchk_setup_dirtree,
420 : .scrub = xchk_dirtree,
421 : .has = xfs_has_parent,
422 : .repair = xrep_dirtree,
423 : },
424 : };
425 :
426 : static int
427 1080978763 : xchk_validate_inputs(
428 : struct xfs_mount *mp,
429 : struct xfs_scrub_metadata *sm)
430 : {
431 1080978763 : int error;
432 1080978763 : const struct xchk_meta_ops *ops;
433 :
434 1080978763 : error = -EINVAL;
435 : /* Check our inputs. */
436 1080978763 : sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
437 1080978763 : if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
438 0 : goto out;
439 : /* sm_reserved[] must be zero */
440 2163080427 : if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
441 0 : goto out;
442 :
443 1082101664 : error = -ENOENT;
444 : /* Do we know about this type of metadata? */
445 1082101664 : if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
446 0 : goto out;
447 1082101664 : ops = &meta_scrub_ops[sm->sm_type];
448 1083043728 : if (ops->setup == NULL || ops->scrub == NULL)
449 0 : goto out;
450 : /* Does this fs even support this type of metadata? */
451 1083043728 : if (ops->has && !ops->has(mp))
452 641087 : goto out;
453 :
454 1082193343 : error = -EINVAL;
455 : /* restricting fields must be appropriate for type */
456 1082193343 : switch (ops->type) {
457 439013 : case ST_NONE:
458 : case ST_FS:
459 439013 : if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
460 0 : goto out;
461 : break;
462 3751706 : case ST_PERAG:
463 3751706 : if (sm->sm_ino || sm->sm_gen ||
464 3751706 : sm->sm_agno >= mp->m_sb.sb_agcount)
465 0 : goto out;
466 : break;
467 1078002624 : case ST_INODE:
468 1078002624 : if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
469 0 : goto out;
470 : break;
471 0 : default:
472 0 : goto out;
473 : }
474 :
475 : /* No rebuild without repair. */
476 1082193343 : if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) &&
477 : !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
478 : return -EINVAL;
479 :
480 : /*
481 : * We only want to repair read-write v5+ filesystems. Defer the check
482 : * for ops->repair until after our scrub confirms that we need to
483 : * perform repairs so that we avoid failing due to not supporting
484 : * repairing an object that doesn't need repairs.
485 : */
486 1082193343 : if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
487 56354182 : error = -EOPNOTSUPP;
488 56354182 : if (!xfs_has_crc(mp))
489 0 : goto out;
490 :
491 56354182 : error = -EROFS;
492 112708364 : if (xfs_is_readonly(mp))
493 14421 : goto out;
494 : }
495 :
496 : error = 0;
497 : out:
498 : return error;
499 : }
500 :
501 : #ifdef CONFIG_XFS_ONLINE_REPAIR
502 902120409 : static inline void xchk_postmortem(struct xfs_scrub *sc)
503 : {
504 : /*
505 : * Userspace asked us to repair something, we repaired it, rescanned
506 : * it, and the rescan says it's still broken. Scream about this in
507 : * the system logs.
508 : */
509 902120409 : if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
510 48919169 : (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
511 : XFS_SCRUB_OFLAG_XCORRUPT)))
512 0 : xrep_failure(sc->mp);
513 902120409 : }
514 : #else
515 : static inline void xchk_postmortem(struct xfs_scrub *sc)
516 : {
517 : /*
518 : * Userspace asked us to scrub something, it's broken, and we have no
519 : * way of fixing it. Scream in the logs.
520 : */
521 : if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
522 : XFS_SCRUB_OFLAG_XCORRUPT))
523 : xfs_alert_ratelimited(sc->mp,
524 : "Corruption detected during scrub.");
525 : }
526 : #endif /* CONFIG_XFS_ONLINE_REPAIR */
527 :
528 : static inline void
529 48915100 : repair_outcomes(struct xfs_scrub *sc, int error)
530 : {
531 48915100 : struct xfs_scrub_metadata *sm = sc->sm;
532 48915100 : const char *wut = NULL;
533 :
534 48915100 : if (sc->flags & XREP_ALREADY_FIXED) {
535 : wut = "*** REPAIR SUCCESS";
536 : error = 0;
537 27371751 : } else if (error == -EBUSY) {
538 : wut = "??? FILESYSTEM BUSY";
539 : } else if (error == -EAGAIN) {
540 : wut = "??? REPAIR DEFERRED";
541 : } else if (error == -ECANCELED) {
542 : wut = "??? REPAIR CANCELLED";
543 : } else if (error == -EINTR) {
544 : wut = "??? REPAIR INTERRUPTED";
545 27371486 : } else if (error != -EOPNOTSUPP && error != -ENOENT) {
546 158 : wut = "!!! REPAIR FAILED";
547 158 : xfs_info(sc->mp,
548 : "%s ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags 0x%x error %d",
549 : wut, XFS_I(file_inode(sc->file))->i_ino,
550 : xchk_type_string(sm->sm_type), sm->sm_agno,
551 : sm->sm_ino, sm->sm_gen, sm->sm_flags, error);
552 158 : return;
553 : } else {
554 : return;
555 : }
556 :
557 21543614 : xfs_info_ratelimited(sc->mp,
558 : "%s ino 0x%llx type %s agno 0x%x inum 0x%llx gen 0x%x flags 0x%x error %d",
559 : wut, XFS_I(file_inode(sc->file))->i_ino,
560 : xchk_type_string(sm->sm_type), sm->sm_agno, sm->sm_ino,
561 : sm->sm_gen, sm->sm_flags, error);
562 : }
563 :
564 : /* Dispatch metadata scrubbing. */
565 : int
566 1084747084 : xfs_scrub_metadata(
567 : struct file *file,
568 : struct xfs_scrub_metadata *sm)
569 : {
570 1084747084 : struct xchk_stats_run run = { };
571 1084747084 : struct xfs_scrub *sc;
572 1084747084 : struct xfs_mount *mp = XFS_I(file_inode(file))->i_mount;
573 1084747084 : u64 check_start;
574 1084747084 : int error = 0;
575 :
576 1084747084 : BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
577 : (sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
578 :
579 1084747084 : trace_xchk_start(XFS_I(file_inode(file)), sm, error);
580 :
581 : /* Forbidden if we are shut down or mounted norecovery. */
582 1082077425 : error = -ESHUTDOWN;
583 2164154850 : if (xfs_is_shutdown(mp))
584 0 : goto out;
585 1082077425 : error = -ENOTRECOVERABLE;
586 1082077425 : if (xfs_has_norecovery(mp))
587 11 : goto out;
588 :
589 1082077414 : error = xchk_validate_inputs(mp, sm);
590 1081650595 : if (error)
591 656082 : goto out;
592 :
593 1080994513 : xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
594 : "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
595 :
596 1083189115 : sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
597 1085090920 : if (!sc) {
598 0 : error = -ENOMEM;
599 0 : goto out;
600 : }
601 :
602 1085090920 : sc->mp = mp;
603 1085090920 : sc->file = file;
604 1085090920 : sc->sm = sm;
605 1085090920 : sc->ops = &meta_scrub_ops[sm->sm_type];
606 1085226720 : sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
607 : retry_op:
608 : /*
609 : * When repairs are allowed, prevent freezing or readonly remount while
610 : * scrub is running with a real transaction.
611 : */
612 1103709071 : if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
613 77982083 : error = mnt_want_write_file(sc->file);
614 77984710 : if (error)
615 396 : goto out_sc;
616 :
617 77984314 : sc->flags |= XCHK_HAVE_FREEZE_PROT;
618 : }
619 :
620 : /* Set up for the operation. */
621 1103711302 : error = sc->ops->setup(sc);
622 1105908900 : if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
623 0 : goto try_harder;
624 1105908900 : if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
625 37028 : goto need_drain;
626 1105871872 : if (error)
627 12774808 : goto out_teardown;
628 :
629 : /* Scrub for errors. */
630 1093097064 : check_start = xchk_stats_now();
631 1087719430 : if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
632 113757 : error = sc->ops->repair_eval(sc);
633 : else
634 1087605673 : error = sc->ops->scrub(sc);
635 1092866367 : run.scrub_ns += xchk_stats_elapsed_ns(check_start);
636 1095074771 : if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
637 14532 : goto try_harder;
638 1095060239 : if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
639 77975 : goto need_drain;
640 1094982264 : if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
641 169507081 : goto out_teardown;
642 :
643 925475183 : xchk_update_health(sc);
644 :
645 1849095736 : if (xchk_could_repair(sc)) {
646 : /*
647 : * If userspace asked for a repair but it wasn't necessary,
648 : * report that back to userspace.
649 : */
650 49069696 : if (!xrep_will_attempt(sc)) {
651 5203 : sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
652 5203 : goto out_nofix;
653 : }
654 :
655 : /*
656 : * If it's broken, userspace wants us to fix it, and we haven't
657 : * already tried to fix it, then attempt a repair.
658 : */
659 48910374 : error = xrep_attempt(sc, &run);
660 48920223 : repair_outcomes(sc, error);
661 48918588 : if (error == -EAGAIN) {
662 : /*
663 : * Either the repair function succeeded or it couldn't
664 : * get all the resources it needs; either way, we go
665 : * back to the beginning and call the scrub function.
666 : */
667 21550125 : error = xchk_teardown(sc, 0);
668 21546144 : if (error) {
669 0 : xrep_failure(mp);
670 0 : goto out_sc;
671 : }
672 21546144 : goto retry_op;
673 : }
674 : }
675 :
676 902846635 : out_nofix:
677 902851838 : xchk_postmortem(sc);
678 1083738125 : out_teardown:
679 1083738125 : error = xchk_teardown(sc, error);
680 1083755342 : out_sc:
681 1083755342 : kfree(sc);
682 1084864756 : out:
683 1084864756 : trace_xchk_done(XFS_I(file_inode(file)), sm, error);
684 1081890056 : if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
685 0 : sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
686 0 : error = 0;
687 : }
688 1081890056 : if (error != -ENOENT)
689 906626088 : xchk_stats_merge(mp, sm, &run);
690 1087654462 : return error;
691 115003 : need_drain:
692 115003 : error = xchk_teardown(sc, 0);
693 115003 : if (error)
694 0 : goto out_sc;
695 115003 : sc->flags |= XCHK_NEED_DRAIN;
696 115003 : run.retries++;
697 115003 : goto retry_op;
698 14532 : try_harder:
699 : /*
700 : * Scrubbers return -EDEADLOCK to mean 'try harder'. Tear down
701 : * everything we hold, then set up again with preparation for
702 : * worst-case scenarios.
703 : */
704 14532 : error = xchk_teardown(sc, 0);
705 14532 : if (error)
706 0 : goto out_sc;
707 14532 : sc->flags |= XCHK_TRY_HARDER;
708 14532 : run.retries++;
709 14532 : goto retry_op;
710 : }
|