Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_icache.h"
16 : #include "xfs_iwalk.h"
17 : #include "xfs_ialloc.h"
18 : #include "xfs_dir2.h"
19 : #include "xfs_dir2_priv.h"
20 : #include "xfs_ag.h"
21 : #include "xfs_parent.h"
22 : #include "scrub/scrub.h"
23 : #include "scrub/common.h"
24 : #include "scrub/repair.h"
25 : #include "scrub/xfile.h"
26 : #include "scrub/xfarray.h"
27 : #include "scrub/iscan.h"
28 : #include "scrub/orphanage.h"
29 : #include "scrub/nlinks.h"
30 : #include "scrub/trace.h"
31 : #include "scrub/readdir.h"
32 : #include "scrub/tempfile.h"
33 : #include "scrub/listxattr.h"
34 :
35 : /*
36 : * Live Inode Link Count Checking
37 : * ==============================
38 : *
39 : * Inode link counts are "summary" metadata, in the sense that they are
40 : * computed as the number of directory entries referencing each file on the
41 : * filesystem. Therefore, we compute the correct link counts by creating a
42 : * shadow link count structure and walking every inode.
43 : */
44 :
45 : /*
 * Set us up to scrub inode link counts.
 *
 * Turns on the directory-entry fsgate, runs the repair-side setup when
 * xchk_could_repair() says a repair may follow, allocates the
 * struct xchk_nlink_ctrs that the rest of this scrubber reads back from
 * sc->buf, and finishes with the generic xchk_setup_fs().
 *
 * Returns 0 on success, -ENOMEM if the counter structure cannot be allocated,
 * or whatever negative errno xrep_setup_nlinks() or xchk_setup_fs() returned.
 */
46 : int
47 28432 : xchk_setup_nlinks(
48 : struct xfs_scrub *sc)
49 : {
50 28432 : int error;
51 :
/* xchk_nlinks_setup_scan() asserts this gate before adding its dirent hook. */
52 28432 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
53 :
54 56864 : if (xchk_could_repair(sc)) {
55 3936 : error = xrep_setup_nlinks(sc);
56 3936 : if (error)
57 : return error;
58 : }
59 :
/* Zeroed allocation; xchk_nlinks_setup_scan() asserts xnc->sc is still NULL. */
60 28432 : sc->buf = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
61 28432 : if (!sc->buf)
62 : return -ENOMEM;
63 :
64 28432 : return xchk_setup_fs(sc);
65 : }
66 :
67 : /*
68 : * Part 1: Collecting file link counts. For each file, we create a shadow link
69 : * counting structure, then walk the entire directory tree, incrementing parent
70 : * and child link counts for each directory entry seen.
71 : *
72 : * To avoid false corruption reports in part 2, any failure in this part must
73 : * set the INCOMPLETE flag even when a negative errno is returned. This care
74 : * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
75 : * ECANCELED) that are absorbed into a scrub state flag update by
76 : * xchk_*_process_error. Scrub and repair share the same incore data
77 : * structures, so the INCOMPLETE flag is critical to prevent a repair based on
78 : * insufficient information.
79 : *
80 : * Because we are scanning a live filesystem, it's possible that another thread
81 : * will try to update the link counts for an inode that we've already scanned.
82 : * This will cause our counts to be incorrect. Therefore, we hook all
83 : * directory entry updates because that is when link count updates occur. By
84 : * shadowing transaction updates in this manner, live nlink check can ensure by
85 : * locking the inode and the shadow structure that its own copies are not out
86 : * of date. Because the hook code runs in a different process context from the
87 : * scrub code and the scrub state flags are not accessed atomically, failures
88 : * in the hook code must abort the iscan and the scrubber must notice the
89 : * aborted scan and set the incomplete flag.
90 : *
91 : * Note that we use jump labels and srcu notifier hooks to minimize the
92 : * overhead when live nlinks is /not/ running. Locking order for nlink
93 : * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
94 : */
95 :
96 : /*
97 : * Add a delta to an nlink counter, being careful about integer overflow.
98 : * Clamp the value to XFS_NLINK_PINNED because the ondisk format does not
99 : * handle link counts any higher.
 *
 * @nlinkp: counter to adjust in place.
 * @delta: signed adjustment; callers in this file pass 0, 1, or the delta
 *	   reported by a directory update.
 *
 * The sum is computed in 64 bits so that a positive delta cannot wrap the
 * narrower counter type before the clamp is applied.
 *
 * NOTE(review): a negative @delta is converted to uint64_t for the addition.
 * That still yields the right answer while the counter is at least as large
 * as the decrement, but if the decrement exceeds the counter the sum wraps to
 * a huge value and the result is clamped to XFS_NLINK_PINNED instead of 0.
 * Confirm that this is the intended treatment of underflow.
100 : */
101 : static inline void
102 : careful_add(
103 : xfs_nlink_t *nlinkp,
104 : int delta)
105 : {
106 1025045321 : uint64_t new_value = (uint64_t)(*nlinkp) + delta;
107 :
108 1025045321 : *nlinkp = min_t(uint64_t, new_value, XFS_NLINK_PINNED);
109 : }
110 :
111 : /*
 * Update incore link count information. Caller must hold the nlinks lock.
 *
 * Loads the shadow record for @ino from the sparse array, applies the three
 * deltas with careful_add(), marks the record XCHK_NLINK_WRITTEN and stores
 * it back.
 *
 * @xnc: link count scan state.
 * @ino: inode number, used directly as the array index.
 * @parents_delta: change to the count of parents linking to @ino.
 * @backrefs_delta: change to the count of back references recorded for @ino.
 * @children_delta: change to the count of child links recorded for @ino.
 *
 * Returns 0 on success, and also 0 without doing anything when the array is
 * gone (xchk_nlinks_teardown_scan() sets xnc->nlinks to NULL). A store
 * failure of -EFBIG is reported as -ECANCELED; other negative errnos from the
 * array load/store are passed through.
 */
112 : STATIC int
113 1025045321 : xchk_nlinks_update_incore(
114 : struct xchk_nlink_ctrs *xnc,
115 : xfs_ino_t ino,
116 : int parents_delta,
117 : int backrefs_delta,
118 : int children_delta)
119 : {
120 1025045321 : struct xchk_nlink nl;
121 1025045321 : int error;
122 :
123 1025045321 : if (!xnc->nlinks)
124 : return 0;
125 :
126 1025045321 : error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
127 1025045321 : if (error)
128 : return error;
129 :
/* The trace point sees the record as loaded, before the deltas are applied. */
130 1025045321 : trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
131 : backrefs_delta, children_delta);
132 :
133 1025045321 : careful_add(&nl.parents, parents_delta);
134 1025045321 : careful_add(&nl.backrefs, backrefs_delta);
135 1025045321 : careful_add(&nl.children, children_delta);
136 :
137 1025045321 : nl.flags |= XCHK_NLINK_WRITTEN;
138 1025045321 : error = xfarray_store(xnc->nlinks, ino, &nl);
139 1025045321 : if (error == -EFBIG) {
140 : /*
141 : * EFBIG means we tried to store data at too high a byte offset
142 : * in the sparse array. IOWs, we cannot complete the check and
143 : * must notify userspace that the check was incomplete.
144 : */
145 0 : error = -ECANCELED;
146 : }
147 : return error;
148 : }
149 :
150 : /*
151 : * Apply a link count change from the regular filesystem into our shadow link
152 : * count structure based on a directory update in progress.
 *
 * This is the notifier callback installed by xchk_nlinks_setup_scan().
 *
 * @nb: notifier block embedded in xnc->hooks.dirent_hook; used to recover
 *	the owning struct xchk_nlink_ctrs.
 * @action: passed through to the trace point only.
 * @data: struct xfs_dir_update_params describing the parent directory (dp),
 *	  the child inode (ip), the dirent name and the link count delta.
 *
 * Always returns NOTIFY_DONE. A failure to update the shadow counters cannot
 * be reported to the caller, so it aborts the collection iscan instead; the
 * scrubber checks for an aborted scan and marks the scrub incomplete.
153 : */
154 : STATIC int
155 20192637 : xchk_nlinks_live_update(
156 : struct notifier_block *nb,
157 : unsigned long action,
158 : void *data)
159 : {
160 20192637 : struct xfs_dir_update_params *p = data;
161 20192637 : struct xchk_nlink_ctrs *xnc;
162 20192637 : int error;
163 :
164 20192637 : xnc = container_of(nb, struct xchk_nlink_ctrs, hooks.dirent_hook.nb);
165 :
166 : /*
167 : * Ignore temporary directories being used to stage dir repairs, since
168 : * we don't bump the link counts of the children.
169 : */
170 20192637 : if (xrep_is_tempfile(p->dp))
171 : return NOTIFY_DONE;
172 :
173 20188430 : trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
174 20188430 : p->delta, p->name->name, p->name->len);
175 :
176 : /*
177 : * If we've already scanned @dp, update the number of parents that link
178 : * to @ip. If @ip is a subdirectory, update the number of child links
179 : * going out of @dp.
180 : */
181 20192793 : if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
182 13761615 : mutex_lock(&xnc->lock);
183 13761967 : error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
184 : 0, 0);
185 13761967 : if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
186 3448859 : error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
187 : 0, p->delta);
188 13761967 : mutex_unlock(&xnc->lock);
189 13761959 : if (error)
190 0 : goto out_abort;
191 : }
192 :
193 : /*
194 : * If @ip is a subdirectory and we've already scanned it, update the
195 : * number of backrefs pointing to @dp.
196 : */
197 25264519 : if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
198 5063445 : xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
199 3237448 : mutex_lock(&xnc->lock);
200 3237453 : error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
201 : p->delta, 0);
202 3237453 : mutex_unlock(&xnc->lock);
203 3237453 : if (error)
204 0 : goto out_abort;
205 : }
206 :
207 : return NOTIFY_DONE;
208 :
209 0 : out_abort:
/* The error code itself is dropped; only the aborted scan state survives. */
210 0 : xchk_iscan_abort(&xnc->collect_iscan);
211 0 : return NOTIFY_DONE;
212 : }
213 :
214 : /*
 * Bump the observed link count for the inode referenced by this entry.
 *
 * Directory walk callback handed to xchk_dir_walk() by
 * xchk_nlinks_collect_dir().
 *
 * @sc: scrub context.
 * @dp: directory being walked.
 * @dapos: position of the entry; not used here.
 * @name: name (and file type) of the directory entry.
 * @ino: inode number that the entry points to.
 * @priv: the struct xchk_nlink_ctrs for this scan.
 *
 * Returns 0 if the entry was counted. Every failure path marks the scrub
 * incomplete before returning a negative errno: -ECANCELED for an entry that
 * fails validation or when the collection scan was already aborted, otherwise
 * the error from xchk_nlinks_update_incore(). All failures except the
 * "already aborted" case also abort the collection scan.
 */
215 : STATIC int
216 1004731968 : xchk_nlinks_collect_dirent(
217 : struct xfs_scrub *sc,
218 : struct xfs_inode *dp,
219 : xfs_dir2_dataptr_t dapos,
220 : const struct xfs_name *name,
221 : xfs_ino_t ino,
222 : void *priv)
223 : {
224 1004731968 : struct xchk_nlink_ctrs *xnc = priv;
225 1004731968 : bool dot = false, dotdot = false;
226 1004731968 : int error;
227 :
228 : /* Does this name make sense? */
229 1004731968 : if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
230 121 : error = -ECANCELED;
231 121 : goto out_abort;
232 : }
233 :
234 1004731847 : if (name->len == 1 && name->name[0] == '.')
235 : dot = true;
236 825715127 : else if (name->len == 2 && name->name[0] == '.' &&
237 179016720 : name->name[1] == '.')
238 : dotdot = true;
239 :
240 : /* Don't accept a '.' entry that points somewhere else. */
241 179016720 : if (dot && ino != dp->i_ino) {
242 0 : error = -ECANCELED;
243 0 : goto out_abort;
244 : }
245 :
246 : /* Don't accept an invalid inode number. */
247 1004731847 : if (!xfs_verify_ino(sc->mp, ino)) {
248 0 : error = -ECANCELED;
249 0 : goto out_abort;
250 : }
251 :
252 : /* Update the shadow link counts if we haven't already failed. */
253 :
254 1004731847 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
255 0 : error = -ECANCELED;
256 0 : goto out_incomplete;
257 : }
258 :
259 1004731847 : trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
260 :
261 1004731847 : mutex_lock(&xnc->lock);
262 :
263 : /*
264 : * If this is a dotdot entry, it is a back link from dp to ino. How
265 : * we handle this depends on whether or not dp is the root directory.
266 : *
267 : * The root directory is its own parent, so we pretend the dotdot entry
268 : * establishes the "parent" of the root directory. Increment the
269 : * number of parents of the root directory.
270 : *
271 : * Otherwise, increment the number of backrefs pointing back to ino.
272 : *
273 : * If the filesystem has parent pointers, we walk the pptrs to
274 : * determine the backref count.
 *
 * The same root treatment is applied to the metadata directory root
 * (m_metadirip). In the parent pointer case nothing is counted here;
 * xchk_nlinks_collect_pptr() does the counting instead.
275 : */
276 1004731847 : if (dotdot) {
277 179016720 : if (dp == sc->mp->m_rootip || dp == sc->mp->m_metadirip)
278 55327 : error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
279 178961393 : else if (!xfs_has_parent(sc->mp))
280 1395265 : error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
281 : else
282 : error = 0;
283 1450592 : if (error)
284 0 : goto out_unlock;
285 : }
286 :
287 : /*
288 : * If this dirent is a forward link from dp to ino, increment the
289 : * number of parents linking into ino.
290 : */
291 1004731847 : if (!dot && !dotdot) {
292 646698407 : error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
293 646698407 : if (error)
294 0 : goto out_unlock;
295 : }
296 :
297 : /*
298 : * If this dirent is a forward link to a subdirectory, increment the
299 : * number of child links of dp.
 *
 * This relies on the file type stored in the dirent name; see the
 * ftype=0 handling in xchk_nlinks_compare_inode().
300 : */
301 1004731847 : if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
302 178875016 : error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
303 178875016 : if (error)
304 0 : goto out_unlock;
305 : }
306 :
307 1004731847 : mutex_unlock(&xnc->lock);
308 1004731847 : return 0;
309 :
/* Error unwinding: each label falls through into the next one. */
310 0 : out_unlock:
311 0 : mutex_unlock(&xnc->lock);
312 121 : out_abort:
313 121 : xchk_iscan_abort(&xnc->collect_iscan);
314 121 : out_incomplete:
315 121 : xchk_set_incomplete(sc);
316 121 : return error;
317 : }
318 :
319 : /*
 * Bump the backref count for the inode referenced by this parent pointer.
 *
 * Parent pointer walk callback handed to xchk_pptr_walk() by
 * xchk_nlinks_collect_dir() on filesystems with parent pointers.
 *
 * @sc: scrub context.
 * @ip: file whose parent pointers are being walked; only passed to the trace
 *	point here.
 * @pptr: parent pointer record; pptr->p_ino names the parent directory.
 * @priv: the struct xchk_nlink_ctrs for this scan.
 *
 * Returns 0 if the backref was counted. Every failure path marks the scrub
 * incomplete before returning: -EFSCORRUPTED for an invalid parent inode
 * number, -ECANCELED if the collection scan was already aborted, otherwise
 * the error from xchk_nlinks_update_incore(). All failures except the
 * "already aborted" case also abort the collection scan.
 *
 * NOTE(review): the dirent callback reports an invalid inode number as
 * -ECANCELED while this one uses -EFSCORRUPTED; xchk_nlinks_collect_dir()
 * only treats -ECANCELED as "already handled". Confirm the difference is
 * intentional.
 */
320 : STATIC int
321 177566128 : xchk_nlinks_collect_pptr(
322 : struct xfs_scrub *sc,
323 : struct xfs_inode *ip,
324 : const struct xfs_parent_name_irec *pptr,
325 : void *priv)
326 : {
327 177566128 : struct xchk_nlink_ctrs *xnc = priv;
328 177566128 : int error;
329 :
330 : /* Don't accept an invalid inode number. */
331 177566128 : if (!xfs_verify_ino(sc->mp, pptr->p_ino)) {
332 0 : error = -EFSCORRUPTED;
333 0 : goto out_abort;
334 : }
335 :
336 : /* Update the shadow link counts if we haven't already failed. */
337 :
338 177566128 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
339 0 : error = -ECANCELED;
340 0 : goto out_incomplete;
341 : }
342 :
343 177566128 : trace_xchk_nlinks_collect_pptr(sc->mp, ip, pptr);
344 :
345 177566128 : mutex_lock(&xnc->lock);
346 :
/* One more back reference recorded against the parent directory. */
347 177566128 : error = xchk_nlinks_update_incore(xnc, pptr->p_ino, 0, 1, 0);
348 177566128 : if (error)
349 0 : goto out_unlock;
350 :
351 177566128 : mutex_unlock(&xnc->lock);
352 177566128 : return 0;
353 :
/* Error unwinding: each label falls through into the next one. */
354 : out_unlock:
355 0 : mutex_unlock(&xnc->lock);
356 0 : out_abort:
357 0 : xchk_iscan_abort(&xnc->collect_iscan);
358 0 : out_incomplete:
359 0 : xchk_set_incomplete(sc);
360 0 : return error;
361 : }
362 :
363 : /*
 * Walk a directory to bump the observed link counts of the children.
 *
 * @xnc: link count scan state.
 * @dp: directory inode handed out by the collection iscan.
 *
 * The directory is held with IOLOCK_SHARED plus the shared data map lock for
 * the whole walk. It is marked visited in the collection iscan only if the
 * dirent walk (and, with parent pointers, the pptr walk) completed.
 *
 * Returns 0 or a negative errno. Note that several failure modes return 0:
 * -ECANCELED from either walk is converted to 0 (the callbacks above mark the
 * scrub incomplete on every -ECANCELED path), and a zapped directory marks
 * the scrub incomplete and aborts the scan while @error is still 0. Any other
 * walk error is returned after marking the scrub incomplete and aborting the
 * scan.
 */
364 : STATIC int
365 179018871 : xchk_nlinks_collect_dir(
366 : struct xchk_nlink_ctrs *xnc,
367 : struct xfs_inode *dp)
368 : {
369 179018871 : struct xfs_scrub *sc = xnc->sc;
370 179018871 : unsigned int lock_mode;
371 179018871 : int error = 0;
372 :
373 : /*
374 : * Ignore temporary directories being used to stage dir repairs, since
375 : * we don't bump the link counts of the children.
376 : */
377 179018871 : if (xrep_is_tempfile(dp))
378 : return 0;
379 :
380 : /* Prevent anyone from changing this directory while we walk it. */
381 179018871 : xfs_ilock(dp, XFS_IOLOCK_SHARED);
382 179018871 : lock_mode = xfs_ilock_data_map_shared(dp);
383 :
384 : /*
385 : * The dotdot entry of an unlinked directory still points to the last
386 : * parent, but the parent no longer links to this directory. Skip the
387 : * directory to avoid overcounting.
 *
 * The skipped directory is not marked visited in the iscan.
388 : */
389 179018871 : if (VFS_I(dp)->i_nlink == 0)
390 2151 : goto out_unlock;
391 :
392 : /*
393 : * We cannot count file links if the directory looks as though it has
394 : * been zapped by the inode record repair code.
395 : */
396 179016720 : if (xchk_dir_looks_zapped(dp))
397 0 : goto out_abort;
398 :
399 179016720 : error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
400 179016720 : if (error == -ECANCELED) {
401 121 : error = 0;
402 121 : goto out_unlock;
403 : }
404 179016599 : if (error)
405 0 : goto out_abort;
406 :
407 : /* Walk the parent pointers to get real backref counts. */
408 179016599 : if (xfs_has_parent(sc->mp)) {
409 177620089 : error = xchk_pptr_walk(sc, dp, xchk_nlinks_collect_pptr,
410 : &xnc->pptr, xnc);
411 177620089 : if (error == -ECANCELED) {
412 0 : error = 0;
413 0 : goto out_unlock;
414 : }
415 177620089 : if (error)
416 0 : goto out_abort;
417 : }
418 :
419 179016599 : xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
420 179016599 : goto out_unlock;
421 :
422 0 : out_abort:
423 0 : xchk_set_incomplete(sc);
424 0 : xchk_iscan_abort(&xnc->collect_iscan);
425 179018871 : out_unlock:
/* Drop the locks in the reverse order of acquisition. */
426 179018871 : xfs_iunlock(dp, lock_mode);
427 179018871 : xfs_iunlock(dp, XFS_IOLOCK_SHARED);
428 179018871 : return error;
429 : }
430 :
431 : /*
 * If this looks like a valid pointer, count it.
 *
 * @xnc: link count scan state.
 * @ino: inode number taken from a superblock field.
 *
 * An inode number that fails xfs_verify_ino() is skipped and 0 is returned.
 * Otherwise the inode gains one parent, so that a file rooted in the
 * superblock is counted as having one link. The only caller in this file,
 * xchk_nlinks_collect_metafiles(), holds xnc->lock across the call.
 *
 * Returns 0 or the error from xchk_nlinks_update_incore().
 */
432 : static inline int
433 7435 : xchk_nlinks_collect_metafile(
434 : struct xchk_nlink_ctrs *xnc,
435 : xfs_ino_t ino)
436 : {
437 7435 : if (!xfs_verify_ino(xnc->sc->mp, ino))
438 : return 0;
439 :
440 6899 : trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
441 6899 : return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
442 : }
443 :
444 : /*
 * Bump the link counts of metadata files rooted in the superblock.
 *
 * Counts one link each for the inodes named by the superblock fields
 * sb_rbmino, sb_rsumino, sb_uquotino, sb_gquotino and sb_pquotino. The only
 * caller in this file invokes this when the filesystem lacks the metadir
 * feature.
 *
 * Returns 0 on success. On failure the scrub is marked incomplete and a
 * negative errno is returned: -ECANCELED if the collection scan was already
 * aborted, otherwise the error from the counter update, in which case the
 * scan is also aborted.
 */
445 : STATIC int
446 1487 : xchk_nlinks_collect_metafiles(
447 : struct xchk_nlink_ctrs *xnc)
448 : {
449 1487 : struct xfs_mount *mp = xnc->sc->mp;
/* Preloaded so that the "already aborted" exit below reports -ECANCELED. */
450 1487 : int error = -ECANCELED;
451 :
452 :
453 1487 : if (xchk_iscan_aborted(&xnc->collect_iscan))
454 0 : goto out_incomplete;
455 :
456 1487 : mutex_lock(&xnc->lock);
457 1487 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
458 1487 : if (error)
459 0 : goto out_abort;
460 :
461 1487 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
462 1487 : if (error)
463 0 : goto out_abort;
464 :
465 1487 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
466 1487 : if (error)
467 0 : goto out_abort;
468 :
469 1487 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
470 1487 : if (error)
471 0 : goto out_abort;
472 :
473 1487 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
474 1487 : if (error)
475 0 : goto out_abort;
476 1487 : mutex_unlock(&xnc->lock);
477 :
478 1487 : return 0;
479 :
480 0 : out_abort:
481 0 : mutex_unlock(&xnc->lock);
482 0 : xchk_iscan_abort(&xnc->collect_iscan);
483 0 : out_incomplete:
484 0 : xchk_set_incomplete(xnc->sc);
485 0 : return error;
486 : }
487 :
488 : /*
 * Advance the collection scan cursor for this non-directory file.
 *
 * A non-directory contributes no dirents of its own, so the only work is to
 * mark it visited in the collection iscan, which is done while holding the
 * file's IOLOCK in shared mode.
 *
 * Always returns 0.
 */
489 : static inline int
490 361191432 : xchk_nlinks_collect_file(
491 : struct xchk_nlink_ctrs *xnc,
492 : struct xfs_inode *ip)
493 : {
494 361191432 : xfs_ilock(ip, XFS_IOLOCK_SHARED);
495 361191432 : xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
496 361191432 : xfs_iunlock(ip, XFS_IOLOCK_SHARED);
497 361191432 : return 0;
498 : }
499 :
500 : /*
 * Walk all directories and count inode links.
 *
 * Drives part 1 of the check: optionally counts the superblock-rooted
 * metadata files, then iterates the collection iscan, walking each directory
 * and simply marking every other file as visited.
 *
 * Returns 0 or a negative errno. If the inode loop ends with an error the
 * scrub is marked incomplete, and -EBUSY is reported as -ECANCELED. On
 * success the empty transaction is cancelled and the return value is that of
 * xchk_setup_fs().
 */
501 : STATIC int
502 28413 : xchk_nlinks_collect(
503 : struct xchk_nlink_ctrs *xnc)
504 : {
505 28413 : struct xfs_scrub *sc = xnc->sc;
506 28413 : struct xfs_inode *ip;
507 28413 : int error;
508 :
509 : /* Count the rt and quota files that are rooted in the superblock. */
510 28413 : if (!xfs_has_metadir(sc->mp)) {
511 1487 : error = xchk_nlinks_collect_metafiles(xnc);
512 1487 : if (error)
513 : return error;
514 : }
515 :
516 : /*
517 : * Set up for a potentially lengthy filesystem scan by reducing our
518 : * transaction resource usage for the duration. Specifically:
519 : *
520 : * Cancel the transaction to release the log grant space while we scan
521 : * the filesystem.
522 : *
523 : * Create a new empty transaction to eliminate the possibility of the
524 : * inode scan deadlocking on cyclical metadata.
525 : *
526 : * We pass the empty transaction to the file scanning function to avoid
527 : * repeatedly cycling empty transactions. This can be done even though
528 : * we take the IOLOCK to quiesce the file because empty transactions
529 : * do not take sb_internal.
530 : */
531 28413 : xchk_trans_cancel(sc);
532 28413 : error = xchk_trans_alloc_empty(sc);
533 28413 : if (error)
534 : return error;
535 :
/* The loop runs for as long as the iterator returns 1 and hands back an inode. */
536 540238706 : while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
537 540210303 : if (S_ISDIR(VFS_I(ip)->i_mode))
538 179018871 : error = xchk_nlinks_collect_dir(xnc, ip);
539 : else
540 361191432 : error = xchk_nlinks_collect_file(xnc, ip);
/* Release the inode before acting on the per-inode result. */
541 540210303 : xchk_irele(sc, ip);
542 540210303 : if (error)
543 : break;
544 :
545 540210303 : if (xchk_should_terminate(sc, &error))
546 : break;
547 : }
548 28413 : xchk_iscan_iter_finish(&xnc->collect_iscan);
549 28413 : if (error) {
550 132 : xchk_set_incomplete(sc);
551 : /*
552 : * If we couldn't grab an inode that was busy with a state
553 : * change, change the error code so that we exit to userspace
554 : * as quickly as possible.
555 : */
556 132 : if (error == -EBUSY)
557 : return -ECANCELED;
558 132 : return error;
559 : }
560 :
561 : /*
562 : * Switch out for a real transaction in preparation for building a new
563 : * tree.
564 : */
565 28281 : xchk_trans_cancel(sc);
566 28281 : return xchk_setup_fs(sc);
567 : }
568 :
569 : /*
570 : * Part 2: Comparing file link counters. First, walk each inode and compare
571 : * its link count against our shadow information. Then walk each shadow link
572 : * count record that the inode walk did not reach, comparing it against the
573 : * file.
574 : */
575 :
576 : /*
 * Read the observed link count for comparison with the actual inode.
 *
 * @xnc: link count scan state.
 * @ino: inode number, used as the index into the shadow array.
 * @obs: filled in with the observed parents/backrefs/children counts; its
 *	 flags field is always set to 0.
 *
 * As a side effect the stored record is marked XCHK_NLINK_COMPARE_SCANNED
 * (and XCHK_NLINK_WRITTEN) so that the second loop in xchk_nlinks_compare()
 * skips it. Both callers in this file hold xnc->lock across the call.
 *
 * Returns 0 on success or a negative errno from the array load/store. A
 * store failure of -EFBIG marks the scrub incomplete and is reported as
 * -ECANCELED. @obs is left untouched on failure.
 */
577 : STATIC int
578 541104372 : xchk_nlinks_comparison_read(
579 : struct xchk_nlink_ctrs *xnc,
580 : xfs_ino_t ino,
581 : struct xchk_nlink *obs)
582 : {
583 541104372 : struct xchk_nlink nl;
584 541104372 : int error;
585 :
586 541104372 : error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
587 541104372 : if (error)
588 : return error;
589 :
590 541104372 : nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
591 :
592 541104372 : error = xfarray_store(xnc->nlinks, ino, &nl);
593 541104372 : if (error == -EFBIG) {
594 : /*
595 : * EFBIG means we tried to store data at too high a byte offset
596 : * in the sparse array. IOWs, we cannot complete the check and
597 : * must notify userspace that the check was incomplete. This
598 : * shouldn't really happen outside of the collection phase.
599 : */
600 0 : xchk_set_incomplete(xnc->sc);
601 0 : return -ECANCELED;
602 : }
603 541104372 : if (error)
604 : return error;
605 :
606 : /* Copy the counters, but do not expose the internal state. */
607 541104372 : obs->parents = nl.parents;
608 541104372 : obs->backrefs = nl.backrefs;
609 541104372 : obs->children = nl.children;
610 541104372 : obs->flags = 0;
611 541104372 : return 0;
612 : }
613 :
614 : /*
 * Check our link count against an inode.
 *
 * @xnc: link count scan state.
 * @ip: inode to check; the caller holds a reference.
 *
 * Takes the inode's ILOCK in shared mode and then xnc->lock, reads the
 * observed counters for the inode, and flags the inode through the
 * xchk_ino_set_*() helpers whenever the observations disagree with i_nlink
 * or with each other.
 *
 * Returns 0 if nothing was flagged (temporary repair files are skipped and
 * also return 0). Returns -ECANCELED if the collection scan was aborted (the
 * scrub is then marked incomplete) or if XFS_SCRUB_OFLAG_CORRUPT is set in
 * sm_flags once the checks are done; otherwise the error from
 * xchk_nlinks_comparison_read().
 */
615 : STATIC int
616 541108117 : xchk_nlinks_compare_inode(
617 : struct xchk_nlink_ctrs *xnc,
618 : struct xfs_inode *ip)
619 : {
620 541108117 : struct xchk_nlink obs;
621 541108117 : struct xfs_scrub *sc = xnc->sc;
622 541108117 : uint64_t total_links;
623 541108117 : unsigned int actual_nlink;
624 541108117 : int error;
625 :
626 : /*
627 : * Ignore temporary files being used to stage repairs, since we assume
628 : * they're correct for non-directories, and the directory repair code
629 : * doesn't bump the link counts for the children.
630 : */
631 541108117 : if (xrep_is_tempfile(ip))
632 : return 0;
633 :
/* Same order as documented for part 1: inode ILOCK, then the nlinks lock. */
634 541103781 : xfs_ilock(ip, XFS_ILOCK_SHARED);
635 541103781 : mutex_lock(&xnc->lock);
636 :
637 541103781 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
638 0 : xchk_set_incomplete(xnc->sc);
639 0 : error = -ECANCELED;
640 0 : goto out_scanlock;
641 : }
642 :
643 541103781 : error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
644 541103781 : if (error)
645 0 : goto out_scanlock;
646 :
647 : /*
648 : * If we don't have ftype to get an accurate count of the subdirectory
649 : * entries in this directory, take advantage of the fact that on a
650 : * consistent ftype=0 filesystem, the number of subdirectory
651 : * backreferences (dotdot entries) pointing towards this directory
652 : * should be equal to the number of subdirectory entries in the
653 : * directory.
654 : */
655 541103781 : if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
656 0 : obs.children = obs.backrefs;
657 :
658 541103781 : total_links = xchk_nlink_total(ip, &obs);
659 541103781 : actual_nlink = VFS_I(ip)->i_nlink;
660 :
661 541103781 : trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
662 :
663 : /*
664 : * If we found so many parents that we'd overflow i_nlink, we must
665 : * flag this as a corruption. If we found enough parents to exceed
666 : * the somewhat lower XFS_MAXLINK, warn the system administrator about
667 : * this. The VFS won't let users increase the link count, but it will
668 : * let them decrease it.
669 : */
670 541103781 : if (total_links > XFS_NLINK_PINNED)
671 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
672 541103781 : else if (total_links > XFS_MAXLINK)
673 0 : xchk_ino_set_warning(sc, ip->i_ino);
674 :
675 : /* Link counts should match. */
676 541103781 : if (total_links != actual_nlink)
677 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
678 :
679 541103781 : if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
680 : /*
681 : * The collection phase ignores directories with zero link
682 : * count, so we ignore them here too.
683 : *
684 : * The number of subdirectory backreferences (dotdot entries)
685 : * pointing towards this directory should be equal to the
686 : * number of subdirectory entries in the directory.
687 : */
688 179344399 : if (obs.children != obs.backrefs)
689 0 : xchk_ino_xref_set_corrupt(sc, ip->i_ino);
690 : } else {
691 : /*
692 : * Non-directories and unlinked directories should not have
693 : * back references.
694 : */
695 361759382 : if (obs.backrefs != 0)
696 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
697 :
698 : /*
699 : * Non-directories and unlinked directories should not have
700 : * children.
701 : */
702 361759382 : if (obs.children != 0)
703 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
704 : }
705 :
706 541103781 : if (ip == sc->mp->m_rootip || ip == sc->mp->m_metadirip) {
707 : /*
708 : * For the root of a directory tree, both the '.' and '..'
709 : * entries should point to the root directory. The dotdot
710 : * entry is counted as a parent of the root /and/ a backref of
711 : * the root directory.
712 : */
713 55197 : if (obs.parents != 1)
714 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
715 541048584 : } else if (actual_nlink > 0) {
716 : /*
717 : * Linked files that are not the root directory should have at
718 : * least one parent.
719 : */
720 541046297 : if (obs.parents == 0)
721 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
722 : }
723 :
/* error is 0 here; turn a set CORRUPT flag into -ECANCELED for the caller. */
724 541103781 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
725 0 : error = -ECANCELED;
726 :
727 541103781 : out_scanlock:
728 541103781 : mutex_unlock(&xnc->lock);
729 541103781 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
730 541103781 : return error;
731 : }
732 :
733 : /*
734 : * Check our link count against an inode that wasn't checked previously. This
735 : * is intended to catch directories with dangling links, though we could be
736 : * racing with inode allocation in other threads.
 *
 * @xnc: link count scan state.
 * @ino: inode number of a shadow record that was not marked
 *	 XCHK_NLINK_COMPARE_SCANNED by the inode walk.
 *
 * If the inode can be grabbed after all, it is checked with
 * xchk_nlinks_compare_inode(). If the lookup fails with -ENOENT or -EINVAL,
 * the observed counts for @ino must total zero; otherwise the inode number is
 * flagged corrupt.
 *
 * Returns 0, -ECANCELED (corruption found, scan aborted, or no AGI buffer
 * was returned), or another negative errno from the lookup or the record
 * read.
 *
 * NOTE(review): agi_bp has no initializer and is read at out_agi after any
 * xchk_iget_agi() failure; this relies on xchk_iget_agi() always writing
 * *agi_bp on its error paths, which cannot be confirmed from this file.
737 : */
738 : STATIC int
739 420674 : xchk_nlinks_compare_inum(
740 : struct xchk_nlink_ctrs *xnc,
741 : xfs_ino_t ino)
742 : {
743 420674 : struct xchk_nlink obs;
744 420674 : struct xfs_mount *mp = xnc->sc->mp;
745 420674 : struct xfs_trans *tp = xnc->sc->tp;
746 420674 : struct xfs_buf *agi_bp;
747 420674 : struct xfs_inode *ip;
748 420674 : int error;
749 :
750 : /*
751 : * The first iget failed, so try again with the variant that returns
752 : * either an incore inode or the AGI buffer. If the function returns
753 : * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
754 : * can guarantee that the inode won't be allocated while we check for
755 : * a zero link count in the observed link count data.
756 : */
757 420674 : error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
758 420674 : if (!error) {
759 : /* Actually got an inode, so use the inode compare. */
760 420082 : error = xchk_nlinks_compare_inode(xnc, ip);
761 420082 : xchk_irele(xnc->sc, ip);
762 420082 : return error;
763 : }
764 592 : if (error == -ENOENT || error == -EINVAL) {
765 : /* No inode was found. Check for zero link count below. */
766 591 : error = 0;
767 : }
768 592 : if (error)
769 1 : goto out_agi;
770 :
771 : /* Ensure that we have protected against inode allocation/freeing. */
772 591 : if (agi_bp == NULL) {
773 0 : ASSERT(agi_bp != NULL);
774 0 : xchk_set_incomplete(xnc->sc);
775 0 : return -ECANCELED;
776 : }
777 :
778 591 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
779 0 : xchk_set_incomplete(xnc->sc);
780 0 : error = -ECANCELED;
781 0 : goto out_agi;
782 : }
783 :
784 591 : mutex_lock(&xnc->lock);
785 591 : error = xchk_nlinks_comparison_read(xnc, ino, &obs);
786 591 : if (error)
787 0 : goto out_scanlock;
788 :
789 591 : trace_xchk_nlinks_check_zero(mp, ino, &obs);
790 :
791 : /*
792 : * If we can't grab the inode, the link count had better be zero. We
793 : * still hold the AGI to prevent inode allocation/freeing.
794 : */
795 591 : if (xchk_nlink_total(NULL, &obs) != 0) {
796 0 : xchk_ino_set_corrupt(xnc->sc, ino);
797 0 : error = -ECANCELED;
798 : }
799 :
800 591 : out_scanlock:
801 591 : mutex_unlock(&xnc->lock);
802 592 : out_agi:
803 592 : if (agi_bp)
804 591 : xfs_trans_brelse(tp, agi_bp);
805 : return error;
806 : }
807 :
808 : /*
809 : * Try to visit every inode in the filesystem to compare the link count. Move
810 : * on if we can't grab an inode, since we'll revisit unchecked nlink records in
811 : * the second loop of xchk_nlinks_compare().
 *
 * @xnc: link count scan state; the comparison iscan is advanced.
 * @ipp: receives the next inode when 1 is returned.
 *
 * Calls xchk_iscan_iter() again for as long as it reports -EBUSY, and returns
 * its first other result unchanged; the caller keeps looping while that
 * result is 1.
812 : */
813 : static int
814 : xchk_nlinks_compare_iter(
815 : struct xchk_nlink_ctrs *xnc,
816 : struct xfs_inode **ipp)
817 : {
818 540719732 : int error;
819 :
820 540719732 : do {
821 540719732 : error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
822 540719732 : } while (error == -EBUSY);
823 :
824 540716313 : return error;
825 : }
826 :
827 : /*
 * Compare the link counts we observed against the live information.
 *
 * Drives part 2 of the check in two loops: first every inode handed out by a
 * fresh comparison iscan is checked with xchk_nlinks_compare_inode(); then
 * every shadow record that was not marked XCHK_NLINK_COMPARE_SCANNED is
 * checked by inode number with xchk_nlinks_compare_inum().
 *
 * Returns 0 immediately if the scrub is already flagged corrupt on entry, and
 * again between the two loops. Otherwise returns the first nonzero result of
 * the loops, or the final result of the array iterator.
 */
828 : STATIC int
829 28281 : xchk_nlinks_compare(
830 : struct xchk_nlink_ctrs *xnc)
831 : {
832 28281 : struct xchk_nlink nl;
833 28281 : struct xfs_scrub *sc = xnc->sc;
834 28281 : struct xfs_inode *ip;
835 28281 : xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
836 28281 : int error;
837 :
838 28281 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
839 : return 0;
840 :
841 : /*
842 : * Create a new empty transaction so that we can advance the iscan
843 : * cursor without deadlocking if the inobt has a cycle and push on the
844 : * inactivation workqueue.
845 : */
846 28281 : xchk_trans_cancel(sc);
847 28281 : error = xchk_trans_alloc_empty(sc);
848 28281 : if (error)
849 : return error;
850 :
851 : /*
852 : * Use the inobt to walk all allocated inodes to compare the link
853 : * counts. Inodes skipped by _compare_iter will be tried again in the
854 : * next phase of the scan.
855 : */
856 28281 : xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
857 1081432626 : while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
858 540688035 : error = xchk_nlinks_compare_inode(xnc, ip);
/* Mark visited and release the inode whether or not the compare failed. */
859 540688035 : xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
860 540688035 : xchk_irele(sc, ip);
861 540688035 : if (error)
862 : break;
863 :
864 540688035 : if (xchk_should_terminate(sc, &error))
865 : break;
866 : }
867 28281 : xchk_iscan_iter_finish(&xnc->compare_iscan);
868 28281 : xchk_iscan_teardown(&xnc->compare_iscan);
869 28281 : if (error)
870 : return error;
871 :
872 28278 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
873 : return 0;
874 :
875 : /*
876 : * Walk all the non-null nlink observations that weren't checked in the
877 : * previous step.
 *
 * xnc->lock is held while iterating the array and dropped around each
 * xchk_nlinks_compare_inum() call, so the early returns below leave
 * with the lock released.
878 : */
879 28278 : mutex_lock(&xnc->lock);
880 539899367 : while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
/* NOTE(review): presumably the cursor is one past the record returned - confirm. */
881 539871091 : xfs_ino_t ino = cur - 1;
882 :
883 539871091 : if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
884 539450417 : continue;
885 :
886 420674 : mutex_unlock(&xnc->lock);
887 :
888 420674 : error = xchk_nlinks_compare_inum(xnc, ino);
889 420674 : if (error)
890 1 : return error;
891 :
892 420673 : if (xchk_should_terminate(xnc->sc, &error))
893 1 : return error;
894 :
895 420672 : mutex_lock(&xnc->lock);
896 : }
897 28276 : mutex_unlock(&xnc->lock);
898 :
899 28276 : return error;
900 : }
901 :
902 : /*
 * Tear down everything associated with a nlinks check.
 *
 * @priv: the struct xchk_nlink_ctrs set up by xchk_nlinks_setup_scan().
 *
 * Installed as sc->buf_cleanup on successful setup and also called directly
 * from the setup error path. The order is: abort the collection scan, remove
 * the directory update hook, destroy the shadow array and clear its pointer
 * (xchk_nlinks_update_incore() returns early on a NULL array), tear down the
 * collection iscan, destroy the mutex, and finally clear xnc->sc.
 */
903 : static void
904 28413 : xchk_nlinks_teardown_scan(
905 : void *priv)
906 : {
907 28413 : struct xchk_nlink_ctrs *xnc = priv;
908 :
909 : /* Discourage any hook functions that might be running. */
910 28413 : xchk_iscan_abort(&xnc->collect_iscan);
911 :
912 28413 : xfs_dir_hook_del(xnc->sc->mp, &xnc->hooks);
913 :
914 28413 : xfarray_destroy(xnc->nlinks);
915 28413 : xnc->nlinks = NULL;
916 :
917 28413 : xchk_iscan_teardown(&xnc->collect_iscan);
918 28413 : mutex_destroy(&xnc->lock);
919 28413 : xnc->sc = NULL;
920 28413 : }
921 :
922 : /*
923 : * Scan all inodes in the entire filesystem to generate link count data. If
924 : * the scan is successful, the counts will be left alive for a repair. If any
925 : * error occurs, we'll tear everything down.
 *
 * @sc: scrub context.
 * @xnc: counter structure allocated by xchk_setup_nlinks(); xnc->sc must
 *	 still be NULL on entry.
 *
 * This function itself only prepares the scan: it initializes the lock and
 * the collection iscan, creates the sparse array that holds one
 * struct xchk_nlink per inode number, and installs the directory update hook.
 *
 * Returns 0 on success, or the negative errno from xfarray_create() or
 * xfs_dir_hook_add() after tearing the scan state down again.
 *
 * NOTE(review): the error path runs the full teardown even if the array was
 * never created or the hook was never added; this relies on xfarray_destroy()
 * and xfs_dir_hook_del() tolerating that, which cannot be confirmed from this
 * file.
926 : */
927 : STATIC int
928 28413 : xchk_nlinks_setup_scan(
929 : struct xfs_scrub *sc,
930 : struct xchk_nlink_ctrs *xnc)
931 : {
932 28413 : struct xfs_mount *mp = sc->mp;
933 28413 : char *descr;
934 28413 : unsigned long long max_inos;
935 28413 : xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
936 28413 : xfs_agino_t first_agino, last_agino;
937 28413 : int error;
938 :
939 28413 : ASSERT(xnc->sc == NULL);
940 28413 : xnc->sc = sc;
941 :
942 28413 : mutex_init(&xnc->lock);
943 :
944 : /* Retry iget every tenth of a second for up to 30 seconds. */
945 28413 : xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
946 :
947 : /*
948 : * Set up enough space to store an nlink record for the highest
949 : * possible inode number in this system.
 *
 * That is one more than the inode number of the last inode in the
 * last AG, capped at XFS_MAXINUMBER + 1.
950 : */
951 28413 : xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
952 28413 : max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
953 28413 : descr = xchk_xfile_descr(sc, "file link counts");
954 28413 : error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
955 : sizeof(struct xchk_nlink), &xnc->nlinks);
/* The description string is freed here whether or not creation succeeded. */
956 28413 : kfree(descr);
957 28413 : if (error)
958 0 : goto out_teardown;
959 :
960 : /*
961 : * Hook into the directory entry code so that we can capture updates to
962 : * file link counts. The hook only triggers for inodes that were
963 : * already scanned, and the scanner thread takes each inode's ILOCK,
964 : * which means that any in-progress inode updates will finish before we
965 : * can scan the inode.
966 : */
967 28413 : ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
968 28413 : xfs_hook_setup(&xnc->hooks.dirent_hook, xchk_nlinks_live_update);
969 28413 : error = xfs_dir_hook_add(mp, &xnc->hooks);
970 28413 : if (error)
971 0 : goto out_teardown;
972 :
973 : /* Use deferred cleanup to pass the inode link count data to repair. */
974 28413 : sc->buf_cleanup = xchk_nlinks_teardown_scan;
975 28413 : return 0;
976 :
977 0 : out_teardown:
978 0 : xchk_nlinks_teardown_scan(xnc);
979 0 : return error;
980 : }
981 :
982 : /*
 * Scrub the link count of all inodes on the filesystem.
 *
 * Entry point of the checker. Expects sc->buf to hold the
 * struct xchk_nlink_ctrs allocated by xchk_setup_nlinks(). Sets up the live
 * scan, collects link counts from every inode (part 1), and, provided the
 * data set is complete, compares them against the inodes (part 2).
 *
 * Returns the setup error if scan setup fails, or the error value left behind
 * by xchk_xref_process_error() when it reports that a phase should not
 * continue; otherwise 0. An aborted collection scan is reported by marking
 * the scrub incomplete rather than through the return value.
 */
983 : int
984 28413 : xchk_nlinks(
985 : struct xfs_scrub *sc)
986 : {
987 28413 : struct xchk_nlink_ctrs *xnc = sc->buf;
988 28413 : int error = 0;
989 :
990 : /* Set ourselves up to check link counts on the live filesystem. */
991 28413 : error = xchk_nlinks_setup_scan(sc, xnc);
992 28413 : if (error)
993 : return error;
994 :
995 : /* Walk all inodes, picking up link count information. */
996 28413 : error = xchk_nlinks_collect(xnc);
/* NOTE(review): presumably this folds some errnos into scrub flags - confirm. */
997 28413 : if (!xchk_xref_process_error(sc, 0, 0, &error))
998 132 : return error;
999 :
1000 : /* Fail fast if we're not playing with a full dataset. */
1001 28281 : if (xchk_iscan_aborted(&xnc->collect_iscan))
1002 0 : xchk_set_incomplete(sc);
1003 28281 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
1004 : return 0;
1005 :
1006 : /* Compare link counts. */
1007 28281 : error = xchk_nlinks_compare(xnc);
1008 28281 : if (!xchk_xref_process_error(sc, 0, 0, &error))
1009 5 : return error;
1010 :
1011 : /* Check one last time for an incomplete dataset. */
1012 28276 : if (xchk_iscan_aborted(&xnc->collect_iscan))
1013 0 : xchk_set_incomplete(sc);
1014 :
1015 : return 0;
1016 : }
|