Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_icache.h"
16 : #include "xfs_iwalk.h"
17 : #include "xfs_ialloc.h"
18 : #include "xfs_dir2.h"
19 : #include "xfs_dir2_priv.h"
20 : #include "xfs_ag.h"
21 : #include "xfs_parent.h"
22 : #include "scrub/scrub.h"
23 : #include "scrub/common.h"
24 : #include "scrub/repair.h"
25 : #include "scrub/xfile.h"
26 : #include "scrub/xfarray.h"
27 : #include "scrub/iscan.h"
28 : #include "scrub/orphanage.h"
29 : #include "scrub/nlinks.h"
30 : #include "scrub/trace.h"
31 : #include "scrub/readdir.h"
32 : #include "scrub/tempfile.h"
33 : #include "scrub/listxattr.h"
34 :
35 : /*
36 : * Live Inode Link Count Checking
37 : * ==============================
38 : *
39 : * Inode link counts are "summary" metadata, in the sense that they are
40 : * computed as the number of directory entries referencing each file on the
41 : * filesystem. Therefore, we compute the correct link counts by creating a
42 : * shadow link count structure and walking every inode.
43 : */
44 :
45 : /*
   : * Set us up to scrub inode link counts.
   : *
   : * Enables the XCHK_FSGATES_DIRENTS gate, calls xrep_setup_nlinks() when
   : * xchk_could_repair() is true, allocates the struct xchk_nlink_ctrs that
   : * is stored in sc->buf, and finishes with xchk_setup_fs().
   : *
   : * Returns the result of xchk_setup_fs(), or an earlier negative errno
   : * (-ENOMEM if the allocation fails).
   : */
46 : int
47 12684 : xchk_setup_nlinks(
48 : struct xfs_scrub *sc)
49 : {
50 12684 : int error;
51 :
52 12684 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
53 :
54 25368 : if (xchk_could_repair(sc)) {
55 2007 : error = xrep_setup_nlinks(sc);
56 2007 : if (error)
57 : return error;
58 : }
59 :
   : /* The scan state is consumed later through sc->buf by xchk_nlinks(). */
60 12684 : sc->buf = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
61 12684 : if (!sc->buf)
62 : return -ENOMEM;
63 :
64 12684 : return xchk_setup_fs(sc);
65 : }
66 :
67 : /*
68 : * Part 1: Collecting file link counts. For each file, we create a shadow link
69 : * counting structure, then walk the entire directory tree, incrementing parent
70 : * and child link counts for each directory entry seen.
71 : *
72 : * To avoid false corruption reports in part 2, any failure in this part must
73 : * set the INCOMPLETE flag even when a negative errno is returned. This care
74 : * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
75 : * ECANCELED) that are absorbed into a scrub state flag update by
76 : * xchk_*_process_error. Scrub and repair share the same incore data
77 : * structures, so the INCOMPLETE flag is critical to prevent a repair based on
78 : * insufficient information.
79 : *
80 : * Because we are scanning a live filesystem, it's possible that another thread
81 : * will try to update the link counts for an inode that we've already scanned.
82 : * This will cause our counts to be incorrect. Therefore, we hook all
83 : * directory entry updates because that is when link count updates occur. By
84 : * shadowing transaction updates in this manner, the live nlink check can ensure,
85 : * by locking the inode and the shadow structure, that its own copies are not out
86 : * of date. Because the hook code runs in a different process context from the
87 : * scrub code and the scrub state flags are not accessed atomically, failures
88 : * in the hook code must abort the iscan and the scrubber must notice the
89 : * aborted scan and set the incomplete flag.
90 : *
91 : * Note that we use jump labels and srcu notifier hooks to minimize the
92 : * overhead when live nlinks is /not/ running. Locking order for nlink
93 : * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
94 : */
95 :
96 : /*
97 : * Add a delta to an nlink counter, being careful about integer overflow.
98 : * Clamp the value to XFS_NLINK_PINNED because the ondisk format does not
99 : * handle link counts any higher.
   : *
   : * The sum is computed as a uint64_t and then clamped with min_t().
   : *
   : * NOTE(review): @delta is a signed int. A negative delta whose magnitude
   : * exceeds *nlinkp wraps the unsigned 64-bit sum to a huge value, which is
   : * then clamped to XFS_NLINK_PINNED rather than to zero -- confirm that
   : * callers never decrement a counter below zero.
100 : */
101 : static inline void
102 : careful_add(
103 : xfs_nlink_t *nlinkp,
104 : int delta)
105 : {
106 929815857 : uint64_t new_value = (uint64_t)(*nlinkp) + delta;
107 :
108 929815857 : *nlinkp = min_t(uint64_t, new_value, XFS_NLINK_PINNED);
109 : }
110 :
111 : /*
    : * Update incore link count information. Caller must hold the nlinks lock.
    : *
    : * Loads the shadow record for @ino from the sparse xnc->nlinks array, applies
    : * @parents_delta, @backrefs_delta and @children_delta through careful_add(),
    : * sets XCHK_NLINK_WRITTEN in the record flags, and stores the record back.
    : *
    : * Returns 0 immediately if xnc->nlinks is NULL. Otherwise returns 0 or a
    : * negative errno from the load/store; -EFBIG from the store is reported as
    : * -ECANCELED.
    : */
112 : STATIC int
113 929815857 : xchk_nlinks_update_incore(
114 : struct xchk_nlink_ctrs *xnc,
115 : xfs_ino_t ino,
116 : int parents_delta,
117 : int backrefs_delta,
118 : int children_delta)
119 : {
120 929815857 : struct xchk_nlink nl;
121 929815857 : int error;
122 :
    : /* xchk_nlinks_teardown_scan() sets this to NULL after destroying the array. */
123 929815857 : if (!xnc->nlinks)
124 : return 0;
125 :
126 929815857 : error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
127 929815857 : if (error)
128 : return error;
129 :
130 929815857 : trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
131 : backrefs_delta, children_delta);
132 :
133 929815857 : careful_add(&nl.parents, parents_delta);
134 929815857 : careful_add(&nl.backrefs, backrefs_delta);
135 929815857 : careful_add(&nl.children, children_delta);
136 :
137 929815857 : nl.flags |= XCHK_NLINK_WRITTEN;
138 929815857 : error = xfarray_store(xnc->nlinks, ino, &nl);
139 929815857 : if (error == -EFBIG) {
140 : /*
141 : * EFBIG means we tried to store data at too high a byte offset
142 : * in the sparse array. IOWs, we cannot complete the check and
143 : * must notify userspace that the check was incomplete.
144 : */
145 0 : error = -ECANCELED;
146 : }
147 : return error;
148 : }
149 :
150 : /*
151 : * Apply a link count change from the regular filesystem into our shadow link
152 : * count structure based on a directory update in progress.
    : *
    : * This is the notifier callback installed by xchk_nlinks_setup_scan().
    : * @nb is the notifier block embedded in struct xchk_nlink_ctrs
    : * (hooks.dirent_hook.nb), @action is only passed to the tracepoint, and
    : * @data is a struct xfs_dir_update_params describing the parent (dp), the
    : * child (ip), the delta and the name.
    : *
    : * Always returns NOTIFY_DONE. A failed shadow update is not returned to the
    : * notifier chain; instead the collection iscan is aborted so that the
    : * scrubber can notice and mark the check incomplete.
153 : */
154 : STATIC int
155 10242072 : xchk_nlinks_live_update(
156 : struct notifier_block *nb,
157 : unsigned long action,
158 : void *data)
159 : {
160 10242072 : struct xfs_dir_update_params *p = data;
161 10242072 : struct xchk_nlink_ctrs *xnc;
162 10242072 : int error;
163 :
164 10242072 : xnc = container_of(nb, struct xchk_nlink_ctrs, hooks.dirent_hook.nb);
165 :
166 : /*
167 : * Ignore temporary directories being used to stage dir repairs, since
168 : * we don't bump the link counts of the children.
169 : */
170 10242072 : if (xrep_is_tempfile(p->dp))
171 : return NOTIFY_DONE;
172 :
173 10242544 : trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
174 10242544 : p->delta, p->name->name, p->name->len);
175 :
176 : /*
177 : * If we've already scanned @dp, update the number of parents that link
178 : * to @ip. If @ip is a subdirectory, update the number of child links
179 : * going out of @dp.
180 : */
181 10242032 : if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
182 7266584 : mutex_lock(&xnc->lock);
183 7266640 : error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
184 : 0, 0);
185 7266640 : if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
186 1799191 : error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
187 : 0, p->delta);
188 7266640 : mutex_unlock(&xnc->lock);
189 7266631 : if (error)
190 0 : goto out_abort;
191 : }
192 :
193 : /*
194 : * If @ip is a subdirectory and we've already scanned it, update the
195 : * number of backrefs pointing to @dp.
196 : */
197 12782668 : if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
198 2539226 : xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
199 1693737 : mutex_lock(&xnc->lock);
200 1693737 : error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
201 : p->delta, 0);
202 1693737 : mutex_unlock(&xnc->lock);
203 1693737 : if (error)
204 0 : goto out_abort;
205 : }
206 :
207 : return NOTIFY_DONE;
208 :
209 0 : out_abort:
    : /* The error itself is dropped; the aborted scan is the failure signal. */
210 0 : xchk_iscan_abort(&xnc->collect_iscan);
211 0 : return NOTIFY_DONE;
212 : }
213 :
214 : /*
    : * Bump the observed link count for the inode referenced by this entry.
    : *
    : * Passed to xchk_dir_walk() by xchk_nlinks_collect_dir(); @priv is the
    : * struct xchk_nlink_ctrs. @dapos is not used here.
    : *
    : * Rejects (with -ECANCELED) an empty or invalid name, a '.' entry that does
    : * not point at @dp, and an inode number that fails xfs_verify_ino(). Also
    : * returns -ECANCELED if the collection iscan was already aborted.
    : *
    : * Returns 0 on success. Every failure path marks the scrub incomplete, and
    : * every failure except "scan already aborted" also aborts the iscan.
    : */
215 : STATIC int
216 919059776 : xchk_nlinks_collect_dirent(
217 : struct xfs_scrub *sc,
218 : struct xfs_inode *dp,
219 : xfs_dir2_dataptr_t dapos,
220 : const struct xfs_name *name,
221 : xfs_ino_t ino,
222 : void *priv)
223 : {
224 919059776 : struct xchk_nlink_ctrs *xnc = priv;
225 919059776 : bool dot = false, dotdot = false;
226 919059776 : int error;
227 :
228 : /* Does this name make sense? */
229 919059776 : if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
230 22 : error = -ECANCELED;
231 22 : goto out_abort;
232 : }
233 :
    : /* Classify the entry as '.', '..', or an ordinary forward link. */
234 919059754 : if (name->len == 1 && name->name[0] == '.')
235 : dot = true;
236 750085803 : else if (name->len == 2 && name->name[0] == '.' &&
237 168973951 : name->name[1] == '.')
238 : dotdot = true;
239 :
240 : /* Don't accept a '.' entry that points somewhere else. */
241 168973951 : if (dot && ino != dp->i_ino) {
242 0 : error = -ECANCELED;
243 0 : goto out_abort;
244 : }
245 :
246 : /* Don't accept an invalid inode number. */
247 919059754 : if (!xfs_verify_ino(sc->mp, ino)) {
248 0 : error = -ECANCELED;
249 0 : goto out_abort;
250 : }
251 :
252 : /* Update the shadow link counts if we haven't already failed. */
253 :
254 919059754 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
255 0 : error = -ECANCELED;
256 0 : goto out_incomplete;
257 : }
258 :
259 919059754 : trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);
260 :
261 919059754 : mutex_lock(&xnc->lock);
262 :
263 : /*
264 : * If this is a dotdot entry, it is a back link from dp to ino. How
265 : * we handle this depends on whether or not dp is the root directory.
266 : *
267 : * The root directory is its own parent, so we pretend the dotdot entry
268 : * establishes the "parent" of the root directory. Increment the
269 : * number of parents of the root directory.
270 : *
271 : * Otherwise, increment the number of backrefs pointing back to ino.
272 : *
273 : * If the filesystem has parent pointers, we walk the pptrs to
274 : * determine the backref count.
275 : */
276 919059754 : if (dotdot) {
277 168973951 : if (dp == sc->mp->m_rootip)
278 12682 : error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
279 168961269 : else if (!xfs_has_parent(sc->mp))
280 1270 : error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
281 : else
282 : error = 0;
283 13952 : if (error)
284 0 : goto out_unlock;
285 : }
286 :
287 : /*
288 : * If this dirent is a forward link from dp to ino, increment the
289 : * number of parents linking into ino.
290 : */
291 919059754 : if (!dot && !dotdot) {
292 581111852 : error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
293 581111852 : if (error)
294 0 : goto out_unlock;
295 : }
296 :
297 : /*
298 : * If this dirent is a forward link to a subdirectory, increment the
299 : * number of child links of dp.
300 : */
301 919059754 : if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
302 168922903 : error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
303 168922903 : if (error)
304 0 : goto out_unlock;
305 : }
306 :
307 919059754 : mutex_unlock(&xnc->lock);
308 919059754 : return 0;
309 :
    : /* The error labels fall through: unlock, then abort, then mark incomplete. */
310 0 : out_unlock:
311 0 : mutex_unlock(&xnc->lock);
312 22 : out_abort:
313 22 : xchk_iscan_abort(&xnc->collect_iscan);
314 22 : out_incomplete:
315 22 : xchk_set_incomplete(sc);
316 22 : return error;
317 : }
318 :
319 : /*
    : * Bump the backref count for the inode referenced by this parent pointer.
    : *
    : * Passed to xchk_pptr_walk() by xchk_nlinks_collect_dir(); @priv is the
    : * struct xchk_nlink_ctrs. Adds one backref to the shadow record of
    : * pptr->p_ino while holding xnc->lock.
    : *
    : * Returns 0 on success, -EFSCORRUPTED if pptr->p_ino fails xfs_verify_ino(),
    : * -ECANCELED if the collection iscan was already aborted, or the error from
    : * the shadow update. Every failure path marks the scrub incomplete, and
    : * every failure except "scan already aborted" also aborts the iscan.
    : */
320 : STATIC int
321 168959999 : xchk_nlinks_collect_pptr(
322 : struct xfs_scrub *sc,
323 : struct xfs_inode *ip,
324 : const struct xfs_parent_name_irec *pptr,
325 : void *priv)
326 : {
327 168959999 : struct xchk_nlink_ctrs *xnc = priv;
328 168959999 : int error;
329 :
330 : /* Don't accept an invalid inode number. */
331 168959999 : if (!xfs_verify_ino(sc->mp, pptr->p_ino)) {
332 0 : error = -EFSCORRUPTED;
333 0 : goto out_abort;
334 : }
335 :
336 : /* Update the shadow link counts if we haven't already failed. */
337 :
338 168959999 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
339 0 : error = -ECANCELED;
340 0 : goto out_incomplete;
341 : }
342 :
343 168959999 : trace_xchk_nlinks_collect_pptr(sc->mp, ip, pptr);
344 :
345 168959999 : mutex_lock(&xnc->lock);
346 :
347 168959999 : error = xchk_nlinks_update_incore(xnc, pptr->p_ino, 0, 1, 0);
348 168959999 : if (error)
349 0 : goto out_unlock;
350 :
351 168959999 : mutex_unlock(&xnc->lock);
352 168959999 : return 0;
353 :
    : /* The error labels fall through: unlock, then abort, then mark incomplete. */
354 : out_unlock:
355 0 : mutex_unlock(&xnc->lock);
356 0 : out_abort:
357 0 : xchk_iscan_abort(&xnc->collect_iscan);
358 0 : out_incomplete:
359 0 : xchk_set_incomplete(sc);
360 0 : return error;
361 : }
362 :
363 : /*
    : * Walk a directory to bump the observed link counts of the children.
    : *
    : * Holds @dp's IOLOCK shared and the data map lock shared for the duration.
    : * Walks the directory entries with xchk_nlinks_collect_dirent() and, when the
    : * filesystem has parent pointers, the parent pointers with
    : * xchk_nlinks_collect_pptr(). On full success @dp is marked visited in the
    : * collection iscan.
    : *
    : * -ECANCELED from either walk is converted to a return of 0 without marking
    : * @dp visited; the callbacks above have already marked the scrub incomplete
    : * in that case. Any other walk error marks the scrub incomplete, aborts the
    : * iscan, and is returned.
    : *
    : * Temporary repair files are skipped before any lock is taken; directories
    : * with zero i_nlink are skipped once the locks are held. Both return 0
    : * without marking @dp visited.
    : */
364 : STATIC int
365 168976261 : xchk_nlinks_collect_dir(
366 : struct xchk_nlink_ctrs *xnc,
367 : struct xfs_inode *dp)
368 : {
369 168976261 : struct xfs_scrub *sc = xnc->sc;
370 168976261 : unsigned int lock_mode;
371 168976261 : int error = 0;
372 :
373 : /*
374 : * Ignore temporary directories being used to stage dir repairs, since
375 : * we don't bump the link counts of the children.
376 : */
377 168976261 : if (xrep_is_tempfile(dp))
378 : return 0;
379 :
380 : /* Prevent anyone from changing this directory while we walk it. */
381 168976261 : xfs_ilock(dp, XFS_IOLOCK_SHARED);
382 168976261 : lock_mode = xfs_ilock_data_map_shared(dp);
383 :
384 : /*
385 : * The dotdot entry of an unlinked directory still points to the last
386 : * parent, but the parent no longer links to this directory. Skip the
387 : * directory to avoid overcounting.
388 : */
389 168976261 : if (VFS_I(dp)->i_nlink == 0)
390 2310 : goto out_unlock;
391 :
392 : /*
393 : * We cannot count file links if the directory looks as though it has
394 : * been zapped by the inode record repair code.
    : *
    : * NOTE(review): error is still 0 on this path, so the function
    : * returns 0 after aborting the scan and setting the incomplete
    : * flag -- confirm that returning success here is intended.
395 : */
396 168973951 : if (xchk_dir_looks_zapped(dp))
397 0 : goto out_abort;
398 :
399 168973951 : error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
400 168973951 : if (error == -ECANCELED) {
401 22 : error = 0;
402 22 : goto out_unlock;
403 : }
404 168973929 : if (error)
405 0 : goto out_abort;
406 :
407 : /* Walk the parent pointers to get real backref counts. */
408 168973929 : if (xfs_has_parent(sc->mp)) {
409 168972627 : error = xchk_pptr_walk(sc, dp, xchk_nlinks_collect_pptr,
410 : &xnc->pptr, xnc);
411 168972627 : if (error == -ECANCELED) {
412 0 : error = 0;
413 0 : goto out_unlock;
414 : }
415 168972627 : if (error)
416 0 : goto out_abort;
417 : }
418 :
419 168973929 : xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
420 168973929 : goto out_unlock;
421 :
422 0 : out_abort:
423 0 : xchk_set_incomplete(sc);
424 0 : xchk_iscan_abort(&xnc->collect_iscan);
425 168976261 : out_unlock:
    : /* Release in the reverse order of acquisition. */
426 168976261 : xfs_iunlock(dp, lock_mode);
427 168976261 : xfs_iunlock(dp, XFS_IOLOCK_SHARED);
428 168976261 : return error;
429 : }
430 :
431 : /*
    : * If this looks like a valid pointer, count it.
    : *
    : * Inode numbers that fail xfs_verify_ino() are silently ignored (returns 0).
    : * Otherwise one parent link is added to the shadow record for @ino and the
    : * result of xchk_nlinks_update_incore() is returned. The only caller in
    : * this file, xchk_nlinks_collect_metafiles(), holds xnc->lock around the
    : * call, as xchk_nlinks_update_incore() requires.
    : */
432 : static inline int
433 63420 : xchk_nlinks_collect_metafile(
434 : struct xchk_nlink_ctrs *xnc,
435 : xfs_ino_t ino)
436 : {
437 63420 : if (!xfs_verify_ino(xnc->sc->mp, ino))
438 : return 0;
439 :
440 47583 : trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
441 47583 : return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
442 : }
443 :
444 : /*
    : * Bump the link counts of metadata files rooted in the superblock.
    : *
    : * Counts one link each for sb_rbmino, sb_rsumino, sb_uquotino, sb_gquotino
    : * and sb_pquotino, under xnc->lock.
    : *
    : * Returns 0 on success. Returns -ECANCELED if the collection iscan was
    : * already aborted, or the first error from a shadow update (which also aborts
    : * the iscan). Every failure path marks the scrub incomplete.
    : */
445 : STATIC int
446 12684 : xchk_nlinks_collect_metafiles(
447 : struct xchk_nlink_ctrs *xnc)
448 : {
449 12684 : struct xfs_mount *mp = xnc->sc->mp;
    : /* Preset so the early "already aborted" exit returns -ECANCELED. */
450 12684 : int error = -ECANCELED;
451 :
452 :
453 12684 : if (xchk_iscan_aborted(&xnc->collect_iscan))
454 0 : goto out_incomplete;
455 :
456 12684 : mutex_lock(&xnc->lock);
457 12684 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
458 12684 : if (error)
459 0 : goto out_abort;
460 :
461 12684 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
462 12684 : if (error)
463 0 : goto out_abort;
464 :
465 12684 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
466 12684 : if (error)
467 0 : goto out_abort;
468 :
469 12684 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
470 12684 : if (error)
471 0 : goto out_abort;
472 :
473 12684 : error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
474 12684 : if (error)
475 0 : goto out_abort;
476 12684 : mutex_unlock(&xnc->lock);
477 :
478 12684 : return 0;
479 :
480 0 : out_abort:
481 0 : mutex_unlock(&xnc->lock);
482 0 : xchk_iscan_abort(&xnc->collect_iscan);
483 0 : out_incomplete:
484 0 : xchk_set_incomplete(xnc->sc);
485 0 : return error;
486 : }
487 :
488 : /*
    : * Advance the collection scan cursor for this non-directory file.
    : *
    : * No link counts are collected from the file itself; it is only marked
    : * visited in the collection iscan while its IOLOCK is held shared. Always
    : * returns 0.
    : */
489 : static inline int
490 310266878 : xchk_nlinks_collect_file(
491 : struct xchk_nlink_ctrs *xnc,
492 : struct xfs_inode *ip)
493 : {
494 310266878 : xfs_ilock(ip, XFS_IOLOCK_SHARED);
495 310266878 : xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
496 310266878 : xfs_iunlock(ip, XFS_IOLOCK_SHARED);
497 310266878 : return 0;
498 : }
499 :
500 : /*
    : * Walk all directories and count inode links.
    : *
    : * First counts the superblock-rooted metadata files, then swaps the scrub
    : * transaction for an empty one and iterates every inode returned by the
    : * collection iscan, dispatching directories to xchk_nlinks_collect_dir() and
    : * everything else to xchk_nlinks_collect_file().
    : *
    : * If the inode loop ends with an error the scrub is marked incomplete and
    : * the error is returned, with -EBUSY converted to -ECANCELED. On success the
    : * empty transaction is cancelled and the result of xchk_setup_fs() is
    : * returned.
    : */
501 : STATIC int
502 12684 : xchk_nlinks_collect(
503 : struct xchk_nlink_ctrs *xnc)
504 : {
505 12684 : struct xfs_scrub *sc = xnc->sc;
506 12684 : struct xfs_inode *ip;
507 12684 : int error;
508 :
509 : /* Count the rt and quota files that are rooted in the superblock. */
510 12684 : error = xchk_nlinks_collect_metafiles(xnc);
511 12684 : if (error)
512 : return error;
513 :
514 : /*
515 : * Set up for a potentially lengthy filesystem scan by reducing our
516 : * transaction resource usage for the duration. Specifically:
517 : *
518 : * Cancel the transaction to release the log grant space while we scan
519 : * the filesystem.
520 : *
521 : * Create a new empty transaction to eliminate the possibility of the
522 : * inode scan deadlocking on cyclical metadata.
523 : *
524 : * We pass the empty transaction to the file scanning function to avoid
525 : * repeatedly cycling empty transactions. This can be done even though
526 : * we take the IOLOCK to quiesce the file because empty transactions
527 : * do not take sb_internal.
528 : */
529 12684 : xchk_trans_cancel(sc);
530 12684 : error = xchk_trans_alloc_empty(sc);
531 12684 : if (error)
532 : return error;
533 :
    : /* The loop continues only while xchk_iscan_iter() returns 1. */
534 479255821 : while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
535 479243139 : if (S_ISDIR(VFS_I(ip)->i_mode))
536 168976261 : error = xchk_nlinks_collect_dir(xnc, ip);
537 : else
538 310266878 : error = xchk_nlinks_collect_file(xnc, ip);
    : /* Drop the inode before acting on the per-inode result. */
539 479243139 : xchk_irele(sc, ip);
540 479243139 : if (error)
541 : break;
542 :
543 479243139 : if (xchk_should_terminate(sc, &error))
544 : break;
545 : }
546 12684 : xchk_iscan_iter_finish(&xnc->collect_iscan);
547 12684 : if (error) {
548 26 : xchk_set_incomplete(sc);
549 : /*
550 : * If we couldn't grab an inode that was busy with a state
551 : * change, change the error code so that we exit to userspace
552 : * as quickly as possible.
553 : */
554 26 : if (error == -EBUSY)
555 : return -ECANCELED;
556 26 : return error;
557 : }
558 :
559 : /*
560 : * Switch out for a real transaction in preparation for building a new
561 : * tree.
562 : */
563 12658 : xchk_trans_cancel(sc);
564 12658 : return xchk_setup_fs(sc);
565 : }
566 :
567 : /*
568 : * Part 2: Comparing file link counters. Walk each inode and compare the link
569 : * counts against our shadow information; and then walk each shadow link count
570 : * structure (that wasn't covered in the first part), comparing it against the
571 : * file.
572 : */
573 :
574 : /*
    : * Read the observed link count for comparison with the actual inode.
    : *
    : * Loads the shadow record for @ino, sets XCHK_NLINK_COMPARE_SCANNED and
    : * XCHK_NLINK_WRITTEN in its flags, and writes it back so that the second
    : * pass of xchk_nlinks_compare() can skip it. The parents, backrefs and
    : * children counters are then copied to @obs with obs->flags cleared.
    : *
    : * Both callers in this file hold xnc->lock around this call.
    : *
    : * Returns 0 on success or a negative errno from the load/store. -EFBIG from
    : * the store marks the scrub incomplete and is reported as -ECANCELED. @obs
    : * is only written on success.
    : */
575 : STATIC int
576 479411814 : xchk_nlinks_comparison_read(
577 : struct xchk_nlink_ctrs *xnc,
578 : xfs_ino_t ino,
579 : struct xchk_nlink *obs)
580 : {
581 479411814 : struct xchk_nlink nl;
582 479411814 : int error;
583 :
584 479411814 : error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
585 479411814 : if (error)
586 : return error;
587 :
588 479411814 : nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);
589 :
590 479411814 : error = xfarray_store(xnc->nlinks, ino, &nl);
591 479411814 : if (error == -EFBIG) {
592 : /*
593 : * EFBIG means we tried to store data at too high a byte offset
594 : * in the sparse array. IOWs, we cannot complete the check and
595 : * must notify userspace that the check was incomplete. This
596 : * shouldn't really happen outside of the collection phase.
597 : */
598 0 : xchk_set_incomplete(xnc->sc);
599 0 : return -ECANCELED;
600 : }
601 479411814 : if (error)
602 : return error;
603 :
604 : /* Copy the counters, but do not expose the internal state. */
605 479411814 : obs->parents = nl.parents;
606 479411814 : obs->backrefs = nl.backrefs;
607 479411814 : obs->children = nl.children;
608 479411814 : obs->flags = 0;
609 479411814 : return 0;
610 : }
611 :
612 : /*
    : * Check our link count against an inode.
    : *
    : * Takes @ip's ILOCK shared and then xnc->lock, reads the observed counters
    : * for ip->i_ino, and compares them against VFS_I(ip)->i_nlink. The checks
    : * flag the inode corrupt (or, for totals above XFS_MAXLINK but not above
    : * XFS_NLINK_PINNED, set a warning) through the xchk_ino_set_*() helpers.
    : *
    : * Returns 0 if temporary repair files are skipped or no corruption flag is
    : * set afterwards. Returns -ECANCELED if the collection iscan was aborted
    : * (the scrub is also marked incomplete) or if XFS_SCRUB_OFLAG_CORRUPT is set
    : * in sc->sm->sm_flags after the checks. Otherwise returns the error from
    : * xchk_nlinks_comparison_read().
    : */
613 : STATIC int
614 479411468 : xchk_nlinks_compare_inode(
615 : struct xchk_nlink_ctrs *xnc,
616 : struct xfs_inode *ip)
617 : {
618 479411468 : struct xchk_nlink obs;
619 479411468 : struct xfs_scrub *sc = xnc->sc;
620 479411468 : uint64_t total_links;
621 479411468 : unsigned int actual_nlink;
622 479411468 : int error;
623 :
624 : /*
625 : * Ignore temporary files being used to stage repairs, since we assume
626 : * they're correct for non-directories, and the directory repair code
627 : * doesn't bump the link counts for the children.
628 : */
629 479411468 : if (xrep_is_tempfile(ip))
630 : return 0;
631 :
    : /* Lock order: inode ILOCK first, then the shadow counter lock. */
632 479411468 : xfs_ilock(ip, XFS_ILOCK_SHARED);
633 479411468 : mutex_lock(&xnc->lock);
634 :
635 479411468 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
636 0 : xchk_set_incomplete(xnc->sc);
637 0 : error = -ECANCELED;
638 0 : goto out_scanlock;
639 : }
640 :
641 479411468 : error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
642 479411468 : if (error)
643 0 : goto out_scanlock;
644 :
645 : /*
646 : * If we don't have ftype to get an accurate count of the subdirectory
647 : * entries in this directory, take advantage of the fact that on a
648 : * consistent ftype=0 filesystem, the number of subdirectory
649 : * backreferences (dotdot entries) pointing towards this directory
650 : * should be equal to the number of subdirectory entries in the
651 : * directory.
652 : */
653 479411468 : if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
654 0 : obs.children = obs.backrefs;
655 :
656 479411468 : total_links = xchk_nlink_total(ip, &obs);
657 479411468 : actual_nlink = VFS_I(ip)->i_nlink;
658 :
659 479411468 : trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);
660 :
661 : /*
662 : * If we found so many parents that we'd overflow i_nlink, we must
663 : * flag this as a corruption. If we found enough parents to exceed
664 : * the somewhat lower XFS_MAXLINK, warn the system administrator about
665 : * this. The VFS won't let users increase the link count, but it will
666 : * let them decrease it.
667 : */
668 479411468 : if (total_links > XFS_NLINK_PINNED)
669 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
670 479411468 : else if (total_links > XFS_MAXLINK)
671 0 : xchk_ino_set_warning(sc, ip->i_ino);
672 :
673 : /* Link counts should match. */
674 479411468 : if (total_links != actual_nlink)
675 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
676 :
677 479411468 : if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
678 : /*
679 : * The collection phase ignores directories with zero link
680 : * count, so we ignore them here too.
681 : *
682 : * The number of subdirectory backreferences (dotdot entries)
683 : * pointing towards this directory should be equal to the
684 : * number of subdirectory entries in the directory.
685 : */
686 169031896 : if (obs.children != obs.backrefs)
687 0 : xchk_ino_xref_set_corrupt(sc, ip->i_ino);
688 : } else {
689 : /*
690 : * Non-directories and unlinked directories should not have
691 : * back references.
692 : */
693 310379572 : if (obs.backrefs != 0)
694 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
695 :
696 : /*
697 : * Non-directories and unlinked directories should not have
698 : * children.
699 : */
700 310379572 : if (obs.children != 0)
701 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
702 : }
703 :
704 479411468 : if (ip == sc->mp->m_rootip) {
705 : /*
706 : * For the root of a directory tree, both the '.' and '..'
707 : * entries should point to the root directory. The dotdot
708 : * entry is counted as a parent of the root /and/ a backref of
709 : * the root directory.
710 : */
711 12658 : if (obs.parents != 1)
712 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
713 479398810 : } else if (actual_nlink > 0) {
714 : /*
715 : * Linked files that are not the root directory should have at
716 : * least one parent.
717 : */
718 479396275 : if (obs.parents == 0)
719 0 : xchk_ino_set_corrupt(sc, ip->i_ino);
720 : }
721 :
    : /* Stop the comparison walk as soon as any corruption has been flagged. */
722 479411468 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
723 0 : error = -ECANCELED;
724 :
725 479411468 : out_scanlock:
726 479411468 : mutex_unlock(&xnc->lock);
727 479411468 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
728 479411468 : return error;
729 : }
730 :
731 : /*
732 : * Check our link count against an inode that wasn't checked previously. This
733 : * is intended to catch directories with dangling links, though we could be
734 : * racing with inode allocation in other threads.
    : *
    : * If xchk_iget_agi() returns an inode, the check is delegated to
    : * xchk_nlinks_compare_inode(). If it fails with -ENOENT or -EINVAL, the
    : * observed counters for @ino are read under xnc->lock and must total zero;
    : * a nonzero total flags @ino corrupt and returns -ECANCELED.
    : *
    : * Returns 0 if the counts are consistent, -ECANCELED if the scan is
    : * incomplete or corruption was flagged, or another negative errno. The AGI
    : * buffer, when one was returned, is released before returning on the paths
    : * that reach out_agi.
735 : */
736 : STATIC int
737 288047 : xchk_nlinks_compare_inum(
738 : struct xchk_nlink_ctrs *xnc,
739 : xfs_ino_t ino)
740 : {
741 288047 : struct xchk_nlink obs;
742 288047 : struct xfs_mount *mp = xnc->sc->mp;
743 288047 : struct xfs_trans *tp = xnc->sc->tp;
744 288047 : struct xfs_buf *agi_bp;
745 288047 : struct xfs_inode *ip;
746 288047 : int error;
747 :
748 : /*
749 : * The first iget failed, so try again with the variant that returns
750 : * either an incore inode or the AGI buffer. If the function returns
751 : * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
752 : * can guarantee that the inode won't be allocated while we check for
753 : * a zero link count in the observed link count data.
754 : */
755 288047 : error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
756 288047 : if (!error) {
757 : /* Actually got an inode, so use the inode compare. */
758 287701 : error = xchk_nlinks_compare_inode(xnc, ip);
759 287701 : xchk_irele(xnc->sc, ip);
760 287701 : return error;
761 : }
762 346 : if (error == -ENOENT || error == -EINVAL) {
763 : /* No inode was found. Check for zero link count below. */
764 346 : error = 0;
765 : }
    : /*
    : * NOTE(review): agi_bp has no initializer here, so the out_agi
    : * test below relies on xchk_iget_agi() having written it on
    : * every failure path -- confirm against that helper.
    : */
766 346 : if (error)
767 0 : goto out_agi;
768 :
769 : /* Ensure that we have protected against inode allocation/freeing. */
770 346 : if (agi_bp == NULL) {
771 0 : ASSERT(agi_bp != NULL);
772 0 : xchk_set_incomplete(xnc->sc);
773 0 : return -ECANCELED;
774 : }
775 :
776 346 : if (xchk_iscan_aborted(&xnc->collect_iscan)) {
777 0 : xchk_set_incomplete(xnc->sc);
778 0 : error = -ECANCELED;
779 0 : goto out_agi;
780 : }
781 :
782 346 : mutex_lock(&xnc->lock);
783 346 : error = xchk_nlinks_comparison_read(xnc, ino, &obs);
784 346 : if (error)
785 0 : goto out_scanlock;
786 :
787 346 : trace_xchk_nlinks_check_zero(mp, ino, &obs);
788 :
789 : /*
790 : * If we can't grab the inode, the link count had better be zero. We
791 : * still hold the AGI to prevent inode allocation/freeing.
792 : */
793 346 : if (xchk_nlink_total(NULL, &obs) != 0) {
794 0 : xchk_ino_set_corrupt(xnc->sc, ino);
795 0 : error = -ECANCELED;
796 : }
797 :
798 346 : out_scanlock:
799 346 : mutex_unlock(&xnc->lock);
800 346 : out_agi:
801 346 : if (agi_bp)
802 346 : xfs_trans_brelse(tp, agi_bp);
803 : return error;
804 : }
805 :
806 : /*
807 : * Try to visit every inode in the filesystem to compare the link count. Move
808 : * on if we can't grab an inode, since we'll revisit unchecked nlink records in
809 : * the second part.
    : *
    : * Calls xchk_iscan_iter() on the comparison iscan again for as long as it
    : * returns -EBUSY, and returns the first result that is not -EBUSY. The
    : * caller treats a return of 1 as "*ipp holds the next inode".
810 : */
811 : static int
812 : xchk_nlinks_compare_iter(
813 : struct xchk_nlink_ctrs *xnc,
814 : struct xfs_inode **ipp)
815 : {
816 479138813 : int error;
817 :
818 479138813 : do {
819 479138813 : error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
820 479138813 : } while (error == -EBUSY);
821 :
822 479136423 : return error;
823 : }
824 :
825 : /*
    : * Compare the link counts we observed against the live information.
    : *
    : * Pass one walks every inode returned by the comparison iscan and checks it
    : * with xchk_nlinks_compare_inode(). Pass two iterates the shadow array and
    : * calls xchk_nlinks_compare_inum() for every record that pass one did not
    : * mark XCHK_NLINK_COMPARE_SCANNED.
    : *
    : * Returns 0 without doing further work whenever XFS_SCRUB_OFLAG_CORRUPT is
    : * already set. Otherwise returns the first error hit, or the final value
    : * returned by xfarray_iter().
    : */
826 : STATIC int
827 12658 : xchk_nlinks_compare(
828 : struct xchk_nlink_ctrs *xnc)
829 : {
830 12658 : struct xchk_nlink nl;
831 12658 : struct xfs_scrub *sc = xnc->sc;
832 12658 : struct xfs_inode *ip;
833 12658 : xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
834 12658 : int error;
835 :
836 12658 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
837 : return 0;
838 :
839 : /*
840 : * Create a new empty transaction so that we can advance the iscan
841 : * cursor without deadlocking if the inobt has a cycle and push on the
842 : * inactivation workqueue.
843 : */
844 12658 : xchk_trans_cancel(sc);
845 12658 : error = xchk_trans_alloc_empty(sc);
846 12658 : if (error)
847 : return error;
848 :
849 : /*
850 : * Use the inobt to walk all allocated inodes to compare the link
851 : * counts. Inodes skipped by _compare_iter will be tried again in the
852 : * next phase of the scan.
853 : */
854 12658 : xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
855 958272846 : while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
856 479123767 : error = xchk_nlinks_compare_inode(xnc, ip);
    : /* Mark visited and release the inode even if the compare failed. */
857 479123767 : xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
858 479123767 : xchk_irele(sc, ip);
859 479123767 : if (error)
860 : break;
861 :
862 479123767 : if (xchk_should_terminate(sc, &error))
863 : break;
864 : }
865 12658 : xchk_iscan_iter_finish(&xnc->compare_iscan);
866 12658 : xchk_iscan_teardown(&xnc->compare_iscan);
867 12658 : if (error)
868 : return error;
869 :
870 12656 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
871 : return 0;
872 :
873 : /*
874 : * Walk all the non-null nlink observations that weren't checked in the
875 : * previous step.
876 : */
877 12656 : mutex_lock(&xnc->lock);
878 479022719 : while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
    : /*
    : * NOTE(review): presumably xfarray_iter() leaves the cursor one
    : * past the record it just returned, hence the -1 -- confirm
    : * against the xfarray implementation.
    : */
879 479010063 : xfs_ino_t ino = cur - 1;
880 :
881 479010063 : if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
882 478722016 : continue;
883 :
    : /*
    : * Drop the lock across the call: compare_inum takes xnc->lock
    : * itself. The early returns below therefore run unlocked.
    : */
884 288047 : mutex_unlock(&xnc->lock);
885 :
886 288047 : error = xchk_nlinks_compare_inum(xnc, ino);
887 288047 : if (error)
888 0 : return error;
889 :
890 288047 : if (xchk_should_terminate(xnc->sc, &error))
891 0 : return error;
892 :
893 288047 : mutex_lock(&xnc->lock);
894 : }
895 12656 : mutex_unlock(&xnc->lock);
896 :
897 12656 : return error;
898 : }
899 :
900 : /*
    : * Tear down everything associated with a nlinks check.
    : *
    : * @priv is the struct xchk_nlink_ctrs. Installed as sc->buf_cleanup by
    : * xchk_nlinks_setup_scan(), which also calls it directly on its own error
    : * paths. The order is: abort the collection iscan, remove the directory
    : * update hook, destroy the shadow array, tear down the iscan, destroy the
    : * mutex, and clear xnc->sc.
    : */
901 : static void
902 12684 : xchk_nlinks_teardown_scan(
903 : void *priv)
904 : {
905 12684 : struct xchk_nlink_ctrs *xnc = priv;
906 :
907 : /* Discourage any hook functions that might be running. */
908 12684 : xchk_iscan_abort(&xnc->collect_iscan);
909 :
910 12684 : xfs_dir_hook_del(xnc->sc->mp, &xnc->hooks);
911 :
912 12684 : xfarray_destroy(xnc->nlinks);
    : /* xchk_nlinks_update_incore() treats a NULL array as "nothing to do". */
913 12684 : xnc->nlinks = NULL;
914 :
915 12684 : xchk_iscan_teardown(&xnc->collect_iscan);
916 12684 : mutex_destroy(&xnc->lock);
917 12684 : xnc->sc = NULL;
918 : }
919 :
920 : /*
921 : * Scan all inodes in the entire filesystem to generate link count data. If
922 : * the scan is successful, the counts will be left alive for a repair. If any
923 : * error occurs, we'll tear everything down.
    : *
    : * This function only prepares the scan state in @xnc: it initializes the
    : * lock, starts the collection iscan, creates the sparse shadow array, and
    : * installs the directory update hook. On success it registers
    : * xchk_nlinks_teardown_scan() as sc->buf_cleanup and returns 0; on failure
    : * it calls the teardown function itself and returns the negative errno.
924 : */
925 : STATIC int
926 12684 : xchk_nlinks_setup_scan(
927 : struct xfs_scrub *sc,
928 : struct xchk_nlink_ctrs *xnc)
929 : {
930 12684 : struct xfs_mount *mp = sc->mp;
931 12684 : char *descr;
932 12684 : unsigned long long max_inos;
933 12684 : xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1;
934 12684 : xfs_agino_t first_agino, last_agino;
935 12684 : int error;
936 :
937 12684 : ASSERT(xnc->sc == NULL);
938 12684 : xnc->sc = sc;
939 :
940 12684 : mutex_init(&xnc->lock);
941 :
942 : /* Retry iget every tenth of a second for up to 30 seconds. */
943 12684 : xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);
944 :
945 : /*
946 : * Set up enough space to store an nlink record for the highest
947 : * possible inode number in this system.
    : *
    : * The array is indexed by inode number, so its size is the last
    : * inode of the last AG plus one, capped at XFS_MAXINUMBER + 1.
948 : */
949 12684 : xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
950 12684 : max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
951 12684 : descr = xchk_xfile_descr(sc, "file link counts");
952 12684 : error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
953 : sizeof(struct xchk_nlink), &xnc->nlinks);
954 12684 : kfree(descr);
955 12684 : if (error)
956 0 : goto out_teardown;
957 :
958 : /*
959 : * Hook into the directory entry code so that we can capture updates to
960 : * file link counts. The hook only triggers for inodes that were
961 : * already scanned, and the scanner thread takes each inode's ILOCK,
962 : * which means that any in-progress inode updates will finish before we
963 : * can scan the inode.
964 : */
965 12684 : ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
966 12684 : xfs_hook_setup(&xnc->hooks.dirent_hook, xchk_nlinks_live_update);
967 12684 : error = xfs_dir_hook_add(mp, &xnc->hooks);
968 12684 : if (error)
969 0 : goto out_teardown;
970 :
971 : /* Use deferred cleanup to pass the inode link count data to repair. */
972 12684 : sc->buf_cleanup = xchk_nlinks_teardown_scan;
973 12684 : return 0;
974 :
975 0 : out_teardown:
976 0 : xchk_nlinks_teardown_scan(xnc);
977 0 : return error;
978 : }
979 :
980 : /*
    : * Scrub the link count of all inodes on the filesystem.
    : *
    : * Uses the struct xchk_nlink_ctrs that xchk_setup_nlinks() stored in
    : * sc->buf. Sets up the scan, collects link counts from every inode, and
    : * then compares the collected counts against the inodes. The comparison is
    : * skipped if the collection left XFS_SCRUB_OFLAG_INCOMPLETE set.
    : *
    : * Returns 0, the setup error, or the value left in error when
    : * xchk_xref_process_error() returns false.
    : */
981 : int
982 12684 : xchk_nlinks(
983 : struct xfs_scrub *sc)
984 : {
985 12684 : struct xchk_nlink_ctrs *xnc = sc->buf;
986 12684 : int error = 0;
987 :
988 : /* Set ourselves up to check link counts on the live filesystem. */
989 12684 : error = xchk_nlinks_setup_scan(sc, xnc);
990 12684 : if (error)
991 : return error;
992 :
993 : /* Walk all inodes, picking up link count information. */
994 12684 : error = xchk_nlinks_collect(xnc);
995 12684 : if (!xchk_xref_process_error(sc, 0, 0, &error))
996 26 : return error;
997 :
998 : /* Fail fast if we're not playing with a full dataset. */
999 12658 : if (xchk_iscan_aborted(&xnc->collect_iscan))
1000 0 : xchk_set_incomplete(sc);
1001 12658 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
1002 : return 0;
1003 :
1004 : /* Compare link counts. */
1005 12658 : error = xchk_nlinks_compare(xnc);
1006 12658 : if (!xchk_xref_process_error(sc, 0, 0, &error))
1007 2 : return error;
1008 :
1009 : /* Check one last time for an incomplete dataset. */
1010 12656 : if (xchk_iscan_aborted(&xnc->collect_iscan))
1011 0 : xchk_set_incomplete(sc);
1012 :
1013 : return 0;
1014 : }
|