Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_sb.h"
17 : #include "xfs_inode.h"
18 : #include "xfs_icache.h"
19 : #include "xfs_da_format.h"
20 : #include "xfs_da_btree.h"
21 : #include "xfs_dir2.h"
22 : #include "xfs_dir2_priv.h"
23 : #include "xfs_bmap.h"
24 : #include "xfs_quota.h"
25 : #include "xfs_bmap_btree.h"
26 : #include "xfs_trans_space.h"
27 : #include "xfs_bmap_util.h"
28 : #include "xfs_swapext.h"
29 : #include "xfs_xchgrange.h"
30 : #include "xfs_ag.h"
31 : #include "xfs_parent.h"
32 : #include "scrub/xfs_scrub.h"
33 : #include "scrub/scrub.h"
34 : #include "scrub/common.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/tempfile.h"
38 : #include "scrub/tempswap.h"
39 : #include "scrub/xfile.h"
40 : #include "scrub/xfarray.h"
41 : #include "scrub/xfblob.h"
42 : #include "scrub/iscan.h"
43 : #include "scrub/readdir.h"
44 : #include "scrub/reap.h"
45 : #include "scrub/findparent.h"
46 : #include "scrub/orphanage.h"
47 : #include "scrub/listxattr.h"
48 :
49 : /*
50 : * Directory Repair
51 : * ================
52 : *
53 : * We repair directories by reading the directory data blocks looking for
54 : * directory entries that look salvageable (name passes verifiers, entry points
55 : * to a valid allocated inode, etc). Each entry worth salvaging is stashed in
56 : * memory, and the stashed entries are periodically replayed into a temporary
57 : * directory to constrain memory use. Batching the construction of the
58 : * temporary directory in this fashion reduces lock cycling of the directory
59 : * being repaired and the temporary directory, and will later become important
60 : * for parent pointer scanning.
61 : *
62 : * If parent pointers are enabled on this filesystem, we instead reconstruct
63 : * the directory by visiting each parent pointer of each file in the filesystem
64 : * and translating the relevant parent pointer records into dirents. In this
65 : * case, it is advantageous to stash all directory entries created from parent
66 : * pointers for a single child file before replaying them into the temporary
67 : * directory. To save memory, the live filesystem scan reuses the findparent
68 : * fields. Directory repair chooses either parent pointer scanning or
69 : * directory entry salvaging, but not both.
70 : *
71 : * Directory entries added to the temporary directory do not elevate the link
72 : * counts of the inodes found. When salvaging completes, the remaining stashed
73 : * entries are replayed to the temporary directory. An atomic extent swap is
74 : * used to commit the new directory blocks to the directory being repaired.
75 : * This will disrupt readdir cursors.
76 : *
77 : * Legacy Locking Issues
78 : * ---------------------
79 : *
80 : * Prior to Linux 6.5, if /a, /a/b, and /c were all directories, the VFS would
81 : * not take i_rwsem on /a/b for a "mv /a/b /c/" operation. This meant that
82 : * only b's ILOCK protected b's dotdot update. b's IOLOCK was not taken,
83 : * unlike every other dotdot update (link, remove, mkdir). If the repair code
84 : * dropped the ILOCK, it was required either to revalidate the dotdot entry
85 : * or to use dirent hooks to capture updates from other threads.
86 : */
87 :
88 : /* Value for xrep_dirent.action: create a dirent in the tempdir. */
89 : #define XREP_DIRENT_ADD (1)
90 :
91 : /* Value for xrep_dirent.action: remove a dirent from the tempdir. */
92 : #define XREP_DIRENT_REMOVE (2)
93 :
94 : /* One stashed update (add or remove of a directory entry) to be replayed into the temporary directory. */
95 : struct xrep_dirent {
96 : /* Cookie handed back by xfblob_store(); used with xfblob_load() to retrieve the dirent name. */
97 : xfblob_cookie name_cookie;
98 :
99 : /* Target inode number. */
100 : xfs_ino_t ino;
101 :
102 : /* Length of the dirent name (copied from xfs_name.len when stashing). */
103 : uint8_t namelen;
104 :
105 : /* File type of the dirent (copied from xfs_name.type when stashing). */
106 : uint8_t ftype;
107 :
108 : /* What to do when replaying: XREP_DIRENT_{ADD,REMOVE} */
109 : uint8_t action;
110 : };
111 :
112 : /*
113 : * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
114 : * before we write them to the temp dir. The combined size of both stores is compared against this limit.
115 : */
116 : #define XREP_DIR_MAX_STASH_BYTES (PAGE_SIZE * 8)
117 :
118 : struct xrep_dir { /* In-core state for one directory repair; reachable through sc->buf. */
119 : struct xfs_scrub *sc;
120 :
121 : /* Stashed xrep_dirent records that have not been replayed into the tempdir yet. */
122 : struct xfarray *dir_entries;
123 :
124 : /* Blobs containing directory entry names; looked up via xrep_dirent.name_cookie. */
125 : struct xfblob *dir_names;
126 :
127 : /* Information for swapping data forks at the end. */
128 : struct xrep_tempswap tx;
129 :
130 : /* Preallocated args struct for performing dir operations; reinitialized before each operation. */
131 : struct xfs_da_args args;
132 :
133 : /*
134 : * Information used to scan the filesystem to find the inumber of the
135 : * dotdot entry for this directory. For directory salvaging when
136 : * parent pointers are not enabled, we use the findparent_* functions
137 : * on this object and access only the parent_ino field directly.
138 : *
139 : * When parent pointers are enabled, however, the pptr scanner uses the
140 : * iscan, hooks, lock, and parent_ino fields of this object directly.
141 : * @pscan.lock coordinates access to dir_entries, dir_names,
142 : * parent_ino, subdirs, dirents, and args. This reduces the memory
143 : * requirements of this structure.
144 : */
145 : struct xrep_parent_scan_info pscan;
146 :
147 : /*
148 : * Context information for attaching this directory to the lost+found
149 : * if this directory does not have a parent.
150 : */
151 : struct xrep_adoption adoption;
152 :
153 : /* How many subdirectories did we find? Adjusted as XFS_DIR3_FT_DIR dirents are replayed. */
154 : uint64_t subdirs;
155 :
156 : /* How many dirents did we find? Adjusted as dirents are replayed. */
157 : unsigned int dirents;
158 :
159 : /* Should we move this directory to the orphanage? */
160 : bool needs_adoption;
161 :
162 : /*
163 : * Scratch buffer for reading parent pointers from child files. The
164 : * p_name field is used to flush stashed dirents into the temporary
165 : * directory in between parent pointers. At the very end of the
166 : * repair, it can also be used to compute the lost+found filename
167 : * if we need to reparent the directory.
168 : */
169 : struct xfs_parent_name_irec pptr;
170 : };
171 :
172 : /* Tear down all the incore stuff we created. */
173 : static void
174 192990 : xrep_dir_teardown(
175 : struct xfs_scrub *sc)
176 : {
177 192990 : struct xrep_dir *rd = sc->buf;
178 :
179 192990 : xrep_findparent_scan_teardown(&rd->pscan);
180 192997 : xfblob_destroy(rd->dir_names);
181 192994 : xfarray_destroy(rd->dir_entries);
182 192997 : }
183 :
184 : /* Set up for a directory repair. */
185 : int
186 584906 : xrep_setup_directory(
187 : struct xfs_scrub *sc)
188 : {
189 584906 : struct xrep_dir *rd;
190 584906 : int error;
191 :
192 584906 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
193 :
194 584890 : error = xrep_orphanage_try_create(sc);
195 584906 : if (error)
196 : return error;
197 :
198 584906 : error = xrep_tempfile_create(sc, S_IFDIR);
199 584745 : if (error)
200 : return error;
201 :
202 584288 : rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
203 584387 : if (!rd)
204 : return -ENOMEM;
205 584387 : rd->sc = sc;
206 584387 : sc->buf = rd;
207 :
208 584387 : return 0;
209 : }
210 :
211 : /*
212 : * If we're the root of a directory tree, we are our own parent. If we're an
213 : * unlinked directory, the parent /won't/ have a link to us. Set the parent
214 : * directory to the root for both cases. Returns NULLFSINO if we don't know
215 : * what to do.
216 : */
217 : static inline xfs_ino_t
218 : xrep_dir_self_parent(
219 : struct xrep_dir *rd)
220 : {
221 : struct xfs_scrub *sc = rd->sc;
222 :
223 : if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
224 : return sc->mp->m_sb.sb_rootino;
225 :
226 : if (VFS_I(sc->ip)->i_nlink == 0)
227 : return sc->mp->m_sb.sb_rootino;
228 :
229 : return NULLFSINO;
230 : }
231 :
232 : /*
233 : * Look up the dotdot entry and confirm that it's really the parent.
234 : * Returns NULLFSINO if we don't know what to do.
235 : */
236 : static inline xfs_ino_t
237 0 : xrep_dir_lookup_parent(
238 : struct xrep_dir *rd)
239 : {
240 0 : struct xfs_scrub *sc = rd->sc;
241 0 : xfs_ino_t ino;
242 0 : int error;
243 :
244 0 : error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
245 0 : if (error)
246 : return NULLFSINO;
247 0 : if (!xfs_verify_dir_ino(sc->mp, ino))
248 : return NULLFSINO;
249 :
250 0 : error = xrep_findparent_confirm(sc, &ino);
251 0 : if (error)
252 : return NULLFSINO;
253 :
254 0 : return ino;
255 : }
256 :
257 : /*
258 : * Look up '..' in the dentry cache and confirm that it's really the parent.
259 : * Returns NULLFSINO if the dcache misses or if the hit is implausible.
260 : */
261 : static inline xfs_ino_t
262 655 : xrep_dir_dcache_parent(
263 : struct xrep_dir *rd)
264 : {
265 655 : struct xfs_scrub *sc = rd->sc;
266 655 : xfs_ino_t parent_ino;
267 655 : int error;
268 :
269 655 : parent_ino = xrep_findparent_from_dcache(sc);
270 655 : if (parent_ino == NULLFSINO)
271 : return parent_ino;
272 :
273 655 : error = xrep_findparent_confirm(sc, &parent_ino);
274 655 : if (error)
275 : return NULLFSINO;
276 :
277 655 : return parent_ino;
278 : }
279 :
280 : /* Try to find the parent of the directory being repaired. */
281 : STATIC int
282 664 : xrep_dir_find_parent(
283 : struct xrep_dir *rd)
284 : {
285 664 : xfs_ino_t ino;
286 :
287 664 : ino = xrep_findparent_self_reference(rd->sc);
288 664 : if (ino != NULLFSINO) {
289 9 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
290 9 : return 0;
291 : }
292 :
293 655 : ino = xrep_dir_dcache_parent(rd);
294 655 : if (ino != NULLFSINO) {
295 655 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
296 655 : return 0;
297 : }
298 :
299 0 : ino = xrep_dir_lookup_parent(rd);
300 0 : if (ino != NULLFSINO) {
301 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
302 0 : return 0;
303 : }
304 :
305 : /*
306 : * A full filesystem scan is the last resort. On a busy filesystem,
307 : * the scan can fail with -EBUSY if we cannot grab IOLOCKs. That means
308 : * that we don't know what who the parent is, so we should return to
309 : * userspace.
310 : */
311 0 : return xrep_findparent_scan(&rd->pscan);
312 : }
313 :
314 : /*
315 : * Decide if we want to salvage this entry. We don't bother with oversized
316 : * names or the dot entry.
317 : */
318 : STATIC int
319 5305 : xrep_dir_want_salvage(
320 : struct xrep_dir *rd,
321 : const char *name,
322 : int namelen,
323 : xfs_ino_t ino)
324 : {
325 5305 : struct xfs_mount *mp = rd->sc->mp;
326 :
327 : /* No pointers to ourselves or to garbage. */
328 5305 : if (ino == rd->sc->ip->i_ino)
329 : return false;
330 5297 : if (!xfs_verify_dir_ino(mp, ino))
331 : return false;
332 :
333 : /* No weird looking names or dot entries. */
334 5297 : if (namelen >= MAXNAMELEN || namelen <= 0)
335 : return false;
336 5297 : if (namelen == 1 && name[0] == '.')
337 0 : return false;
338 :
339 : return true;
340 : }
341 :
342 : /*
343 : * Remember that we want to create a dirent in the tempdir. These stashed
344 : * actions will be replayed later.
345 : */
346 : STATIC int
347 2252154 : xrep_dir_stash_createname(
348 : struct xrep_dir *rd,
349 : const struct xfs_name *name,
350 : xfs_ino_t ino)
351 : {
352 2252154 : struct xrep_dirent dirent = {
353 : .action = XREP_DIRENT_ADD,
354 : .ino = ino,
355 2252154 : .namelen = name->len,
356 2252154 : .ftype = name->type,
357 : };
358 2252154 : int error;
359 :
360 2252154 : trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
361 :
362 2252155 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
363 2252155 : name->len);
364 2252155 : if (error)
365 : return error;
366 :
367 2252155 : return xfarray_append(rd->dir_entries, &dirent);
368 : }
369 :
370 : /*
371 : * Remember that we want to remove a dirent from the tempdir. These stashed
372 : * actions will be replayed later.
373 : */
374 : STATIC int
375 0 : xrep_dir_stash_removename(
376 : struct xrep_dir *rd,
377 : const struct xfs_name *name,
378 : xfs_ino_t ino)
379 : {
380 0 : struct xrep_dirent dirent = {
381 : .action = XREP_DIRENT_REMOVE,
382 : .ino = ino,
383 0 : .namelen = name->len,
384 0 : .ftype = name->type,
385 : };
386 0 : int error;
387 :
388 0 : trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
389 :
390 0 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
391 0 : name->len);
392 0 : if (error)
393 : return error;
394 :
395 0 : return xfarray_append(rd->dir_entries, &dirent);
396 : }
397 :
398 : /* Allocate an in-core record to hold entries while we rebuild the dir data. */
399 : STATIC int
400 5297 : xrep_dir_salvage_entry(
401 : struct xrep_dir *rd,
402 : unsigned char *name,
403 : unsigned int namelen,
404 : xfs_ino_t ino)
405 : {
406 5297 : struct xfs_name xname = {
407 : .name = name,
408 : };
409 5297 : struct xfs_scrub *sc = rd->sc;
410 5297 : struct xfs_inode *ip;
411 5297 : unsigned int i = 0;
412 5297 : int error = 0;
413 :
414 5297 : if (xchk_should_terminate(sc, &error))
415 0 : return error;
416 :
417 : /*
418 : * Truncate the name to the first character that would trip namecheck.
419 : * If we no longer have a name after that, ignore this entry.
420 : */
421 53268 : while (i < namelen && name[i] != 0 && name[i] != '/')
422 47971 : i++;
423 5297 : if (i == 0)
424 : return 0;
425 5297 : xname.len = i;
426 :
427 : /* Ignore '..' entries; we already picked the new parent. */
428 5297 : if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
429 8 : trace_xrep_dir_salvaged_parent(sc->ip, ino);
430 8 : return 0;
431 : }
432 :
433 5289 : trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
434 :
435 : /*
436 : * Compute the ftype or dump the entry if we can't. We don't lock the
437 : * inode because inodes can't change type while we have a reference.
438 : */
439 5289 : error = xchk_iget(sc, ino, &ip);
440 5289 : if (error)
441 : return 0;
442 :
443 5289 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
444 5289 : xchk_irele(sc, ip);
445 :
446 5289 : return xrep_dir_stash_createname(rd, &xname, ino);
447 : }
448 :
449 : /* Record a shortform directory entry for later reinsertion. */
450 : STATIC int
451 3446 : xrep_dir_salvage_sf_entry(
452 : struct xrep_dir *rd,
453 : struct xfs_dir2_sf_hdr *sfp,
454 : struct xfs_dir2_sf_entry *sfep)
455 : {
456 3446 : xfs_ino_t ino;
457 :
458 3446 : ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
459 3446 : if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
460 : return 0;
461 :
462 3446 : return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
463 : }
464 :
465 : /* Record a regular directory entry for later reinsertion. */
466 : STATIC int
467 1859 : xrep_dir_salvage_data_entry(
468 : struct xrep_dir *rd,
469 : struct xfs_dir2_data_entry *dep)
470 : {
471 1859 : xfs_ino_t ino;
472 :
473 1859 : ino = be64_to_cpu(dep->inumber);
474 1859 : if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
475 : return 0;
476 :
477 1851 : return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
478 : }
479 :
480 : /* Try to recover block/data format directory entries. */
481 : STATIC int
482 14 : xrep_dir_recover_data(
483 : struct xrep_dir *rd,
484 : struct xfs_buf *bp)
485 : {
486 14 : struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo;
487 14 : unsigned int offset;
488 14 : unsigned int end;
489 14 : int error = 0;
490 :
491 : /*
492 : * Loop over the data portion of the block.
493 : * Each object is a real entry (dep) or an unused one (dup).
494 : */
495 14 : offset = geo->data_entry_offset;
496 14 : end = min_t(unsigned int, BBTOB(bp->b_length),
497 : xfs_dir3_data_end_offset(geo, bp->b_addr));
498 :
499 1929 : while (offset < end) {
500 1915 : struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
501 1915 : struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
502 :
503 1915 : if (xchk_should_terminate(rd->sc, &error))
504 0 : return error;
505 :
506 : /* Skip unused entries. */
507 1915 : if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
508 56 : offset += be16_to_cpu(dup->length);
509 56 : continue;
510 : }
511 :
512 : /* Don't walk off the end of the block. */
513 1859 : offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
514 1859 : if (offset > end)
515 : break;
516 :
517 : /* Ok, let's save this entry. */
518 1859 : error = xrep_dir_salvage_data_entry(rd, dep);
519 1859 : if (error)
520 0 : return error;
521 :
522 : }
523 :
524 : return 0;
525 : }
526 :
527 : /* Try to recover shortform directory entries. */
528 : STATIC int
529 656 : xrep_dir_recover_sf(
530 : struct xrep_dir *rd)
531 : {
532 656 : struct xfs_dir2_sf_hdr *sfp;
533 656 : struct xfs_dir2_sf_entry *sfep;
534 656 : struct xfs_dir2_sf_entry *next;
535 656 : struct xfs_ifork *ifp;
536 656 : xfs_ino_t ino;
537 656 : unsigned char *end;
538 656 : int error = 0;
539 :
540 656 : ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
541 656 : sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
542 656 : end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
543 :
544 656 : ino = xfs_dir2_sf_get_parent_ino(sfp);
545 656 : trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
546 :
547 656 : sfep = xfs_dir2_sf_firstentry(sfp);
548 4102 : while ((unsigned char *)sfep < end) {
549 3446 : if (xchk_should_terminate(rd->sc, &error))
550 0 : return error;
551 :
552 3446 : next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
553 3446 : if ((unsigned char *)next > end)
554 : break;
555 :
556 : /* Ok, let's save this entry. */
557 3446 : error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
558 3446 : if (error)
559 0 : return error;
560 :
561 : sfep = next;
562 : }
563 :
564 : return 0;
565 : }
566 :
567 : /*
568 : * Try to figure out the format of this directory from the data fork mappings
569 : * and the directory size. If we can be reasonably sure of format, we can be
570 : * more aggressive in salvaging directory entries. On return, @magic_guess
571 : * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format"
572 : * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory,
573 : * and 0 if we can't tell.
574 : */
575 : STATIC void
576 8 : xrep_dir_guess_format(
577 : struct xrep_dir *rd,
578 : __be32 *magic_guess)
579 : {
580 8 : struct xfs_inode *dp = rd->sc->ip;
581 8 : struct xfs_mount *mp = rd->sc->mp;
582 8 : struct xfs_da_geometry *geo = mp->m_dir_geo;
583 8 : xfs_fileoff_t last;
584 8 : int error;
585 :
586 8 : ASSERT(xfs_has_crc(mp));
587 :
588 8 : *magic_guess = 0;
589 :
590 : /*
591 : * If there's a single directory block and the directory size is
592 : * exactly one block, this has to be a single block format directory.
593 : */
594 8 : error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
595 8 : if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
596 7 : dp->i_disk_size == geo->blksize) {
597 7 : *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
598 8 : return;
599 : }
600 :
601 : /*
602 : * If the last extent before the leaf offset matches the directory
603 : * size and the directory size is larger than 1 block, this is a
604 : * data format directory.
605 : */
606 1 : last = geo->leafblk;
607 1 : error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
608 1 : if (!error &&
609 1 : XFS_FSB_TO_B(mp, last) > geo->blksize &&
610 1 : XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
611 1 : *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
612 1 : return;
613 : }
614 : }
615 :
616 : /* Recover directory entries from the directory block at @dabno. A nonzero @magic_guess temporarily replaces the block's magic while salvaging; with a zero guess, only blocks with a known magic that pass the checks below are salvaged. Returns 0, or the error from reading the buffer or from xrep_dir_recover_data(). */
617 : STATIC int
618 14 : xrep_dir_recover_dirblock(
619 : struct xrep_dir *rd,
620 : __be32 magic_guess,
621 : xfs_dablk_t dabno)
622 : {
623 14 : struct xfs_dir2_data_hdr *hdr;
624 14 : struct xfs_buf *bp;
625 14 : __be32 oldmagic;
626 14 : int error;
627 :
628 : /*
629 : * Try to read buffer. We invalidate them in the next step so we don't
630 : * bother to set a buffer type or ops (the ops argument is NULL).
631 : */
632 14 : error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
633 : XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
634 14 : if (error || !bp) /* read failed, or no buffer came back (presumably a hole; returns 0 then) */
635 : return error;
636 :
637 14 : hdr = bp->b_addr;
638 14 : oldmagic = hdr->magic; /* remembered so it can be put back at out: */
639 :
640 14 : trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
641 : be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
642 :
643 : /*
644 : * If we're sure of the block's format, proceed with the salvage
645 : * operation using the specified magic number. No verifier is run in this case.
646 : */
647 14 : if (magic_guess) {
648 14 : hdr->magic = magic_guess;
649 14 : goto recover;
650 : }
651 :
652 : /*
653 : * If we couldn't guess what type of directory this is, then we will
654 : * only salvage entries from directory blocks that match the magic
655 : * number and pass verifiers. Blocks that fail are skipped with error still 0.
656 : */
657 0 : switch (hdr->magic) {
658 0 : case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
659 : case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
660 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
661 0 : goto out;
662 0 : if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
663 0 : goto out;
664 : break;
665 0 : case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
666 : case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
667 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
668 0 : goto out;
669 0 : if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
670 0 : goto out;
671 : break;
672 0 : default:
673 0 : goto out;
674 : }
675 :
676 14 : recover:
677 14 : error = xrep_dir_recover_data(rd, bp);
678 :
679 14 : out:
680 14 : hdr->magic = oldmagic; /* undo any temporary magic override before releasing the buffer */
681 14 : xfs_trans_brelse(rd->sc->tp, bp);
682 14 : return error;
683 : }
684 :
685 : static inline void
686 2626249 : xrep_dir_init_args(
687 : struct xrep_dir *rd,
688 : struct xfs_inode *dp,
689 : const struct xfs_name *name)
690 : {
691 2626249 : memset(&rd->args, 0, sizeof(struct xfs_da_args));
692 2626249 : rd->args.geo = rd->sc->mp->m_dir_geo;
693 2626249 : rd->args.whichfork = XFS_DATA_FORK;
694 2626249 : rd->args.owner = rd->sc->ip->i_ino;
695 2626249 : rd->args.trans = rd->sc->tp;
696 2626249 : rd->args.dp = dp;
697 2626249 : if (!name)
698 : return;
699 2433307 : rd->args.name = name->name;
700 2433307 : rd->args.namelen = name->len;
701 2433307 : rd->args.filetype = name->type;
702 2433307 : rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
703 : }
704 :
705 : /* Replay a stashed createname into the temporary directory. */
706 : STATIC int
707 2251722 : xrep_dir_replay_createname(
708 : struct xrep_dir *rd,
709 : const struct xfs_name *name,
710 : xfs_ino_t inum,
711 : xfs_extlen_t total)
712 : {
713 2251722 : struct xfs_scrub *sc = rd->sc;
714 2251722 : struct xfs_inode *dp = rd->sc->tempip;
715 2251722 : bool is_block, is_leaf;
716 2251722 : int error;
717 :
718 2251722 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
719 :
720 2251722 : error = xfs_dir_ino_validate(sc->mp, inum);
721 2251730 : if (error)
722 : return error;
723 :
724 2251730 : trace_xrep_dir_replay_createname(dp, name, inum);
725 :
726 2251725 : xrep_dir_init_args(rd, dp, name);
727 2251726 : rd->args.inumber = inum;
728 2251726 : rd->args.total = total;
729 2251726 : rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
730 :
731 2251726 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
732 1112116 : return xfs_dir2_sf_addname(&rd->args);
733 :
734 1139610 : error = xfs_dir2_isblock(&rd->args, &is_block);
735 1139610 : if (error)
736 : return error;
737 1139610 : if (is_block)
738 235260 : return xfs_dir2_block_addname(&rd->args);
739 :
740 904350 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
741 904350 : if (error)
742 : return error;
743 904350 : if (is_leaf)
744 522265 : return xfs_dir2_leaf_addname(&rd->args);
745 :
746 382085 : return xfs_dir2_node_addname(&rd->args);
747 : }
748 :
749 : /* Replay a stashed removename onto the temporary directory. */
750 : STATIC int
751 0 : xrep_dir_replay_removename(
752 : struct xrep_dir *rd,
753 : const struct xfs_name *name,
754 : xfs_extlen_t total)
755 : {
756 0 : struct xfs_inode *dp = rd->args.dp;
757 0 : bool is_block, is_leaf;
758 0 : int error;
759 :
760 0 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
761 :
762 0 : xrep_dir_init_args(rd, dp, name);
763 0 : rd->args.op_flags = 0;
764 0 : rd->args.total = total;
765 :
766 0 : trace_xrep_dir_replay_removename(dp, name, 0);
767 :
768 0 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
769 0 : return xfs_dir2_sf_removename(&rd->args);
770 :
771 0 : error = xfs_dir2_isblock(&rd->args, &is_block);
772 0 : if (error)
773 : return error;
774 0 : if (is_block)
775 0 : return xfs_dir2_block_removename(&rd->args);
776 :
777 0 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
778 0 : if (error)
779 : return error;
780 0 : if (is_leaf)
781 0 : return xfs_dir2_leaf_removename(&rd->args);
782 :
783 0 : return xfs_dir2_node_removename(&rd->args);
784 : }
785 :
786 : /*
787 : * Replay one stashed dirent update (XREP_DIRENT_ADD or XREP_DIRENT_REMOVE) against the temporary directory, in its own transaction.
788 : * The name bytes are read from rd->pptr.p_name. The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
789 : * must not be in transaction context. Returns 0 or a negative errno.
790 : */
791 : STATIC int
792 2251760 : xrep_dir_replay_update(
793 : struct xrep_dir *rd,
794 : const struct xrep_dirent *dirent)
795 : {
796 2251760 : struct xfs_name name = {
797 2251760 : .len = dirent->namelen,
798 2251760 : .type = dirent->ftype,
799 2251760 : .name = rd->pptr.p_name,
800 : };
801 2251760 : struct xfs_mount *mp = rd->sc->mp;
802 : #ifdef DEBUG
803 2251760 : xfs_ino_t ino;
804 : #endif
805 2251760 : uint resblks;
806 2251760 : int error;
807 :
808 2251760 : resblks = xfs_link_space_res(mp, dirent->namelen); /* same reservation is used for adds and removes */
809 2251761 : error = xchk_trans_alloc(rd->sc, resblks);
810 2251760 : if (error)
811 : return error;
812 :
813 : /* Lock the temporary directory and join it to the transaction (lock flags 0, so we unlock it ourselves below) */
814 2251736 : xrep_tempfile_ilock(rd->sc);
815 2251734 : xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);
816 :
817 2251735 : switch (dirent->action) {
818 2251735 : case XREP_DIRENT_ADD:
819 : /*
820 : * Create a replacement dirent in the temporary directory.
821 : * Note that _createname doesn't check for existing entries.
822 : * There shouldn't be any in the temporary dir, but we'll
823 : * verify this in debug mode.
824 : */
825 : #ifdef DEBUG
826 2251735 : error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
827 2251723 : if (error != -ENOENT) {
828 0 : ASSERT(error != -ENOENT); /* NOTE(review): always true inside this branch, so it can never fire; also, a successful lookup (error == 0) cancels and returns 0 without adding the entry - confirm intent */
829 0 : goto out_cancel;
830 : }
831 : #endif
832 :
833 2251723 : error = xrep_dir_replay_createname(rd, &name, dirent->ino,
834 : resblks);
835 2251735 : if (error)
836 0 : goto out_cancel;
837 :
838 2251735 : if (name.type == XFS_DIR3_FT_DIR)
839 198909 : rd->subdirs++;
840 2251735 : rd->dirents++;
841 2251735 : break;
842 0 : case XREP_DIRENT_REMOVE:
843 : /*
844 : * Remove a dirent from the temporary directory. Note that
845 : * _removename doesn't check the inode target of the existing
846 : * entry. There should be a perfect match in the temporary
847 : * dir, but we'll verify this in debug mode.
848 : */
849 : #ifdef DEBUG
850 0 : error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
851 0 : if (error) {
852 0 : ASSERT(error != 0); /* NOTE(review): always true inside this branch, so it can never fire - confirm intent */
853 0 : goto out_cancel;
854 : }
855 0 : if (ino != dirent->ino) {
856 0 : ASSERT(ino == dirent->ino);
857 0 : error = -EIO;
858 0 : goto out_cancel;
859 : }
860 : #endif
861 :
862 0 : error = xrep_dir_replay_removename(rd, &name, resblks);
863 0 : if (error)
864 0 : goto out_cancel;
865 :
866 0 : if (name.type == XFS_DIR3_FT_DIR)
867 0 : rd->subdirs--;
868 0 : rd->dirents--;
869 0 : break;
870 0 : default:
871 0 : ASSERT(0); /* unknown action code in the stashed record */
872 0 : error = -EIO;
873 0 : goto out_cancel;
874 : }
875 :
876 : /* Commit and unlock. */
877 2251735 : error = xrep_trans_commit(rd->sc);
878 2251737 : if (error)
879 : return error; /* NOTE(review): returns without xrep_tempfile_iunlock(); presumably scrub teardown drops the tempfile ILOCK - confirm */
880 :
881 2251736 : xrep_tempfile_iunlock(rd->sc);
882 2251736 : return 0;
883 0 : out_cancel:
884 0 : xchk_trans_cancel(rd->sc);
885 0 : xrep_tempfile_iunlock(rd->sc);
886 0 : return error;
887 : }
888 :
889 : /*
890 : * Flush stashed incore dirent updates that have been recorded by the scanner.
891 : * This is done to reduce the memory requirements of the directory rebuild,
892 : * since directories can contain up to 32GB of directory data.
893 : *
894 : * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir
895 : * IOLOCK.
896 : */
897 : STATIC int
898 194364 : xrep_dir_replay_updates(
899 : struct xrep_dir *rd)
900 : {
901 194364 : xfarray_idx_t array_cur;
902 194364 : int error;
903 :
904 : /* Add all the salvaged dirents to the temporary directory. */
905 2446091 : foreach_xfarray_idx(rd->dir_entries, array_cur) {
906 2251758 : struct xrep_dirent dirent;
907 :
908 2251758 : error = xfarray_load(rd->dir_entries, array_cur, &dirent);
909 2251758 : if (error)
910 24 : return error;
911 :
912 : /* The dirent name is stored in the in-core buffer. */
913 2251758 : error = xfblob_load(rd->dir_names, dirent.name_cookie,
914 2251758 : rd->pptr.p_name, dirent.namelen);
915 2251760 : if (error)
916 0 : return error;
917 2251760 : rd->pptr.p_name[MAXNAMELEN - 1] = 0;
918 :
919 2251760 : error = xrep_dir_replay_update(rd, &dirent);
920 2251751 : if (error)
921 24 : return error;
922 : }
923 :
924 : /* Empty out both arrays now that we've added the entries. */
925 194338 : xfarray_truncate(rd->dir_entries);
926 194342 : xfblob_truncate(rd->dir_names);
927 194342 : return 0;
928 : }
929 :
930 : /*
931 : * Periodically flush stashed directory entries to the temporary dir. This
932 : * is done to reduce the memory requirements of the directory rebuild, since
933 : * directories can contain up to 32GB of directory data. Returns 0 or a negative errno; on success a fresh scrub transaction has been allocated and sc->ip's ILOCK retaken.
934 : */
935 : STATIC int
936 666 : xrep_dir_flush_stashed(
937 : struct xrep_dir *rd)
938 : {
939 666 : int error;
940 :
941 : /*
942 : * Entering this function, the scrub context has a reference to the
943 : * inode being repaired, the temporary file, and a scrub transaction
944 : * that we use during dirent salvaging to avoid livelocking if there
945 : * are cycles in the directory structures. We hold ILOCK_EXCL on both
946 : * the inode being repaired and the temporary file, though they are
947 : * not ijoined to the scrub transaction.
948 : *
949 : * To constrain kernel memory use, we occasionally write salvaged
950 : * dirents from the xfarray and xfblob structures into the temporary
951 : * directory in preparation for swapping the directory structures at
952 : * the end. Updating the temporary file requires a transaction, so we
953 : * commit the scrub transaction and drop the two ILOCKs so that
954 : * we can allocate whatever transaction we want.
955 : *
956 : * We still hold IOLOCK_EXCL on the inode being repaired, which
957 : * prevents anyone from accessing the damaged directory data while we
958 : * repair it.
959 : *
 * NOTE(review): only sc->ip's ILOCK is dropped below via xchk_iunlock(); the statements above about the tempfile ILOCK may be stale - confirm. */
960 666 : error = xrep_trans_commit(rd->sc);
961 666 : if (error)
962 : return error;
963 666 : xchk_iunlock(rd->sc, XFS_ILOCK_EXCL);
964 :
965 : /*
966 : * Take the IOLOCK of the temporary file while we modify dirents. This
967 : * isn't strictly required because the temporary file is never revealed
968 : * to userspace, but we follow the same locking rules. We still hold
969 : * sc->ip's IOLOCK.
970 : */
971 666 : error = xrep_tempfile_iolock_polled(rd->sc);
972 666 : if (error)
973 : return error;
974 :
975 : /* Write to the tempdir all the updates that we've stashed. The tempdir IOLOCK is dropped again whether or not the replay succeeded. */
976 666 : error = xrep_dir_replay_updates(rd);
977 666 : xrep_tempfile_iounlock(rd->sc);
978 666 : if (error)
979 : return error;
980 :
981 : /*
982 : * Recreate the salvage transaction (with no block reservation) and relock the dir we're salvaging.
983 : */
984 666 : error = xchk_trans_alloc(rd->sc, 0);
985 666 : if (error)
986 : return error;
987 666 : xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
988 666 : return 0;
989 : }
990 :
991 : /* Decide if we've stashed too much dirent data in memory. */
992 : static inline bool
993 3085122325 : xrep_dir_want_flush_stashed(
994 : struct xrep_dir *rd)
995 : {
996 3085122325 : unsigned long long bytes;
997 :
998 3085122325 : bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
999 3082060887 : return bytes > XREP_DIR_MAX_STASH_BYTES;
1000 : }
1001 :
1002 : /* Extract as many directory entries as we can. */
1003 : STATIC int
1004 8 : xrep_dir_recover(
1005 : struct xrep_dir *rd)
1006 : {
1007 8 : struct xfs_bmbt_irec got;
1008 8 : struct xfs_scrub *sc = rd->sc;
1009 8 : struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
1010 8 : xfs_fileoff_t offset;
1011 8 : xfs_dablk_t dabno;
1012 8 : __be32 magic_guess;
1013 8 : int nmap;
1014 8 : int error;
1015 :
1016 8 : xrep_dir_guess_format(rd, &magic_guess);
1017 :
1018 : /* Iterate each directory data block in the data fork. */
1019 8 : for (offset = 0;
1020 27 : offset < geo->leafblk;
1021 19 : offset = got.br_startoff + got.br_blockcount) {
1022 19 : nmap = 1;
1023 19 : error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
1024 : &got, &nmap, 0);
1025 19 : if (error)
1026 0 : return error;
1027 19 : if (nmap != 1)
1028 : return -EFSCORRUPTED;
1029 19 : if (!xfs_bmap_is_written_extent(&got))
1030 8 : continue;
1031 :
1032 11 : for (dabno = round_up(got.br_startoff, geo->fsbcount);
1033 25 : dabno < got.br_startoff + got.br_blockcount;
1034 14 : dabno += geo->fsbcount) {
1035 14 : if (xchk_should_terminate(rd->sc, &error))
1036 0 : return error;
1037 :
1038 14 : error = xrep_dir_recover_dirblock(rd,
1039 : magic_guess, dabno);
1040 14 : if (error)
1041 0 : return error;
1042 :
1043 : /* Flush dirents to constrain memory usage. */
1044 14 : if (xrep_dir_want_flush_stashed(rd)) {
1045 2 : error = xrep_dir_flush_stashed(rd);
1046 2 : if (error)
1047 0 : return error;
1048 : }
1049 : }
1050 : }
1051 :
1052 : return 0;
1053 : }
1054 :
1055 : /*
1056 : * Find all the directory entries for this inode by scraping them out of the
1057 : * directory leaf blocks by hand, and flushing them into the temp dir.
1058 : */
1059 : STATIC int
1060 664 : xrep_dir_find_entries(
1061 : struct xrep_dir *rd)
1062 : {
1063 664 : struct xfs_inode *dp = rd->sc->ip;
1064 664 : int error;
1065 :
1066 : /*
1067 : * Salvage directory entries from the old directory, and write them to
1068 : * the temporary directory.
1069 : */
1070 664 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
1071 656 : error = xrep_dir_recover_sf(rd);
1072 : } else {
1073 8 : error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
1074 8 : if (error)
1075 : return error;
1076 :
1077 8 : error = xrep_dir_recover(rd);
1078 : }
1079 664 : if (error)
1080 : return error;
1081 :
1082 664 : return xrep_dir_flush_stashed(rd);
1083 : }
1084 :
1085 : /* Scan all files in the filesystem for dirents. */
1086 : STATIC int
1087 664 : xrep_dir_salvage_entries(
1088 : struct xrep_dir *rd)
1089 : {
1090 664 : struct xfs_scrub *sc = rd->sc;
1091 664 : int error;
1092 :
1093 : /*
1094 : * Drop the ILOCK on this directory so that we can scan for this
1095 : * directory's parent. Figure out who is going to be the parent of
1096 : * this directory, then retake the ILOCK so that we can salvage
1097 : * directory entries.
1098 : */
1099 664 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1100 664 : error = xrep_dir_find_parent(rd);
1101 664 : xchk_ilock(sc, XFS_ILOCK_EXCL);
1102 664 : if (error)
1103 : return error;
1104 :
1105 : /*
1106 : * Collect directory entries by parsing raw leaf blocks to salvage
1107 : * whatever we can. When we're done, free the staging memory before
1108 : * swapping the directories to reduce memory usage.
1109 : */
1110 664 : error = xrep_dir_find_entries(rd);
1111 664 : if (error)
1112 : return error;
1113 :
1114 : /*
1115 : * Cancel the repair transaction and drop the ILOCK so that we can
1116 : * (later) use the atomic extent swap helper functions to compute the
1117 : * correct block reservations and re-lock the inodes.
1118 : *
1119 : * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
1120 : * modifications, but there's nothing to prevent userspace from reading
1121 : * the directory until we're ready for the swap operation. Reads will
1122 : * return -EIO without shutting down the fs, so we're ok with that.
1123 : *
1124 : * The VFS can change dotdot on us, but the findparent scan will keep
1125 : * our incore parent inode up to date. See the note on locking issues
1126 : * for more details.
1127 : */
1128 664 : error = xrep_trans_commit(sc);
1129 664 : if (error)
1130 : return error;
1131 :
1132 664 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1133 664 : return 0;
1134 : }
1135 :
1136 :
1137 : /*
1138 : * Examine a parent pointer of a file. If it leads us back to the directory
1139 : * that we're rebuilding, create an incore dirent from the parent pointer and
1140 : * stash it.
1141 : */
1142 : STATIC int
1143 3307436214 : xrep_dir_scan_pptr(
1144 : struct xfs_scrub *sc,
1145 : struct xfs_inode *ip,
1146 : const struct xfs_parent_name_irec *pptr,
1147 : void *priv)
1148 : {
1149 3307436214 : struct xfs_name xname;
1150 3307436214 : struct xrep_dir *rd = priv;
1151 3307436214 : int error;
1152 :
1153 : /* Ignore parent pointers that point back to a different dir. */
1154 3307436214 : if (pptr->p_ino != sc->ip->i_ino ||
1155 2246866 : pptr->p_gen != VFS_I(sc->ip)->i_generation)
1156 : return 0;
1157 :
1158 : /*
1159 : * Transform this parent pointer into a dirent and queue it for later
1160 : * addition to the temporary directory.
1161 : */
1162 2246866 : xname.name = pptr->p_name;
1163 2246866 : xname.len = pptr->p_namelen;
1164 2246866 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
1165 :
1166 2246865 : mutex_lock(&rd->pscan.lock);
1167 2246865 : error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
1168 2246865 : mutex_unlock(&rd->pscan.lock);
1169 2246865 : return error;
1170 : }
1171 :
1172 : /*
1173 : * If this child dirent points to the directory being repaired, remember that
1174 : * fact so that we can reset the dotdot entry if necessary.
1175 : */
1176 : STATIC int
1177 4972756983 : xrep_dir_scan_dirent(
1178 : struct xfs_scrub *sc,
1179 : struct xfs_inode *dp,
1180 : xfs_dir2_dataptr_t dapos,
1181 : const struct xfs_name *name,
1182 : xfs_ino_t ino,
1183 : void *priv)
1184 : {
1185 4972756983 : struct xrep_dir *rd = priv;
1186 :
1187 : /* Dirent doesn't point to this directory. */
1188 4972756983 : if (ino != rd->sc->ip->i_ino)
1189 : return 0;
1190 :
1191 : /* Ignore garbage inum. */
1192 388573 : if (!xfs_verify_dir_ino(rd->sc->mp, ino))
1193 : return 0;
1194 :
1195 : /* No weird looking names. */
1196 388573 : if (name->len >= MAXNAMELEN || name->len <= 0)
1197 : return 0;
1198 :
1199 : /* Don't pick up dot or dotdot entries; we only want child dirents. */
1200 578893 : if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
1201 190320 : xfs_dir2_samename(name, &xfs_name_dot))
1202 198252 : return 0;
1203 :
1204 190321 : trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
1205 : dp->i_ino);
1206 :
1207 190321 : xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
1208 190321 : return 0;
1209 : }
1210 :
1211 : /*
1212 : * Decide if we want to look for child dirents or parent pointers in this file.
1213 : * Skip the dir being repaired and any files being used to stage repairs.
1214 : */
1215 : static inline bool
1216 6161017772 : xrep_dir_want_scan(
1217 : struct xrep_dir *rd,
1218 : const struct xfs_inode *ip)
1219 : {
1220 6161017772 : return ip != rd->sc->ip && !xrep_is_tempfile(ip);
1221 : }
1222 :
1223 : /*
1224 : * Take ILOCK on a file that we want to scan.
1225 : *
1226 : * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
1227 : * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED.
1228 : */
1229 : static inline unsigned int
1230 3082993301 : xrep_dir_scan_ilock(
1231 : struct xrep_dir *rd,
1232 : struct xfs_inode *ip)
1233 : {
1234 3082993301 : uint lock_mode = XFS_ILOCK_SHARED;
1235 :
1236 : /* Need to take the shared ILOCK to advance the iscan cursor. */
1237 3082993301 : if (!xrep_dir_want_scan(rd, ip))
1238 939599 : goto lock;
1239 :
1240 3918133746 : if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
1241 0 : lock_mode = XFS_ILOCK_EXCL;
1242 0 : goto lock;
1243 : }
1244 :
1245 6162810883 : if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
1246 0 : lock_mode = XFS_ILOCK_EXCL;
1247 :
1248 3081657994 : lock:
1249 3082597593 : xfs_ilock(ip, lock_mode);
1250 3083018627 : return lock_mode;
1251 : }
1252 :
1253 : /*
1254 : * Scan this file for relevant child dirents or parent pointers that point to
1255 : * the directory we're rebuilding.
1256 : */
1257 : STATIC int
1258 3083058940 : xrep_dir_scan_file(
1259 : struct xrep_dir *rd,
1260 : struct xfs_inode *ip)
1261 : {
1262 3083058940 : unsigned int lock_mode;
1263 3083058940 : int error = 0;
1264 :
1265 3083058940 : lock_mode = xrep_dir_scan_ilock(rd, ip);
1266 :
1267 3082407666 : if (!xrep_dir_want_scan(rd, ip))
1268 939600 : goto scan_done;
1269 :
1270 3080836318 : error = xchk_pptr_walk(rd->sc, ip, xrep_dir_scan_pptr, &rd->pptr, rd);
1271 3082630931 : if (error)
1272 0 : goto scan_done;
1273 :
1274 3082630931 : if (S_ISDIR(VFS_I(ip)->i_mode)) {
1275 836142689 : error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
1276 836368642 : if (error)
1277 0 : goto scan_done;
1278 : }
1279 :
1280 3082856884 : scan_done:
1281 3083796484 : xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
1282 3082666054 : xfs_iunlock(ip, lock_mode);
1283 3081273553 : return error;
1284 : }
1285 :
1286 : /*
1287 : * Scan all files in the filesystem for parent pointers that we can turn into
1288 : * replacement dirents, and a dirent that we can use to set the dotdot pointer.
1289 : */
1290 : STATIC int
1291 192335 : xrep_dir_scan_dirtree(
1292 : struct xrep_dir *rd)
1293 : {
1294 192335 : struct xfs_scrub *sc = rd->sc;
1295 192335 : struct xfs_inode *ip;
1296 192335 : int error;
1297 :
1298 : /* Roots of directory trees are their own parents. */
1299 192335 : if (sc->ip == sc->mp->m_rootip)
1300 2005 : xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
1301 :
1302 : /*
1303 : * Filesystem scans are time consuming. Drop the directory ILOCK and
1304 : * all other resources for the duration of the scan and hope for the
1305 : * best. The live update hooks will keep our scan information up to
1306 : * date even though we've dropped the locks.
1307 : */
1308 192335 : xchk_trans_cancel(sc);
1309 192333 : if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
1310 192334 : xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
1311 : XFS_ILOCK_EXCL));
1312 192329 : error = xchk_trans_alloc_empty(sc);
1313 192331 : if (error)
1314 : return error;
1315 :
1316 3082076777 : while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
1317 3083190689 : bool flush;
1318 :
1319 3083190689 : error = xrep_dir_scan_file(rd, ip);
1320 3081410929 : xchk_irele(sc, ip);
1321 3083953102 : if (error)
1322 : break;
1323 :
1324 : /* Flush stashed dirent updates to constrain memory usage. */
1325 3083953102 : mutex_lock(&rd->pscan.lock);
1326 3085380255 : flush = xrep_dir_want_flush_stashed(rd);
1327 3081955746 : mutex_unlock(&rd->pscan.lock);
1328 3085346287 : if (flush) {
1329 1387 : xchk_trans_cancel(sc);
1330 :
1331 1387 : error = xrep_tempfile_iolock_polled(sc);
1332 1387 : if (error)
1333 : break;
1334 :
1335 1387 : mutex_lock(&rd->pscan.lock);
1336 1387 : error = xrep_dir_replay_updates(rd);
1337 1387 : mutex_unlock(&rd->pscan.lock);
1338 1387 : xrep_tempfile_iounlock(sc);
1339 1387 : if (error)
1340 : break;
1341 :
1342 1387 : error = xchk_trans_alloc_empty(sc);
1343 1387 : if (error)
1344 : break;
1345 : }
1346 :
1347 3085346287 : if (xchk_should_terminate(sc, &error))
1348 : break;
1349 : }
1350 192335 : xchk_iscan_iter_finish(&rd->pscan.iscan);
1351 192335 : if (error) {
1352 : /*
1353 : * If we couldn't grab an inode that was busy with a state
1354 : * change, change the error code so that we exit to userspace
1355 : * as quickly as possible.
1356 : */
1357 19 : if (error == -EBUSY)
1358 : return -ECANCELED;
1359 19 : return error;
1360 : }
1361 :
1362 : /*
1363 : * Cancel the empty transaction so that we can (later) use the atomic
1364 : * extent swap helpers to lock files and commit the new directory.
1365 : */
1366 192316 : xchk_trans_cancel(rd->sc);
1367 192316 : return 0;
1368 : }
1369 :
1370 : /*
1371 : * Capture dirent updates being made by other threads which are relevant to the
1372 : * directory being repaired.
1373 : */
1374 : STATIC int
1375 15943538 : xrep_dir_live_update(
1376 : struct notifier_block *nb,
1377 : unsigned long action,
1378 : void *data)
1379 : {
1380 15943538 : struct xfs_dir_update_params *p = data;
1381 15943538 : struct xrep_dir *rd;
1382 15943538 : struct xfs_scrub *sc;
1383 15943538 : int error = 0;
1384 :
1385 15943538 : rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb);
1386 15943538 : sc = rd->sc;
1387 :
1388 : /*
1389 : * This thread updated a child dirent in the directory that we're
1390 : * rebuilding. Stash the update for replay against the temporary
1391 : * directory.
1392 : */
1393 15943538 : if (p->dp->i_ino == sc->ip->i_ino &&
1394 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
1395 0 : mutex_lock(&rd->pscan.lock);
1396 0 : if (p->delta > 0)
1397 0 : error = xrep_dir_stash_createname(rd, p->name,
1398 0 : p->ip->i_ino);
1399 : else
1400 0 : error = xrep_dir_stash_removename(rd, p->name,
1401 0 : p->ip->i_ino);
1402 0 : mutex_unlock(&rd->pscan.lock);
1403 0 : if (error)
1404 0 : goto out_abort;
1405 : }
1406 :
1407 : /*
1408 : * This thread updated another directory's child dirent that points to
1409 : * the directory that we're rebuilding, so remember the new dotdot
1410 : * target.
1411 : */
1412 15943538 : if (p->ip->i_ino == sc->ip->i_ino &&
1413 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
1414 0 : if (p->delta > 0) {
1415 0 : trace_xrep_dir_stash_createname(sc->tempip,
1416 : &xfs_name_dotdot,
1417 0 : p->dp->i_ino);
1418 :
1419 0 : xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
1420 : } else {
1421 0 : trace_xrep_dir_stash_removename(sc->tempip,
1422 : &xfs_name_dotdot,
1423 : rd->pscan.parent_ino);
1424 :
1425 0 : xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
1426 : }
1427 : }
1428 :
1429 : return NOTIFY_DONE;
1430 : out_abort:
1431 0 : xchk_iscan_abort(&rd->pscan.iscan);
1432 0 : return NOTIFY_DONE;
1433 : }
1434 :
1435 : /*
1436 : * Free all the directory blocks and reset the data fork. The caller must
1437 : * join the inode to the transaction. This function returns with the inode
1438 : * joined to a clean scrub transaction.
1439 : */
1440 : STATIC int
1441 192937 : xrep_dir_reset_fork(
1442 : struct xrep_dir *rd,
1443 : xfs_ino_t parent_ino)
1444 : {
1445 192937 : struct xfs_scrub *sc = rd->sc;
1446 192937 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
1447 192937 : int error;
1448 :
1449 : /* Unmap all the directory buffers. */
1450 192937 : if (xfs_ifork_has_extents(ifp)) {
1451 4391 : error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
1452 4391 : if (error)
1453 : return error;
1454 : }
1455 :
1456 192937 : trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
1457 :
1458 : /* Reset the data fork to an empty data fork. */
1459 192918 : xfs_idestroy_fork(ifp);
1460 192943 : ifp->if_bytes = 0;
1461 192943 : sc->tempip->i_disk_size = 0;
1462 :
1463 : /* Reinitialize the short form directory. */
1464 192943 : xrep_dir_init_args(rd, sc->tempip, NULL);
1465 192938 : error = xfs_dir2_sf_create(&rd->args, parent_ino);
1466 192945 : if (error)
1467 : return error;
1468 :
1469 192944 : return xrep_tempfile_roll_trans(sc);
1470 : }
1471 :
1472 : /*
1473 : * Prepare both inodes' directory forks for extent swapping. Promote the
1474 : * tempfile from short format to leaf format, and if the file being repaired
1475 : * has a short format data fork, turn it into an empty extent list.
1476 : */
1477 : STATIC int
1478 4391 : xrep_dir_swap_prep(
1479 : struct xfs_scrub *sc,
1480 : bool temp_local,
1481 : bool ip_local)
1482 : {
1483 4391 : int error;
1484 :
1485 : /*
1486 : * If the tempfile's directory is in shortform format, convert that
1487 : * to a single leaf extent so that we can use the atomic extent swap.
1488 : */
1489 4391 : if (temp_local) {
1490 2582 : struct xfs_da_args args = {
1491 2582 : .dp = sc->tempip,
1492 2582 : .geo = sc->mp->m_dir_geo,
1493 : .whichfork = XFS_DATA_FORK,
1494 2582 : .trans = sc->tp,
1495 : .total = 1,
1496 2582 : .owner = sc->ip->i_ino,
1497 : };
1498 :
1499 2582 : error = xfs_dir2_sf_to_block(&args);
1500 2582 : if (error)
1501 0 : return error;
1502 :
1503 : /*
1504 : * Roll the deferred log items to get us back to a clean
1505 : * transaction.
1506 : */
1507 2582 : error = xfs_defer_finish(&sc->tp);
1508 2582 : if (error)
1509 : return error;
1510 : }
1511 :
1512 : /*
1513 : * If the file being repaired had a shortform data fork, convert that
1514 : * to an empty extent list in preparation for the atomic extent swap.
1515 : */
1516 4391 : if (ip_local) {
1517 0 : struct xfs_ifork *ifp;
1518 :
1519 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1520 0 : xfs_idestroy_fork(ifp);
1521 0 : ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1522 0 : ifp->if_nextents = 0;
1523 0 : ifp->if_bytes = 0;
1524 0 : ifp->if_u1.if_root = NULL;
1525 0 : ifp->if_height = 0;
1526 :
1527 0 : xfs_trans_log_inode(sc->tp, sc->ip,
1528 : XFS_ILOG_CORE | XFS_ILOG_DDATA);
1529 : }
1530 :
1531 : return 0;
1532 : }
1533 :
1534 : /*
1535 : * Replace the inode number of a directory entry.
1536 : */
1537 : static int
1538 181578 : xrep_dir_replace(
1539 : struct xrep_dir *rd,
1540 : struct xfs_inode *dp,
1541 : const struct xfs_name *name,
1542 : xfs_ino_t inum,
1543 : xfs_extlen_t total)
1544 : {
1545 181578 : struct xfs_scrub *sc = rd->sc;
1546 181578 : bool is_block, is_leaf;
1547 181578 : int error;
1548 :
1549 181578 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
1550 :
1551 181578 : error = xfs_dir_ino_validate(sc->mp, inum);
1552 181578 : if (error)
1553 : return error;
1554 :
1555 181579 : xrep_dir_init_args(rd, dp, name);
1556 181577 : rd->args.inumber = inum;
1557 181577 : rd->args.total = total;
1558 :
1559 181577 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
1560 180877 : return xfs_dir2_sf_replace(&rd->args);
1561 :
1562 700 : error = xfs_dir2_isblock(&rd->args, &is_block);
1563 700 : if (error)
1564 : return error;
1565 700 : if (is_block)
1566 620 : return xfs_dir2_block_replace(&rd->args);
1567 :
1568 80 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
1569 80 : if (error)
1570 : return error;
1571 80 : if (is_leaf)
1572 40 : return xfs_dir2_leaf_replace(&rd->args);
1573 :
1574 40 : return xfs_dir2_node_replace(&rd->args);
1575 : }
1576 :
1577 : /*
1578 : * Reset the link count of this directory and adjust the unlinked list pointers
1579 : * as needed.
1580 : */
1581 : STATIC int
1582 192949 : xrep_dir_set_nlink(
1583 : struct xrep_dir *rd)
1584 : {
1585 192949 : struct xfs_scrub *sc = rd->sc;
1586 192949 : struct xfs_inode *dp = sc->ip;
1587 192949 : struct xfs_perag *pag;
1588 192949 : int error;
1589 :
1590 : /*
1591 : * The directory is not on the incore unlinked list, which means that
1592 : * it needs to be reachable via the directory tree. Update the nlink
1593 : * with our observed link count. If the directory has no parent, it
1594 : * will be moved to the orphanage.
1595 : */
1596 192949 : if (!xfs_inode_on_unlinked_list(dp)) {
1597 192949 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1598 192949 : return 0;
1599 : }
1600 :
1601 0 : xfs_emerg(dp->i_mount, "IUNLINK unlinked dir 0x%llx repair, dirents %u subdirs %llu curr_nlink %u orphan? %d", dp->i_ino, rd->dirents, rd->subdirs, VFS_I(dp)->i_nlink, rd->needs_adoption);
1602 :
1603 : /*
1604 : * The directory is on the unlinked list and we did not find any
1605 : * dirents. Set the link count to zero and let the directory
1606 : * inactivate when the last reference drops.
1607 : */
1608 0 : if (rd->dirents == 0) {
1609 0 : rd->needs_adoption = false;
1610 0 : xrep_set_nlink(sc->ip, 0);
1611 0 : return 0;
1612 : }
1613 :
1614 : /*
1615 : * The directory is on the unlinked list and we found dirents. This
1616 : * directory needs to be reachable via the directory tree. Remove the
1617 : * dir from the unlinked list and update nlink with the observed link
1618 : * count. If the directory has no parent, it will be moved to the
1619 : * orphanage.
1620 : */
1621 0 : pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
1622 0 : if (!pag) {
1623 0 : ASSERT(0);
1624 0 : return -EFSCORRUPTED;
1625 : }
1626 :
1627 0 : error = xfs_iunlink_remove(sc->tp, pag, dp);
1628 0 : xfs_perag_put(pag);
1629 0 : if (error)
1630 : return error;
1631 :
1632 0 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1633 0 : return 0;
1634 : }
1635 :
1636 : /*
1637 : * Finish replaying stashed dirent updates, allocate a transaction for swapping
1638 : * extents, and take the ILOCKs of both directories before we commit the new
1639 : * directory structure.
1640 : */
1641 : STATIC int
1642 192976 : xrep_dir_finalize_tempdir(
1643 : struct xrep_dir *rd)
1644 : {
1645 192976 : struct xfs_scrub *sc = rd->sc;
1646 192976 : int error;
1647 :
1648 192976 : if (!xfs_has_parent(sc->mp))
1649 663 : return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1650 :
1651 192313 : error = xrep_dir_replay_updates(rd);
1652 192315 : if (error)
1653 : return error;
1654 :
1655 192288 : error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1656 192280 : if (error)
1657 : return error;
1658 :
1659 : /*
1660 : * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
1661 : * possible directory updates during the time when we did not hold the
1662 : * ILOCK. There should not be any dirent updates to replay, but check
1663 : * anyway.
1664 : */
1665 192279 : if (xfarray_length(rd->dir_entries) != 0) {
1666 0 : ASSERT(xfarray_length(rd->dir_entries) == 0);
1667 0 : return -EFSCORRUPTED;
1668 : }
1669 :
1670 : return 0;
1671 : }
1672 :
1673 : /* Swap the temporary directory's data fork with the one being repaired. */
1674 : STATIC int
1675 192932 : xrep_dir_swap(
1676 : struct xrep_dir *rd)
1677 : {
1678 192932 : struct xfs_scrub *sc = rd->sc;
1679 192932 : bool ip_local, temp_local;
1680 192932 : int error = 0;
1681 :
1682 : /*
1683 : * If we never found the parent for this directory, temporarily assign
1684 : * the root dir as the parent; we'll move this to the orphanage after
1685 : * swapping the dir contents. We hold the ILOCK of the dir being
1686 : * repaired, so we're not worried about racy updates of dotdot.
1687 : */
1688 192932 : ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1689 192932 : if (rd->pscan.parent_ino == NULLFSINO) {
1690 0 : rd->needs_adoption = true;
1691 0 : rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
1692 : }
1693 :
1694 : /*
1695 : * Reset the temporary directory's '..' entry to point to the parent
1696 : * that we found. The temporary directory was created with the root
1697 : * directory as the parent, so we can skip this if repairing a
1698 : * subdirectory of the root.
1699 : *
1700 : * It's also possible that this replacement could also expand a sf
1701 : * tempdir into block format.
1702 : */
1703 192932 : if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
1704 181568 : error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
1705 181568 : rd->pscan.parent_ino, rd->tx.req.resblks);
1706 181584 : if (error)
1707 : return error;
1708 : }
1709 :
1710 : /*
1711 : * Changing the dot and dotdot entries could have changed the shape of
1712 : * the directory, so we recompute these.
1713 : */
1714 192948 : ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1715 192948 : temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1716 :
1717 : /*
1718 : * If the both files have a local format data fork and the rebuilt
1719 : * directory data would fit in the repaired file's data fork, copy
1720 : * the contents from the tempfile and update the directory link count.
1721 : * We're done now.
1722 : */
1723 192948 : if (ip_local && temp_local &&
1724 188557 : sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
1725 188557 : xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
1726 188561 : return xrep_dir_set_nlink(rd);
1727 : }
1728 :
1729 : /* Clean the transaction before we start working on the extent swap. */
1730 4391 : error = xrep_tempfile_roll_trans(rd->sc);
1731 4391 : if (error)
1732 : return error;
1733 :
1734 : /* Otherwise, make sure both data forks are in block-mapping mode. */
1735 4391 : error = xrep_dir_swap_prep(sc, temp_local, ip_local);
1736 4391 : if (error)
1737 : return error;
1738 :
1739 : /*
1740 : * Set nlink of the directory in the same transaction sequence that
1741 : * (atomically) commits the new directory data.
1742 : */
1743 4391 : error = xrep_dir_set_nlink(rd);
1744 4391 : if (error)
1745 : return error;
1746 :
1747 4391 : return xrep_tempswap_contents(sc, &rd->tx);
1748 : }
1749 :
1750 : /*
1751 : * Swap the new directory contents (which we created in the tempfile) into the
1752 : * directory being repaired.
1753 : */
1754 : STATIC int
1755 192979 : xrep_dir_rebuild_tree(
1756 : struct xrep_dir *rd)
1757 : {
1758 192979 : struct xfs_scrub *sc = rd->sc;
1759 192979 : int error;
1760 :
1761 192979 : trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
1762 :
1763 : /*
1764 : * Take the IOLOCK on the temporary file so that we can run dir
1765 : * operations with the same locks held as we would for a normal file.
1766 : * We still hold sc->ip's IOLOCK.
1767 : */
1768 192976 : error = xrep_tempfile_iolock_polled(rd->sc);
1769 192974 : if (error)
1770 : return error;
1771 :
1772 : /*
1773 : * Allocate transaction, lock inodes, and make sure that we've replayed
1774 : * all the stashed dirent updates to the tempdir. After this point,
1775 : * we're ready to swapext.
1776 : */
1777 192975 : error = xrep_dir_finalize_tempdir(rd);
1778 192968 : if (error)
1779 : return error;
1780 :
1781 192942 : if (xchk_iscan_aborted(&rd->pscan.iscan))
1782 : return -ECANCELED;
1783 :
1784 : /*
1785 : * Swap the tempdir's data fork with the file being repaired. This
1786 : * recreates the transaction and re-takes the ILOCK in the scrub
1787 : * context.
1788 : */
1789 192932 : error = xrep_dir_swap(rd);
1790 192932 : if (error)
1791 : return error;
1792 :
1793 : /*
1794 : * Release the old directory blocks and reset the data fork of the temp
1795 : * directory to an empty shortform directory because inactivation does
1796 : * nothing for directories.
1797 : */
1798 192932 : return xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
1799 : }
1800 :
1801 : /* Set up the filesystem scan so we can regenerate directory entries. */
1802 : STATIC int
1803 192995 : xrep_dir_setup_scan(
1804 : struct xrep_dir *rd)
1805 : {
1806 192995 : struct xfs_scrub *sc = rd->sc;
1807 192995 : char *descr;
1808 192995 : int error;
1809 :
1810 : /* Set up some staging memory for salvaging dirents. */
1811 192995 : descr = xchk_xfile_ino_descr(sc, "directory entries");
1812 192995 : error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
1813 : &rd->dir_entries);
1814 192998 : kfree(descr);
1815 192999 : if (error)
1816 : return error;
1817 :
1818 192999 : descr = xchk_xfile_ino_descr(sc, "directory entry names");
1819 192998 : error = xfblob_create(descr, &rd->dir_names);
1820 192999 : kfree(descr);
1821 192999 : if (error)
1822 0 : goto out_xfarray;
1823 :
1824 192999 : if (xfs_has_parent(sc->mp))
1825 192335 : error = __xrep_findparent_scan_start(sc, &rd->pscan,
1826 : xrep_dir_live_update);
1827 : else
1828 664 : error = xrep_findparent_scan_start(sc, &rd->pscan);
1829 192999 : if (error)
1830 0 : goto out_xfblob;
1831 :
1832 : return 0;
1833 :
1834 : out_xfblob:
1835 0 : xfblob_destroy(rd->dir_names);
1836 0 : rd->dir_names = NULL;
1837 0 : out_xfarray:
1838 0 : xfarray_destroy(rd->dir_entries);
1839 0 : rd->dir_entries = NULL;
1840 0 : return error;
1841 : }
1842 :
1843 : /*
1844 : * Move the current file to the orphanage.
1845 : *
1846 : * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon
1847 : * successful return, the scrub transaction will have enough extra reservation
1848 : * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
1849 : * orphanage; and both inodes will be ijoined.
1850 : */
1851 : STATIC int
1852 0 : xrep_dir_move_to_orphanage(
1853 : struct xrep_dir *rd)
1854 : {
1855 0 : struct xfs_scrub *sc = rd->sc;
1856 0 : xfs_ino_t orig_parent, new_parent;
1857 0 : int error;
1858 :
1859 : /* No orphanage? We can't fix this. */
1860 0 : if (!sc->orphanage)
1861 : return -EFSCORRUPTED;
1862 :
1863 : /*
1864 : * We are about to drop the ILOCK on sc->ip to lock the orphanage and
1865 : * prepare for the adoption. Therefore, look up the old dotdot entry
1866 : * for sc->ip so that we can compare it after we re-lock sc->ip.
1867 : */
1868 0 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
1869 0 : if (error)
1870 : return error;
1871 :
1872 : /*
1873 : * We hold ILOCK_EXCL on both the directory and the tempdir after a
1874 : * successful rebuild. Before we can move the directory to the
1875 : * orphanage, we must roll to a clean unjoined transaction.
1876 : */
1877 0 : error = xfs_trans_roll(&sc->tp);
1878 0 : if (error)
1879 : return error;
1880 :
1881 : /*
1882 : * Because the orphanage is just another directory in the filesystem,
1883 : * we must take its IOLOCK to coordinate with the VFS. We cannot take
1884 : * an IOLOCK while holding an ILOCK, so we must drop them all. We may
1885 : * have to drop the IOLOCK as well.
1886 : */
1887 0 : xrep_tempfile_iunlock_both(sc);
1888 :
1889 0 : error = xrep_adoption_init(sc, &rd->adoption);
1890 0 : if (error)
1891 : return error;
1892 :
1893 0 : if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
1894 0 : xchk_iunlock(sc, sc->ilock_flags);
1895 0 : error = xrep_orphanage_iolock_two(sc);
1896 0 : if (error)
1897 0 : goto err_adoption;
1898 : }
1899 :
1900 : /* Prepare for the adoption and lock both down. */
1901 0 : error = xrep_adoption_prep(&rd->adoption);
1902 0 : if (error)
1903 0 : goto err_adoption;
1904 :
1905 0 : error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name);
1906 0 : if (error)
1907 0 : goto err_adoption;
1908 :
1909 : /*
1910 : * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
1911 : * entry again. If the parent changed or the child was unlinked while
1912 : * the child directory was unlocked, we don't need to move the child to
1913 : * the orphanage after all.
1914 : */
1915 0 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
1916 0 : if (error)
1917 0 : goto err_adoption;
1918 0 : if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
1919 0 : error = 0;
1920 0 : goto err_adoption;
1921 : }
1922 :
1923 : /* Attach to the orphanage. */
1924 0 : return xrep_adoption_commit(&rd->adoption);
1925 0 : err_adoption:
1926 0 : xrep_adoption_cancel(&rd->adoption, error);
1927 0 : return error;
1928 : }
1929 :
1930 : /*
1931 : * Repair the directory metadata.
1932 : *
1933 : * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer
1934 : * cache in XFS can't handle aliased multiblock buffers, so this might
1935 : * misbehave if the directory blocks are crosslinked with other filesystem
1936 : * metadata.
1937 : *
1938 : * XXX: Is it necessary to check the dcache for this directory to make sure
1939 : * that we always recreate every cached entry?
1940 : */
1941 : int
1942 582826 : xrep_directory(
1943 : struct xfs_scrub *sc)
1944 : {
1945 582826 : struct xrep_dir *rd = sc->buf;
1946 582826 : int error;
1947 :
1948 : /* The rmapbt is required to reap the old data fork. */
1949 582826 : if (!xfs_has_rmapbt(sc->mp))
1950 : return -EOPNOTSUPP;
1951 :
1952 192996 : error = xrep_dir_setup_scan(rd);
1953 192999 : if (error)
1954 : return error;
1955 :
1956 192999 : if (xfs_has_parent(sc->mp))
1957 192335 : error = xrep_dir_scan_dirtree(rd);
1958 : else
1959 664 : error = xrep_dir_salvage_entries(rd);
1960 192997 : if (error)
1961 20 : goto out_teardown;
1962 :
1963 : /* Last chance to abort before we start committing fixes. */
1964 192977 : if (xchk_should_terminate(sc, &error))
1965 0 : goto out_teardown;
1966 :
1967 192977 : error = xrep_dir_rebuild_tree(rd);
1968 192973 : if (error)
1969 25 : goto out_teardown;
1970 :
1971 192948 : if (rd->needs_adoption) {
1972 0 : error = xrep_dir_move_to_orphanage(rd);
1973 0 : if (error)
1974 : goto out_teardown;
1975 : }
1976 :
1977 192948 : out_teardown:
1978 192993 : xrep_dir_teardown(sc);
1979 192996 : return error;
1980 : }
|