Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_sb.h"
17 : #include "xfs_inode.h"
18 : #include "xfs_icache.h"
19 : #include "xfs_da_format.h"
20 : #include "xfs_da_btree.h"
21 : #include "xfs_dir2.h"
22 : #include "xfs_dir2_priv.h"
23 : #include "xfs_bmap.h"
24 : #include "xfs_quota.h"
25 : #include "xfs_bmap_btree.h"
26 : #include "xfs_trans_space.h"
27 : #include "xfs_bmap_util.h"
28 : #include "xfs_swapext.h"
29 : #include "xfs_xchgrange.h"
30 : #include "xfs_ag.h"
31 : #include "xfs_parent.h"
32 : #include "scrub/xfs_scrub.h"
33 : #include "scrub/scrub.h"
34 : #include "scrub/common.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/tempfile.h"
38 : #include "scrub/tempswap.h"
39 : #include "scrub/xfile.h"
40 : #include "scrub/xfarray.h"
41 : #include "scrub/xfblob.h"
42 : #include "scrub/iscan.h"
43 : #include "scrub/readdir.h"
44 : #include "scrub/reap.h"
45 : #include "scrub/findparent.h"
46 : #include "scrub/orphanage.h"
47 : #include "scrub/listxattr.h"
48 :
49 : /*
50 : * Directory Repair
51 : * ================
52 : *
53 : * We repair directories by reading the directory data blocks looking for
54 : * directory entries that look salvageable (name passes verifiers, entry points
55 : * to a valid allocated inode, etc). Each entry worth salvaging is stashed in
56 : * memory, and the stashed entries are periodically replayed into a temporary
57 : * directory to constrain memory use. Batching the construction of the
58 : * temporary directory in this fashion reduces lock cycling of the directory
59 : * being repaired and the temporary directory, and will later become important
60 : * for parent pointer scanning.
61 : *
62 : * If parent pointers are enabled on this filesystem, we instead reconstruct
63 : * the directory by visiting each parent pointer of each file in the filesystem
64 : * and translating the relevant parent pointer records into dirents. In this
65 : * case, it is advantageous to stash all directory entries created from parent
66 : * pointers for a single child file before replaying them into the temporary
67 : * directory. To save memory, the live filesystem scan reuses the findparent
68 : * fields. Directory repair chooses either parent pointer scanning or
69 : * directory entry salvaging, but not both.
70 : *
71 : * Directory entries added to the temporary directory do not elevate the link
72 : * counts of the inodes found. When salvaging completes, the remaining stashed
73 : * entries are replayed to the temporary directory. An atomic extent swap is
74 : * used to commit the new directory blocks to the directory being repaired.
75 : * This will disrupt readdir cursors.
76 : *
77 : * Legacy Locking Issues
78 : * ---------------------
79 : *
80 : * Prior to Linux 6.5, if /a, /a/b, and /c were all directories, the VFS would
81 : * not take i_rwsem on /a/b for a "mv /a/b /c/" operation. This meant that
82 : * only b's ILOCK protected b's dotdot update. b's IOLOCK was not taken,
83 : * unlike every other dotdot update (link, remove, mkdir). If the repair code
84 : * dropped the ILOCK, it was required either to revalidate the dotdot entry
85 : * or to use dirent hooks to capture updates from other threads.
86 : */
87 :
88 : /* Create a dirent in the tempdir. */
89 : #define XREP_DIRENT_ADD (1)
90 :
91 : /* Remove a dirent from the tempdir. */
92 : #define XREP_DIRENT_REMOVE (2)
93 :
94 : /* Directory entry to be restored in the new directory. */
95 : struct xrep_dirent {
96 : /* Cookie for retrieval of the dirent name. */
97 : xfblob_cookie name_cookie;
98 :
99 : /* Target inode number. */
100 : xfs_ino_t ino;
101 :
102 : /* Length of the dirent name. */
103 : uint8_t namelen;
104 :
105 : /* File type of the dirent. */
106 : uint8_t ftype;
107 :
108 : /* XREP_DIRENT_{ADD,REMOVE} */
109 : uint8_t action;
110 : };
111 :
112 : /*
113 : * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
114 : * before we write them to the temp dir.
115 : */
116 : #define XREP_DIR_MAX_STASH_BYTES (PAGE_SIZE * 8)
117 :
118 : struct xrep_dir {
119 : struct xfs_scrub *sc;
120 :
121 : /* Fixed-size array of xrep_dirent structures. */
122 : struct xfarray *dir_entries;
123 :
124 : /* Blobs containing directory entry names. */
125 : struct xfblob *dir_names;
126 :
127 : /* Information for swapping data forks at the end. */
128 : struct xrep_tempswap tx;
129 :
130 : /* Preallocated args struct for performing dir operations */
131 : struct xfs_da_args args;
132 :
133 : /*
134 : * Information used to scan the filesystem to find the inumber of the
135 : * dotdot entry for this directory. For directory salvaging when
136 : * parent pointers are not enabled, we use the findparent_* functions
137 : * on this object and access only the parent_ino field directly.
138 : *
139 : * When parent pointers are enabled, however, the pptr scanner uses the
140 : * iscan, hooks, lock, and parent_ino fields of this object directly.
141 : * @pscan.lock coordinates access to dir_entries, dir_names,
142 : * parent_ino, subdirs, dirents, and args. This reduces the memory
143 : * requirements of this structure.
144 : */
145 : struct xrep_parent_scan_info pscan;
146 :
147 : /*
148 : * Context information for attaching this directory to the lost+found
149 : * if this directory does not have a parent.
150 : */
151 : struct xrep_adoption adoption;
152 :
153 : /* How many subdirectories did we find? */
154 : uint64_t subdirs;
155 :
156 : /* How many dirents did we find? */
157 : unsigned int dirents;
158 :
159 : /* Should we move this directory to the orphanage? */
160 : bool needs_adoption;
161 :
162 : /*
163 : * Scratch buffer for reading parent pointers from child files. The
164 : * p_name field is used to flush stashed dirents into the temporary
165 : * directory in between parent pointers. At the very end of the
166 : * repair, it can also be used to compute the lost+found filename
167 : * if we need to reparent the directory.
168 : */
169 : struct xfs_parent_name_irec pptr;
170 : };
171 :
172 : /* Tear down all the incore stuff we created. */
173 : static void
174 12808 : xrep_dir_teardown(
175 : struct xfs_scrub *sc)
176 : {
177 12808 : struct xrep_dir *rd = sc->buf;
178 :
179 12808 : xrep_findparent_scan_teardown(&rd->pscan);
180 12814 : xfblob_destroy(rd->dir_names);
181 12809 : xfarray_destroy(rd->dir_entries);
182 12813 : }
183 :
184 : /* Set up for a directory repair. */
185 : int
186 184730 : xrep_setup_directory(
187 : struct xfs_scrub *sc)
188 : {
189 184730 : struct xrep_dir *rd;
190 184730 : int error;
191 :
192 184730 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
193 :
194 184709 : error = xrep_orphanage_try_create(sc);
195 184730 : if (error)
196 : return error;
197 :
198 184730 : error = xrep_tempfile_create(sc, S_IFDIR);
199 184687 : if (error)
200 : return error;
201 :
202 184686 : rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
203 184659 : if (!rd)
204 : return -ENOMEM;
205 184659 : rd->sc = sc;
206 184659 : sc->buf = rd;
207 :
208 184659 : return 0;
209 : }
210 :
211 : /*
212 : * If we're the root of a directory tree, we are our own parent. If we're an
213 : * unlinked directory, the parent /won't/ have a link to us. Set the parent
214 : * directory to the root for both cases. Returns NULLFSINO if we don't know
215 : * what to do.
216 : */
217 : static inline xfs_ino_t
218 : xrep_dir_self_parent(
219 : struct xrep_dir *rd)
220 : {
221 : struct xfs_scrub *sc = rd->sc;
222 :
223 : if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
224 : return sc->mp->m_sb.sb_rootino;
225 :
226 : if (VFS_I(sc->ip)->i_nlink == 0)
227 : return sc->mp->m_sb.sb_rootino;
228 :
229 : return NULLFSINO;
230 : }
231 :
232 : /*
233 : * Look up the dotdot entry and confirm that it's really the parent.
234 : * Returns NULLFSINO if we don't know what to do.
235 : */
236 : static inline xfs_ino_t
237 0 : xrep_dir_lookup_parent(
238 : struct xrep_dir *rd)
239 : {
240 0 : struct xfs_scrub *sc = rd->sc;
241 0 : xfs_ino_t ino;
242 0 : int error;
243 :
244 0 : error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
245 0 : if (error)
246 : return NULLFSINO;
247 0 : if (!xfs_verify_dir_ino(sc->mp, ino))
248 : return NULLFSINO;
249 :
250 0 : error = xrep_findparent_confirm(sc, &ino);
251 0 : if (error)
252 : return NULLFSINO;
253 :
254 0 : return ino;
255 : }
256 :
257 : /*
258 : * Look up '..' in the dentry cache and confirm that it's really the parent.
259 : * Returns NULLFSINO if the dcache misses or if the hit is implausible.
260 : */
261 : static inline xfs_ino_t
262 0 : xrep_dir_dcache_parent(
263 : struct xrep_dir *rd)
264 : {
265 0 : struct xfs_scrub *sc = rd->sc;
266 0 : xfs_ino_t parent_ino;
267 0 : int error;
268 :
269 0 : parent_ino = xrep_findparent_from_dcache(sc);
270 0 : if (parent_ino == NULLFSINO)
271 : return parent_ino;
272 :
273 0 : error = xrep_findparent_confirm(sc, &parent_ino);
274 0 : if (error)
275 : return NULLFSINO;
276 :
277 0 : return parent_ino;
278 : }
279 :
280 : /* Try to find the parent of the directory being repaired. */
281 : STATIC int
282 0 : xrep_dir_find_parent(
283 : struct xrep_dir *rd)
284 : {
285 0 : xfs_ino_t ino;
286 :
287 0 : ino = xrep_findparent_self_reference(rd->sc);
288 0 : if (ino != NULLFSINO) {
289 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
290 0 : return 0;
291 : }
292 :
293 0 : ino = xrep_dir_dcache_parent(rd);
294 0 : if (ino != NULLFSINO) {
295 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
296 0 : return 0;
297 : }
298 :
299 0 : ino = xrep_dir_lookup_parent(rd);
300 0 : if (ino != NULLFSINO) {
301 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
302 0 : return 0;
303 : }
304 :
305 : /*
306 : * A full filesystem scan is the last resort. On a busy filesystem,
307 : * the scan can fail with -EBUSY if we cannot grab IOLOCKs. That means
308 : * that we don't know what who the parent is, so we should return to
309 : * userspace.
310 : */
311 0 : return xrep_findparent_scan(&rd->pscan);
312 : }
313 :
314 : /*
315 : * Decide if we want to salvage this entry. We don't bother with oversized
316 : * names or the dot entry.
317 : */
318 : STATIC int
319 0 : xrep_dir_want_salvage(
320 : struct xrep_dir *rd,
321 : const char *name,
322 : int namelen,
323 : xfs_ino_t ino)
324 : {
325 0 : struct xfs_mount *mp = rd->sc->mp;
326 :
327 : /* No pointers to ourselves or to garbage. */
328 0 : if (ino == rd->sc->ip->i_ino)
329 : return false;
330 0 : if (!xfs_verify_dir_ino(mp, ino))
331 : return false;
332 :
333 : /* No weird looking names or dot entries. */
334 0 : if (namelen >= MAXNAMELEN || namelen <= 0)
335 : return false;
336 0 : if (namelen == 1 && name[0] == '.')
337 0 : return false;
338 :
339 : return true;
340 : }
341 :
342 : /*
343 : * Remember that we want to create a dirent in the tempdir. These stashed
344 : * actions will be replayed later.
345 : */
346 : STATIC int
347 129793 : xrep_dir_stash_createname(
348 : struct xrep_dir *rd,
349 : const struct xfs_name *name,
350 : xfs_ino_t ino)
351 : {
352 129793 : struct xrep_dirent dirent = {
353 : .action = XREP_DIRENT_ADD,
354 : .ino = ino,
355 129793 : .namelen = name->len,
356 129793 : .ftype = name->type,
357 : };
358 129793 : int error;
359 :
360 129793 : trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
361 :
362 129793 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
363 129793 : name->len);
364 129793 : if (error)
365 : return error;
366 :
367 129793 : return xfarray_append(rd->dir_entries, &dirent);
368 : }
369 :
370 : /*
371 : * Remember that we want to remove a dirent from the tempdir. These stashed
372 : * actions will be replayed later.
373 : */
374 : STATIC int
375 0 : xrep_dir_stash_removename(
376 : struct xrep_dir *rd,
377 : const struct xfs_name *name,
378 : xfs_ino_t ino)
379 : {
380 0 : struct xrep_dirent dirent = {
381 : .action = XREP_DIRENT_REMOVE,
382 : .ino = ino,
383 0 : .namelen = name->len,
384 0 : .ftype = name->type,
385 : };
386 0 : int error;
387 :
388 0 : trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
389 :
390 0 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
391 0 : name->len);
392 0 : if (error)
393 : return error;
394 :
395 0 : return xfarray_append(rd->dir_entries, &dirent);
396 : }
397 :
398 : /* Allocate an in-core record to hold entries while we rebuild the dir data. */
399 : STATIC int
400 0 : xrep_dir_salvage_entry(
401 : struct xrep_dir *rd,
402 : unsigned char *name,
403 : unsigned int namelen,
404 : xfs_ino_t ino)
405 : {
406 0 : struct xfs_name xname = {
407 : .name = name,
408 : };
409 0 : struct xfs_scrub *sc = rd->sc;
410 0 : struct xfs_inode *ip;
411 0 : unsigned int i = 0;
412 0 : int error = 0;
413 :
414 0 : if (xchk_should_terminate(sc, &error))
415 0 : return error;
416 :
417 : /*
418 : * Truncate the name to the first character that would trip namecheck.
419 : * If we no longer have a name after that, ignore this entry.
420 : */
421 0 : while (i < namelen && name[i] != 0 && name[i] != '/')
422 0 : i++;
423 0 : if (i == 0)
424 : return 0;
425 0 : xname.len = i;
426 :
427 : /* Ignore '..' entries; we already picked the new parent. */
428 0 : if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
429 0 : trace_xrep_dir_salvaged_parent(sc->ip, ino);
430 0 : return 0;
431 : }
432 :
433 0 : trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
434 :
435 : /*
436 : * Compute the ftype or dump the entry if we can't. We don't lock the
437 : * inode because inodes can't change type while we have a reference.
438 : */
439 0 : error = xchk_iget(sc, ino, &ip);
440 0 : if (error)
441 : return 0;
442 :
443 0 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
444 0 : xchk_irele(sc, ip);
445 :
446 0 : return xrep_dir_stash_createname(rd, &xname, ino);
447 : }
448 :
449 : /* Record a shortform directory entry for later reinsertion. */
450 : STATIC int
451 0 : xrep_dir_salvage_sf_entry(
452 : struct xrep_dir *rd,
453 : struct xfs_dir2_sf_hdr *sfp,
454 : struct xfs_dir2_sf_entry *sfep)
455 : {
456 0 : xfs_ino_t ino;
457 :
458 0 : ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
459 0 : if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
460 : return 0;
461 :
462 0 : return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
463 : }
464 :
465 : /* Record a regular directory entry for later reinsertion. */
466 : STATIC int
467 0 : xrep_dir_salvage_data_entry(
468 : struct xrep_dir *rd,
469 : struct xfs_dir2_data_entry *dep)
470 : {
471 0 : xfs_ino_t ino;
472 :
473 0 : ino = be64_to_cpu(dep->inumber);
474 0 : if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
475 : return 0;
476 :
477 0 : return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
478 : }
479 :
480 : /* Try to recover block/data format directory entries. */
481 : STATIC int
482 0 : xrep_dir_recover_data(
483 : struct xrep_dir *rd,
484 : struct xfs_buf *bp)
485 : {
486 0 : struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo;
487 0 : unsigned int offset;
488 0 : unsigned int end;
489 0 : int error = 0;
490 :
491 : /*
492 : * Loop over the data portion of the block.
493 : * Each object is a real entry (dep) or an unused one (dup).
494 : */
495 0 : offset = geo->data_entry_offset;
496 0 : end = min_t(unsigned int, BBTOB(bp->b_length),
497 : xfs_dir3_data_end_offset(geo, bp->b_addr));
498 :
499 0 : while (offset < end) {
500 0 : struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
501 0 : struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
502 :
503 0 : if (xchk_should_terminate(rd->sc, &error))
504 0 : return error;
505 :
506 : /* Skip unused entries. */
507 0 : if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
508 0 : offset += be16_to_cpu(dup->length);
509 0 : continue;
510 : }
511 :
512 : /* Don't walk off the end of the block. */
513 0 : offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
514 0 : if (offset > end)
515 : break;
516 :
517 : /* Ok, let's save this entry. */
518 0 : error = xrep_dir_salvage_data_entry(rd, dep);
519 0 : if (error)
520 0 : return error;
521 :
522 : }
523 :
524 : return 0;
525 : }
526 :
527 : /* Try to recover shortform directory entries. */
528 : STATIC int
529 0 : xrep_dir_recover_sf(
530 : struct xrep_dir *rd)
531 : {
532 0 : struct xfs_dir2_sf_hdr *sfp;
533 0 : struct xfs_dir2_sf_entry *sfep;
534 0 : struct xfs_dir2_sf_entry *next;
535 0 : struct xfs_ifork *ifp;
536 0 : xfs_ino_t ino;
537 0 : unsigned char *end;
538 0 : int error = 0;
539 :
540 0 : ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
541 0 : sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
542 0 : end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
543 :
544 0 : ino = xfs_dir2_sf_get_parent_ino(sfp);
545 0 : trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
546 :
547 0 : sfep = xfs_dir2_sf_firstentry(sfp);
548 0 : while ((unsigned char *)sfep < end) {
549 0 : if (xchk_should_terminate(rd->sc, &error))
550 0 : return error;
551 :
552 0 : next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
553 0 : if ((unsigned char *)next > end)
554 : break;
555 :
556 : /* Ok, let's save this entry. */
557 0 : error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
558 0 : if (error)
559 0 : return error;
560 :
561 : sfep = next;
562 : }
563 :
564 : return 0;
565 : }
566 :
567 : /*
568 : * Try to figure out the format of this directory from the data fork mappings
569 : * and the directory size. If we can be reasonably sure of format, we can be
570 : * more aggressive in salvaging directory entries. On return, @magic_guess
571 : * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format"
572 : * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory,
573 : * and 0 if we can't tell.
574 : */
575 : STATIC void
576 0 : xrep_dir_guess_format(
577 : struct xrep_dir *rd,
578 : __be32 *magic_guess)
579 : {
580 0 : struct xfs_inode *dp = rd->sc->ip;
581 0 : struct xfs_mount *mp = rd->sc->mp;
582 0 : struct xfs_da_geometry *geo = mp->m_dir_geo;
583 0 : xfs_fileoff_t last;
584 0 : int error;
585 :
586 0 : ASSERT(xfs_has_crc(mp));
587 :
588 0 : *magic_guess = 0;
589 :
590 : /*
591 : * If there's a single directory block and the directory size is
592 : * exactly one block, this has to be a single block format directory.
593 : */
594 0 : error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
595 0 : if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
596 0 : dp->i_disk_size == geo->blksize) {
597 0 : *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
598 0 : return;
599 : }
600 :
601 : /*
602 : * If the last extent before the leaf offset matches the directory
603 : * size and the directory size is larger than 1 block, this is a
604 : * data format directory.
605 : */
606 0 : last = geo->leafblk;
607 0 : error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
608 0 : if (!error &&
609 0 : XFS_FSB_TO_B(mp, last) > geo->blksize &&
610 0 : XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
611 0 : *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
612 0 : return;
613 : }
614 : }
615 :
616 : /* Recover directory entries from a specific directory block. */
617 : STATIC int
618 0 : xrep_dir_recover_dirblock(
619 : struct xrep_dir *rd,
620 : __be32 magic_guess,
621 : xfs_dablk_t dabno)
622 : {
623 0 : struct xfs_dir2_data_hdr *hdr;
624 0 : struct xfs_buf *bp;
625 0 : __be32 oldmagic;
626 0 : int error;
627 :
628 : /*
629 : * Try to read buffer. We invalidate them in the next step so we don't
630 : * bother to set a buffer type or ops.
631 : */
632 0 : error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
633 : XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
634 0 : if (error || !bp)
635 : return error;
636 :
637 0 : hdr = bp->b_addr;
638 0 : oldmagic = hdr->magic;
639 :
640 0 : trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
641 0 : be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
642 :
643 : /*
644 : * If we're sure of the block's format, proceed with the salvage
645 : * operation using the specified magic number.
646 : */
647 0 : if (magic_guess) {
648 0 : hdr->magic = magic_guess;
649 0 : goto recover;
650 : }
651 :
652 : /*
653 : * If we couldn't guess what type of directory this is, then we will
654 : * only salvage entries from directory blocks that match the magic
655 : * number and pass verifiers.
656 : */
657 0 : switch (hdr->magic) {
658 0 : case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
659 : case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
660 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
661 0 : goto out;
662 0 : if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
663 0 : goto out;
664 : break;
665 0 : case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
666 : case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
667 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
668 0 : goto out;
669 0 : if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
670 0 : goto out;
671 : break;
672 0 : default:
673 0 : goto out;
674 : }
675 :
676 0 : recover:
677 0 : error = xrep_dir_recover_data(rd, bp);
678 :
679 0 : out:
680 0 : hdr->magic = oldmagic;
681 0 : xfs_trans_brelse(rd->sc->tp, bp);
682 0 : return error;
683 : }
684 :
685 : static inline void
686 153591 : xrep_dir_init_args(
687 : struct xrep_dir *rd,
688 : struct xfs_inode *dp,
689 : const struct xfs_name *name)
690 : {
691 153591 : memset(&rd->args, 0, sizeof(struct xfs_da_args));
692 153591 : rd->args.geo = rd->sc->mp->m_dir_geo;
693 153591 : rd->args.whichfork = XFS_DATA_FORK;
694 153591 : rd->args.owner = rd->sc->ip->i_ino;
695 153591 : rd->args.trans = rd->sc->tp;
696 153591 : rd->args.dp = dp;
697 153591 : if (!name)
698 : return;
699 140781 : rd->args.name = name->name;
700 140781 : rd->args.namelen = name->len;
701 140781 : rd->args.filetype = name->type;
702 140781 : rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
703 : }
704 :
705 : /* Replay a stashed createname into the temporary directory. */
706 : STATIC int
707 129783 : xrep_dir_replay_createname(
708 : struct xrep_dir *rd,
709 : const struct xfs_name *name,
710 : xfs_ino_t inum,
711 : xfs_extlen_t total)
712 : {
713 129783 : struct xfs_scrub *sc = rd->sc;
714 129783 : struct xfs_inode *dp = rd->sc->tempip;
715 129783 : bool is_block, is_leaf;
716 129783 : int error;
717 :
718 129783 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
719 :
720 129783 : error = xfs_dir_ino_validate(sc->mp, inum);
721 129783 : if (error)
722 : return error;
723 :
724 129783 : trace_xrep_dir_replay_createname(dp, name, inum);
725 :
726 129783 : xrep_dir_init_args(rd, dp, name);
727 129783 : rd->args.inumber = inum;
728 129783 : rd->args.total = total;
729 129783 : rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
730 :
731 129783 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
732 72023 : return xfs_dir2_sf_addname(&rd->args);
733 :
734 57760 : error = xfs_dir2_isblock(&rd->args, &is_block);
735 57760 : if (error)
736 : return error;
737 57760 : if (is_block)
738 6054 : return xfs_dir2_block_addname(&rd->args);
739 :
740 51706 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
741 51706 : if (error)
742 : return error;
743 51706 : if (is_leaf)
744 9603 : return xfs_dir2_leaf_addname(&rd->args);
745 :
746 42103 : return xfs_dir2_node_addname(&rd->args);
747 : }
748 :
749 : /* Replay a stashed removename onto the temporary directory. */
750 : STATIC int
751 0 : xrep_dir_replay_removename(
752 : struct xrep_dir *rd,
753 : const struct xfs_name *name,
754 : xfs_extlen_t total)
755 : {
756 0 : struct xfs_inode *dp = rd->args.dp;
757 0 : bool is_block, is_leaf;
758 0 : int error;
759 :
760 0 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
761 :
762 0 : xrep_dir_init_args(rd, dp, name);
763 0 : rd->args.op_flags = 0;
764 0 : rd->args.total = total;
765 :
766 0 : trace_xrep_dir_replay_removename(dp, name, 0);
767 :
768 0 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
769 0 : return xfs_dir2_sf_removename(&rd->args);
770 :
771 0 : error = xfs_dir2_isblock(&rd->args, &is_block);
772 0 : if (error)
773 : return error;
774 0 : if (is_block)
775 0 : return xfs_dir2_block_removename(&rd->args);
776 :
777 0 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
778 0 : if (error)
779 : return error;
780 0 : if (is_leaf)
781 0 : return xfs_dir2_leaf_removename(&rd->args);
782 :
783 0 : return xfs_dir2_node_removename(&rd->args);
784 : }
785 :
786 : /*
787 : * Add this stashed incore directory entry to the temporary directory.
788 : * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
789 : * must not be in transaction context.
790 : */
791 : STATIC int
792 129783 : xrep_dir_replay_update(
793 : struct xrep_dir *rd,
794 : const struct xrep_dirent *dirent)
795 : {
796 129783 : struct xfs_name name = {
797 129783 : .len = dirent->namelen,
798 129783 : .type = dirent->ftype,
799 129783 : .name = rd->pptr.p_name,
800 : };
801 129783 : struct xfs_mount *mp = rd->sc->mp;
802 : #ifdef DEBUG
803 129783 : xfs_ino_t ino;
804 : #endif
805 129783 : uint resblks;
806 129783 : int error;
807 :
808 129783 : resblks = xfs_link_space_res(mp, dirent->namelen);
809 129783 : error = xchk_trans_alloc(rd->sc, resblks);
810 129783 : if (error)
811 : return error;
812 :
813 : /* Lock the temporary directory and join it to the transaction */
814 129783 : xrep_tempfile_ilock(rd->sc);
815 129783 : xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);
816 :
817 129783 : switch (dirent->action) {
818 129783 : case XREP_DIRENT_ADD:
819 : /*
820 : * Create a replacement dirent in the temporary directory.
821 : * Note that _createname doesn't check for existing entries.
822 : * There shouldn't be any in the temporary dir, but we'll
823 : * verify this in debug mode.
824 : */
825 : #ifdef DEBUG
826 129783 : error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
827 129783 : if (error != -ENOENT) {
828 0 : ASSERT(error != -ENOENT);
829 0 : goto out_cancel;
830 : }
831 : #endif
832 :
833 129783 : error = xrep_dir_replay_createname(rd, &name, dirent->ino,
834 : resblks);
835 129783 : if (error)
836 0 : goto out_cancel;
837 :
838 129783 : if (name.type == XFS_DIR3_FT_DIR)
839 15970 : rd->subdirs++;
840 129783 : rd->dirents++;
841 129783 : break;
842 0 : case XREP_DIRENT_REMOVE:
843 : /*
844 : * Remove a dirent from the temporary directory. Note that
845 : * _removename doesn't check the inode target of the exist
846 : * entry. There should be a perfect match in the temporary
847 : * dir, but we'll verify this in debug mode.
848 : */
849 : #ifdef DEBUG
850 0 : error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
851 0 : if (error) {
852 0 : ASSERT(error != 0);
853 0 : goto out_cancel;
854 : }
855 0 : if (ino != dirent->ino) {
856 0 : ASSERT(ino == dirent->ino);
857 0 : error = -EIO;
858 0 : goto out_cancel;
859 : }
860 : #endif
861 :
862 0 : error = xrep_dir_replay_removename(rd, &name, resblks);
863 0 : if (error)
864 0 : goto out_cancel;
865 :
866 0 : if (name.type == XFS_DIR3_FT_DIR)
867 0 : rd->subdirs--;
868 0 : rd->dirents--;
869 0 : break;
870 0 : default:
871 0 : ASSERT(0);
872 0 : error = -EIO;
873 0 : goto out_cancel;
874 : }
875 :
876 : /* Commit and unlock. */
877 129783 : error = xrep_trans_commit(rd->sc);
878 129783 : if (error)
879 : return error;
880 :
881 129783 : xrep_tempfile_iunlock(rd->sc);
882 129783 : return 0;
883 0 : out_cancel:
884 0 : xchk_trans_cancel(rd->sc);
885 0 : xrep_tempfile_iunlock(rd->sc);
886 0 : return error;
887 : }
888 :
889 : /*
890 : * Flush stashed incore dirent updates that have been recorded by the scanner.
891 : * This is done to reduce the memory requirements of the directory rebuild,
892 : * since directories can contain up to 32GB of directory data.
893 : *
894 : * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir
895 : * IOLOCK.
896 : */
897 : STATIC int
898 12813 : xrep_dir_replay_updates(
899 : struct xrep_dir *rd)
900 : {
901 12813 : xfarray_idx_t array_cur;
902 12813 : int error;
903 :
904 : /* Add all the salvaged dirents to the temporary directory. */
905 142596 : foreach_xfarray_idx(rd->dir_entries, array_cur) {
906 129783 : struct xrep_dirent dirent;
907 :
908 129783 : error = xfarray_load(rd->dir_entries, array_cur, &dirent);
909 129783 : if (error)
910 0 : return error;
911 :
912 : /* The dirent name is stored in the in-core buffer. */
913 129783 : error = xfblob_load(rd->dir_names, dirent.name_cookie,
914 129783 : rd->pptr.p_name, dirent.namelen);
915 129783 : if (error)
916 0 : return error;
917 129783 : rd->pptr.p_name[MAXNAMELEN - 1] = 0;
918 :
919 129783 : error = xrep_dir_replay_update(rd, &dirent);
920 129783 : if (error)
921 0 : return error;
922 : }
923 :
924 : /* Empty out both arrays now that we've added the entries. */
925 12813 : xfarray_truncate(rd->dir_entries);
926 12813 : xfblob_truncate(rd->dir_names);
927 12813 : return 0;
928 : }
929 :
930 : /*
931 : * Periodically flush stashed directory entries to the temporary dir. This
932 : * is done to reduce the memory requirements of the directory rebuild, since
933 : * directories can contain up to 32GB of directory data.
934 : */
935 : STATIC int
936 0 : xrep_dir_flush_stashed(
937 : struct xrep_dir *rd)
938 : {
939 0 : int error;
940 :
941 : /*
942 : * Entering this function, the scrub context has a reference to the
943 : * inode being repaired, the temporary file, and a scrub transaction
944 : * that we use during dirent salvaging to avoid livelocking if there
945 : * are cycles in the directory structures. We hold ILOCK_EXCL on both
946 : * the inode being repaired and the temporary file, though they are
947 : * not ijoined to the scrub transaction.
948 : *
949 : * To constrain kernel memory use, we occasionally write salvaged
950 : * dirents from the xfarray and xfblob structures into the temporary
951 : * directory in preparation for swapping the directory structures at
952 : * the end. Updating the temporary file requires a transaction, so we
953 : * commit the scrub transaction and drop the two ILOCKs so that
954 : * we can allocate whatever transaction we want.
955 : *
956 : * We still hold IOLOCK_EXCL on the inode being repaired, which
957 : * prevents anyone from accessing the damaged directory data while we
958 : * repair it.
959 : */
960 0 : error = xrep_trans_commit(rd->sc);
961 0 : if (error)
962 : return error;
963 0 : xchk_iunlock(rd->sc, XFS_ILOCK_EXCL);
964 :
965 : /*
966 : * Take the IOLOCK of the temporary file while we modify dirents. This
967 : * isn't strictly required because the temporary file is never revealed
968 : * to userspace, but we follow the same locking rules. We still hold
969 : * sc->ip's IOLOCK.
970 : */
971 0 : error = xrep_tempfile_iolock_polled(rd->sc);
972 0 : if (error)
973 : return error;
974 :
975 : /* Write to the tempdir all the updates that we've stashed. */
976 0 : error = xrep_dir_replay_updates(rd);
977 0 : xrep_tempfile_iounlock(rd->sc);
978 0 : if (error)
979 : return error;
980 :
981 : /*
982 : * Recreate the salvage transaction and relock the dir we're salvaging.
983 : */
984 0 : error = xchk_trans_alloc(rd->sc, 0);
985 0 : if (error)
986 : return error;
987 0 : xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
988 0 : return 0;
989 : }
990 :
991 : /* Decide if we've stashed too much dirent data in memory. */
992 : static inline bool
993 358803836 : xrep_dir_want_flush_stashed(
994 : struct xrep_dir *rd)
995 : {
996 358803836 : unsigned long long bytes;
997 :
998 358803836 : bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
999 358800704 : return bytes > XREP_DIR_MAX_STASH_BYTES;
1000 : }
1001 :
1002 : /* Extract as many directory entries as we can. */
1003 : STATIC int
1004 0 : xrep_dir_recover(
1005 : struct xrep_dir *rd)
1006 : {
1007 0 : struct xfs_bmbt_irec got;
1008 0 : struct xfs_scrub *sc = rd->sc;
1009 0 : struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
1010 0 : xfs_fileoff_t offset;
1011 0 : xfs_dablk_t dabno;
1012 0 : __be32 magic_guess;
1013 0 : int nmap;
1014 0 : int error;
1015 :
1016 0 : xrep_dir_guess_format(rd, &magic_guess);
1017 :
1018 : /* Iterate each directory data block in the data fork. */
1019 0 : for (offset = 0;
1020 0 : offset < geo->leafblk;
1021 0 : offset = got.br_startoff + got.br_blockcount) {
1022 0 : nmap = 1;
1023 0 : error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
1024 : &got, &nmap, 0);
1025 0 : if (error)
1026 0 : return error;
1027 0 : if (nmap != 1)
1028 : return -EFSCORRUPTED;
1029 0 : if (!xfs_bmap_is_written_extent(&got))
1030 0 : continue;
1031 :
1032 0 : for (dabno = round_up(got.br_startoff, geo->fsbcount);
1033 0 : dabno < got.br_startoff + got.br_blockcount;
1034 0 : dabno += geo->fsbcount) {
1035 0 : if (xchk_should_terminate(rd->sc, &error))
1036 0 : return error;
1037 :
1038 0 : error = xrep_dir_recover_dirblock(rd,
1039 : magic_guess, dabno);
1040 0 : if (error)
1041 0 : return error;
1042 :
1043 : /* Flush dirents to constrain memory usage. */
1044 0 : if (xrep_dir_want_flush_stashed(rd)) {
1045 0 : error = xrep_dir_flush_stashed(rd);
1046 0 : if (error)
1047 0 : return error;
1048 : }
1049 : }
1050 : }
1051 :
1052 : return 0;
1053 : }
1054 :
1055 : /*
1056 : * Find all the directory entries for this inode by scraping them out of the
1057 : * directory leaf blocks by hand, and flushing them into the temp dir.
1058 : */
1059 : STATIC int
1060 0 : xrep_dir_find_entries(
1061 : struct xrep_dir *rd)
1062 : {
1063 0 : struct xfs_inode *dp = rd->sc->ip;
1064 0 : int error;
1065 :
1066 : /*
1067 : * Salvage directory entries from the old directory, and write them to
1068 : * the temporary directory.
1069 : */
1070 0 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
1071 0 : error = xrep_dir_recover_sf(rd);
1072 : } else {
1073 0 : error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
1074 0 : if (error)
1075 : return error;
1076 :
1077 0 : error = xrep_dir_recover(rd);
1078 : }
1079 0 : if (error)
1080 : return error;
1081 :
1082 0 : return xrep_dir_flush_stashed(rd);
1083 : }
1084 :
1085 : /* Scan all files in the filesystem for dirents. */
1086 : STATIC int
1087 0 : xrep_dir_salvage_entries(
1088 : struct xrep_dir *rd)
1089 : {
1090 0 : struct xfs_scrub *sc = rd->sc;
1091 0 : int error;
1092 :
1093 : /*
1094 : * Drop the ILOCK on this directory so that we can scan for this
1095 : * directory's parent. Figure out who is going to be the parent of
1096 : * this directory, then retake the ILOCK so that we can salvage
1097 : * directory entries.
1098 : */
1099 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1100 0 : error = xrep_dir_find_parent(rd);
1101 0 : xchk_ilock(sc, XFS_ILOCK_EXCL);
1102 0 : if (error)
1103 : return error;
1104 :
1105 : /*
1106 : * Collect directory entries by parsing raw leaf blocks to salvage
1107 : * whatever we can. When we're done, free the staging memory before
1108 : * swapping the directories to reduce memory usage.
1109 : */
1110 0 : error = xrep_dir_find_entries(rd);
1111 0 : if (error)
1112 : return error;
1113 :
1114 : /*
1115 : * Cancel the repair transaction and drop the ILOCK so that we can
1116 : * (later) use the atomic extent swap helper functions to compute the
1117 : * correct block reservations and re-lock the inodes.
1118 : *
1119 : * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
1120 : * modifications, but there's nothing to prevent userspace from reading
1121 : * the directory until we're ready for the swap operation. Reads will
1122 : * return -EIO without shutting down the fs, so we're ok with that.
1123 : *
1124 : * The VFS can change dotdot on us, but the findparent scan will keep
1125 : * our incore parent inode up to date. See the note on locking issues
1126 : * for more details.
1127 : */
1128 0 : error = xrep_trans_commit(sc);
1129 0 : if (error)
1130 : return error;
1131 :
1132 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1133 0 : return 0;
1134 : }
1135 :
1136 :
1137 : /*
1138 : * Examine a parent pointer of a file. If it leads us back to the directory
1139 : * that we're rebuilding, create an incore dirent from the parent pointer and
1140 : * stash it.
1141 : */
1142 : STATIC int
1143 457794284 : xrep_dir_scan_pptr(
1144 : struct xfs_scrub *sc,
1145 : struct xfs_inode *ip,
1146 : const struct xfs_parent_name_irec *pptr,
1147 : void *priv)
1148 : {
1149 457794284 : struct xfs_name xname;
1150 457794284 : struct xrep_dir *rd = priv;
1151 457794284 : int error;
1152 :
1153 : /* Ignore parent pointers that point back to a different dir. */
1154 457794284 : if (pptr->p_ino != sc->ip->i_ino ||
1155 129793 : pptr->p_gen != VFS_I(sc->ip)->i_generation)
1156 : return 0;
1157 :
1158 : /*
1159 : * Transform this parent pointer into a dirent and queue it for later
1160 : * addition to the temporary directory.
1161 : */
1162 129793 : xname.name = pptr->p_name;
1163 129793 : xname.len = pptr->p_namelen;
1164 129793 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
1165 :
1166 129793 : mutex_lock(&rd->pscan.lock);
1167 129793 : error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
1168 129793 : mutex_unlock(&rd->pscan.lock);
1169 129793 : return error;
1170 : }
1171 :
1172 : /*
1173 : * If this child dirent points to the directory being repaired, remember that
1174 : * fact so that we can reset the dotdot entry if necessary.
1175 : */
1176 : STATIC int
1177 752730643 : xrep_dir_scan_dirent(
1178 : struct xfs_scrub *sc,
1179 : struct xfs_inode *dp,
1180 : xfs_dir2_dataptr_t dapos,
1181 : const struct xfs_name *name,
1182 : xfs_ino_t ino,
1183 : void *priv)
1184 : {
1185 752730643 : struct xrep_dir *rd = priv;
1186 :
1187 : /* Dirent doesn't point to this directory. */
1188 752730643 : if (ino != rd->sc->ip->i_ino)
1189 : return 0;
1190 :
1191 : /* Ignore garbage inum. */
1192 27940 : if (!xfs_verify_dir_ino(rd->sc->mp, ino))
1193 : return 0;
1194 :
1195 : /* No weird looking names. */
1196 27940 : if (name->len >= MAXNAMELEN || name->len <= 0)
1197 : return 0;
1198 :
1199 : /* Don't pick up dot or dotdot entries; we only want child dirents. */
1200 39908 : if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
1201 11968 : xfs_dir2_samename(name, &xfs_name_dot))
1202 15974 : return 0;
1203 :
1204 11968 : trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
1205 : dp->i_ino);
1206 :
1207 11968 : xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
1208 11968 : return 0;
1209 : }
1210 :
1211 : /*
1212 : * Decide if we want to look for child dirents or parent pointers in this file.
1213 : * Skip the dir being repaired and any files being used to stage repairs.
1214 : */
1215 : static inline bool
1216 717006269 : xrep_dir_want_scan(
1217 : struct xrep_dir *rd,
1218 : const struct xfs_inode *ip)
1219 : {
1220 717006269 : return ip != rd->sc->ip && !xrep_is_tempfile(ip);
1221 : }
1222 :
1223 : /*
1224 : * Take ILOCK on a file that we want to scan.
1225 : *
1226 : * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
1227 : * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED.
1228 : */
1229 : static inline unsigned int
1230 358706674 : xrep_dir_scan_ilock(
1231 : struct xrep_dir *rd,
1232 : struct xfs_inode *ip)
1233 : {
1234 358706674 : uint lock_mode = XFS_ILOCK_SHARED;
1235 :
1236 : /* Need to take the shared ILOCK to advance the iscan cursor. */
1237 358706674 : if (!xrep_dir_want_scan(rd, ip))
1238 37169 : goto lock;
1239 :
1240 506230353 : if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
1241 0 : lock_mode = XFS_ILOCK_EXCL;
1242 0 : goto lock;
1243 : }
1244 :
1245 717152010 : if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
1246 0 : lock_mode = XFS_ILOCK_EXCL;
1247 :
1248 358689301 : lock:
1249 358726470 : xfs_ilock(ip, lock_mode);
1250 358737219 : return lock_mode;
1251 : }
1252 :
1253 : /*
1254 : * Scan this file for relevant child dirents or parent pointers that point to
1255 : * the directory we're rebuilding.
1256 : */
1257 : STATIC int
1258 358682838 : xrep_dir_scan_file(
1259 : struct xrep_dir *rd,
1260 : struct xfs_inode *ip)
1261 : {
1262 358682838 : unsigned int lock_mode;
1263 358682838 : int error = 0;
1264 :
1265 358682838 : lock_mode = xrep_dir_scan_ilock(rd, ip);
1266 :
1267 358762560 : if (!xrep_dir_want_scan(rd, ip))
1268 37169 : goto scan_done;
1269 :
1270 358727452 : error = xchk_pptr_walk(rd->sc, ip, xrep_dir_scan_pptr, &rd->pptr, rd);
1271 358717100 : if (error)
1272 0 : goto scan_done;
1273 :
1274 358717100 : if (S_ISDIR(VFS_I(ip)->i_mode)) {
1275 147815451 : error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
1276 147793097 : if (error)
1277 0 : goto scan_done;
1278 : }
1279 :
1280 358694746 : scan_done:
1281 358731915 : xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
1282 358544478 : xfs_iunlock(ip, lock_mode);
1283 358730809 : return error;
1284 : }
1285 :
1286 : /*
1287 : * Scan all files in the filesystem for parent pointers that we can turn into
1288 : * replacement dirents, and a dirent that we can use to set the dotdot pointer.
1289 : */
1290 : STATIC int
1291 12819 : xrep_dir_scan_dirtree(
1292 : struct xrep_dir *rd)
1293 : {
1294 12819 : struct xfs_scrub *sc = rd->sc;
1295 12819 : struct xfs_inode *ip;
1296 12819 : int error;
1297 :
1298 : /* Roots of directory trees are their own parents. */
1299 12819 : if (sc->ip == sc->mp->m_rootip)
1300 848 : xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
1301 :
1302 : /*
1303 : * Filesystem scans are time consuming. Drop the directory ILOCK and
1304 : * all other resources for the duration of the scan and hope for the
1305 : * best. The live update hooks will keep our scan information up to
1306 : * date even though we've dropped the locks.
1307 : */
1308 12819 : xchk_trans_cancel(sc);
1309 12819 : if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
1310 12819 : xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
1311 : XFS_ILOCK_EXCL));
1312 12819 : error = xchk_trans_alloc_empty(sc);
1313 12819 : if (error)
1314 : return error;
1315 :
1316 358783901 : while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
1317 358674936 : bool flush;
1318 :
1319 358674936 : error = xrep_dir_scan_file(rd, ip);
1320 358835322 : xchk_irele(sc, ip);
1321 358779165 : if (error)
1322 : break;
1323 :
1324 : /* Flush stashed dirent updates to constrain memory usage. */
1325 358779165 : mutex_lock(&rd->pscan.lock);
1326 358801831 : flush = xrep_dir_want_flush_stashed(rd);
1327 358813362 : mutex_unlock(&rd->pscan.lock);
1328 358784526 : if (flush) {
1329 0 : xchk_trans_cancel(sc);
1330 :
1331 0 : error = xrep_tempfile_iolock_polled(sc);
1332 0 : if (error)
1333 : break;
1334 :
1335 0 : mutex_lock(&rd->pscan.lock);
1336 0 : error = xrep_dir_replay_updates(rd);
1337 0 : mutex_unlock(&rd->pscan.lock);
1338 0 : xrep_tempfile_iounlock(sc);
1339 0 : if (error)
1340 : break;
1341 :
1342 0 : error = xchk_trans_alloc_empty(sc);
1343 0 : if (error)
1344 : break;
1345 : }
1346 :
1347 358784526 : if (xchk_should_terminate(sc, &error))
1348 : break;
1349 : }
1350 12819 : xchk_iscan_iter_finish(&rd->pscan.iscan);
1351 12819 : if (error) {
1352 : /*
1353 : * If we couldn't grab an inode that was busy with a state
1354 : * change, change the error code so that we exit to userspace
1355 : * as quickly as possible.
1356 : */
1357 5 : if (error == -EBUSY)
1358 : return -ECANCELED;
1359 5 : return error;
1360 : }
1361 :
1362 : /*
1363 : * Cancel the empty transaction so that we can (later) use the atomic
1364 : * extent swap helpers to lock files and commit the new directory.
1365 : */
1366 12814 : xchk_trans_cancel(rd->sc);
1367 12814 : return 0;
1368 : }
1369 :
1370 : /*
1371 : * Capture dirent updates being made by other threads which are relevant to the
1372 : * directory being repaired.
1373 : */
1374 : STATIC int
1375 8011781 : xrep_dir_live_update(
1376 : struct notifier_block *nb,
1377 : unsigned long action,
1378 : void *data)
1379 : {
1380 8011781 : struct xfs_dir_update_params *p = data;
1381 8011781 : struct xrep_dir *rd;
1382 8011781 : struct xfs_scrub *sc;
1383 8011781 : int error = 0;
1384 :
1385 8011781 : rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb);
1386 8011781 : sc = rd->sc;
1387 :
1388 : /*
1389 : * This thread updated a child dirent in the directory that we're
1390 : * rebuilding. Stash the update for replay against the temporary
1391 : * directory.
1392 : */
1393 8011781 : if (p->dp->i_ino == sc->ip->i_ino &&
1394 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
1395 0 : mutex_lock(&rd->pscan.lock);
1396 0 : if (p->delta > 0)
1397 0 : error = xrep_dir_stash_createname(rd, p->name,
1398 0 : p->ip->i_ino);
1399 : else
1400 0 : error = xrep_dir_stash_removename(rd, p->name,
1401 0 : p->ip->i_ino);
1402 0 : mutex_unlock(&rd->pscan.lock);
1403 0 : if (error)
1404 0 : goto out_abort;
1405 : }
1406 :
1407 : /*
1408 : * This thread updated another directory's child dirent that points to
1409 : * the directory that we're rebuilding, so remember the new dotdot
1410 : * target.
1411 : */
1412 8011781 : if (p->ip->i_ino == sc->ip->i_ino &&
1413 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
1414 0 : if (p->delta > 0) {
1415 0 : trace_xrep_dir_stash_createname(sc->tempip,
1416 : &xfs_name_dotdot,
1417 0 : p->dp->i_ino);
1418 :
1419 0 : xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
1420 : } else {
1421 0 : trace_xrep_dir_stash_removename(sc->tempip,
1422 : &xfs_name_dotdot,
1423 : rd->pscan.parent_ino);
1424 :
1425 0 : xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
1426 : }
1427 : }
1428 :
1429 : return NOTIFY_DONE;
1430 : out_abort:
1431 0 : xchk_iscan_abort(&rd->pscan.iscan);
1432 0 : return NOTIFY_DONE;
1433 : }
1434 :
1435 : /*
1436 : * Free all the directory blocks and reset the data fork. The caller must
1437 : * join the inode to the transaction. This function returns with the inode
1438 : * joined to a clean scrub transaction.
1439 : */
1440 : STATIC int
1441 12812 : xrep_dir_reset_fork(
1442 : struct xrep_dir *rd,
1443 : xfs_ino_t parent_ino)
1444 : {
1445 12812 : struct xfs_scrub *sc = rd->sc;
1446 12812 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
1447 12812 : int error;
1448 :
1449 : /* Unmap all the directory buffers. */
1450 12812 : if (xfs_ifork_has_extents(ifp)) {
1451 883 : error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
1452 883 : if (error)
1453 : return error;
1454 : }
1455 :
1456 12812 : trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
1457 :
1458 : /* Reset the data fork to an empty data fork. */
1459 12809 : xfs_idestroy_fork(ifp);
1460 12810 : ifp->if_bytes = 0;
1461 12810 : sc->tempip->i_disk_size = 0;
1462 :
1463 : /* Reinitialize the short form directory. */
1464 12810 : xrep_dir_init_args(rd, sc->tempip, NULL);
1465 12809 : error = xfs_dir2_sf_create(&rd->args, parent_ino);
1466 12813 : if (error)
1467 : return error;
1468 :
1469 12813 : return xrep_tempfile_roll_trans(sc);
1470 : }
1471 :
1472 : /*
1473 : * Prepare both inodes' directory forks for extent swapping. Promote the
1474 : * tempfile from short format to leaf format, and if the file being repaired
1475 : * has a short format data fork, turn it into an empty extent list.
1476 : */
1477 : STATIC int
1478 883 : xrep_dir_swap_prep(
1479 : struct xfs_scrub *sc,
1480 : bool temp_local,
1481 : bool ip_local)
1482 : {
1483 883 : int error;
1484 :
1485 : /*
1486 : * If the tempfile's directory is in shortform format, convert that
1487 : * to a single leaf extent so that we can use the atomic extent swap.
1488 : */
1489 883 : if (temp_local) {
1490 820 : struct xfs_da_args args = {
1491 820 : .dp = sc->tempip,
1492 820 : .geo = sc->mp->m_dir_geo,
1493 : .whichfork = XFS_DATA_FORK,
1494 820 : .trans = sc->tp,
1495 : .total = 1,
1496 820 : .owner = sc->ip->i_ino,
1497 : };
1498 :
1499 820 : error = xfs_dir2_sf_to_block(&args);
1500 820 : if (error)
1501 0 : return error;
1502 :
1503 : /*
1504 : * Roll the deferred log items to get us back to a clean
1505 : * transaction.
1506 : */
1507 820 : error = xfs_defer_finish(&sc->tp);
1508 820 : if (error)
1509 : return error;
1510 : }
1511 :
1512 : /*
1513 : * If the file being repaired had a shortform data fork, convert that
1514 : * to an empty extent list in preparation for the atomic extent swap.
1515 : */
1516 883 : if (ip_local) {
1517 0 : struct xfs_ifork *ifp;
1518 :
1519 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1520 0 : xfs_idestroy_fork(ifp);
1521 0 : ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1522 0 : ifp->if_nextents = 0;
1523 0 : ifp->if_bytes = 0;
1524 0 : ifp->if_u1.if_root = NULL;
1525 0 : ifp->if_height = 0;
1526 :
1527 0 : xfs_trans_log_inode(sc->tp, sc->ip,
1528 : XFS_ILOG_CORE | XFS_ILOG_DDATA);
1529 : }
1530 :
1531 : return 0;
1532 : }
1533 :
1534 : /*
1535 : * Replace the inode number of a directory entry.
1536 : */
1537 : static int
1538 10998 : xrep_dir_replace(
1539 : struct xrep_dir *rd,
1540 : struct xfs_inode *dp,
1541 : const struct xfs_name *name,
1542 : xfs_ino_t inum,
1543 : xfs_extlen_t total)
1544 : {
1545 10998 : struct xfs_scrub *sc = rd->sc;
1546 10998 : bool is_block, is_leaf;
1547 10998 : int error;
1548 :
1549 10998 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
1550 :
1551 10998 : error = xfs_dir_ino_validate(sc->mp, inum);
1552 10998 : if (error)
1553 : return error;
1554 :
1555 10998 : xrep_dir_init_args(rd, dp, name);
1556 10998 : rd->args.inumber = inum;
1557 10998 : rd->args.total = total;
1558 :
1559 10998 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
1560 10992 : return xfs_dir2_sf_replace(&rd->args);
1561 :
1562 6 : error = xfs_dir2_isblock(&rd->args, &is_block);
1563 6 : if (error)
1564 : return error;
1565 6 : if (is_block)
1566 6 : return xfs_dir2_block_replace(&rd->args);
1567 :
1568 0 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
1569 0 : if (error)
1570 : return error;
1571 0 : if (is_leaf)
1572 0 : return xfs_dir2_leaf_replace(&rd->args);
1573 :
1574 0 : return xfs_dir2_node_replace(&rd->args);
1575 : }
1576 :
1577 : /*
1578 : * Reset the link count of this directory and adjust the unlinked list pointers
1579 : * as needed.
1580 : */
1581 : STATIC int
1582 12813 : xrep_dir_set_nlink(
1583 : struct xrep_dir *rd)
1584 : {
1585 12813 : struct xfs_scrub *sc = rd->sc;
1586 12813 : struct xfs_inode *dp = sc->ip;
1587 12813 : struct xfs_perag *pag;
1588 12813 : int error;
1589 :
1590 : /*
1591 : * The directory is not on the incore unlinked list, which means that
1592 : * it needs to be reachable via the directory tree. Update the nlink
1593 : * with our observed link count. If the directory has no parent, it
1594 : * will be moved to the orphanage.
1595 : */
1596 12813 : if (!xfs_inode_on_unlinked_list(dp)) {
1597 12813 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1598 12813 : return 0;
1599 : }
1600 :
1601 0 : xfs_emerg(dp->i_mount, "IUNLINK unlinked dir 0x%llx repair, dirents %u subdirs %llu curr_nlink %u orphan? %d", dp->i_ino, rd->dirents, rd->subdirs, VFS_I(dp)->i_nlink, rd->needs_adoption);
1602 :
1603 : /*
1604 : * The directory is on the unlinked list and we did not find any
1605 : * dirents. Set the link count to zero and let the directory
1606 : * inactivate when the last reference drops.
1607 : */
1608 0 : if (rd->dirents == 0) {
1609 0 : rd->needs_adoption = false;
1610 0 : xrep_set_nlink(sc->ip, 0);
1611 0 : return 0;
1612 : }
1613 :
1614 : /*
1615 : * The directory is on the unlinked list and we found dirents. This
1616 : * directory needs to be reachable via the directory tree. Remove the
1617 : * dir from the unlinked list and update nlink with the observed link
1618 : * count. If the directory has no parent, it will be moved to the
1619 : * orphanage.
1620 : */
1621 0 : pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
1622 0 : if (!pag) {
1623 0 : ASSERT(0);
1624 0 : return -EFSCORRUPTED;
1625 : }
1626 :
1627 0 : error = xfs_iunlink_remove(sc->tp, pag, dp);
1628 0 : xfs_perag_put(pag);
1629 0 : if (error)
1630 : return error;
1631 :
1632 0 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1633 0 : return 0;
1634 : }
1635 :
1636 : /*
1637 : * Finish replaying stashed dirent updates, allocate a transaction for swapping
1638 : * extents, and take the ILOCKs of both directories before we commit the new
1639 : * directory structure.
1640 : */
1641 : STATIC int
1642 12810 : xrep_dir_finalize_tempdir(
1643 : struct xrep_dir *rd)
1644 : {
1645 12810 : struct xfs_scrub *sc = rd->sc;
1646 12810 : int error;
1647 :
1648 12810 : if (!xfs_has_parent(sc->mp))
1649 0 : return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1650 :
1651 12810 : error = xrep_dir_replay_updates(rd);
1652 12814 : if (error)
1653 : return error;
1654 :
1655 12814 : error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1656 12814 : if (error)
1657 : return error;
1658 :
1659 : /*
1660 : * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
1661 : * possible directory updates during the time when we did not hold the
1662 : * ILOCK. There should not be any dirent updates to replay, but check
1663 : * anyway.
1664 : */
1665 12814 : if (xfarray_length(rd->dir_entries) != 0) {
1666 0 : ASSERT(xfarray_length(rd->dir_entries) == 0);
1667 0 : return -EFSCORRUPTED;
1668 : }
1669 :
1670 : return 0;
1671 : }
1672 :
1673 : /* Swap the temporary directory's data fork with the one being repaired. */
1674 : STATIC int
1675 12812 : xrep_dir_swap(
1676 : struct xrep_dir *rd)
1677 : {
1678 12812 : struct xfs_scrub *sc = rd->sc;
1679 12812 : bool ip_local, temp_local;
1680 12812 : int error = 0;
1681 :
1682 : /*
1683 : * If we never found the parent for this directory, temporarily assign
1684 : * the root dir as the parent; we'll move this to the orphanage after
1685 : * swapping the dir contents. We hold the ILOCK of the dir being
1686 : * repaired, so we're not worried about racy updates of dotdot.
1687 : */
1688 12812 : ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1689 12812 : if (rd->pscan.parent_ino == NULLFSINO) {
1690 0 : rd->needs_adoption = true;
1691 0 : rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
1692 : }
1693 :
1694 : /*
1695 : * Reset the temporary directory's '..' entry to point to the parent
1696 : * that we found. The temporary directory was created with the root
1697 : * directory as the parent, so we can skip this if repairing a
1698 : * subdirectory of the root.
1699 : *
1700 : * It's also possible that this replacement could also expand a sf
1701 : * tempdir into block format.
1702 : */
1703 12812 : if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
1704 10998 : error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
1705 10998 : rd->pscan.parent_ino, rd->tx.req.resblks);
1706 10998 : if (error)
1707 : return error;
1708 : }
1709 :
1710 : /*
1711 : * Changing the dot and dotdot entries could have changed the shape of
1712 : * the directory, so we recompute these.
1713 : */
1714 12812 : ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1715 12812 : temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1716 :
1717 : /*
1718 : * If the both files have a local format data fork and the rebuilt
1719 : * directory data would fit in the repaired file's data fork, copy
1720 : * the contents from the tempfile and update the directory link count.
1721 : * We're done now.
1722 : */
1723 12812 : if (ip_local && temp_local &&
1724 11929 : sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
1725 11926 : xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
1726 11930 : return xrep_dir_set_nlink(rd);
1727 : }
1728 :
1729 : /* Clean the transaction before we start working on the extent swap. */
1730 886 : error = xrep_tempfile_roll_trans(rd->sc);
1731 883 : if (error)
1732 : return error;
1733 :
1734 : /* Otherwise, make sure both data forks are in block-mapping mode. */
1735 883 : error = xrep_dir_swap_prep(sc, temp_local, ip_local);
1736 883 : if (error)
1737 : return error;
1738 :
1739 : /*
1740 : * Set nlink of the directory in the same transaction sequence that
1741 : * (atomically) commits the new directory data.
1742 : */
1743 883 : error = xrep_dir_set_nlink(rd);
1744 883 : if (error)
1745 : return error;
1746 :
1747 883 : return xrep_tempswap_contents(sc, &rd->tx);
1748 : }
1749 :
1750 : /*
1751 : * Swap the new directory contents (which we created in the tempfile) into the
1752 : * directory being repaired.
1753 : */
1754 : STATIC int
1755 12814 : xrep_dir_rebuild_tree(
1756 : struct xrep_dir *rd)
1757 : {
1758 12814 : struct xfs_scrub *sc = rd->sc;
1759 12814 : int error;
1760 :
1761 12814 : trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
1762 :
1763 : /*
1764 : * Take the IOLOCK on the temporary file so that we can run dir
1765 : * operations with the same locks held as we would for a normal file.
1766 : * We still hold sc->ip's IOLOCK.
1767 : */
1768 12814 : error = xrep_tempfile_iolock_polled(rd->sc);
1769 12811 : if (error)
1770 : return error;
1771 :
1772 : /*
1773 : * Allocate transaction, lock inodes, and make sure that we've replayed
1774 : * all the stashed dirent updates to the tempdir. After this point,
1775 : * we're ready to swapext.
1776 : */
1777 12811 : error = xrep_dir_finalize_tempdir(rd);
1778 12813 : if (error)
1779 : return error;
1780 :
1781 12813 : if (xchk_iscan_aborted(&rd->pscan.iscan))
1782 : return -ECANCELED;
1783 :
1784 : /*
1785 : * Swap the tempdir's data fork with the file being repaired. This
1786 : * recreates the transaction and re-takes the ILOCK in the scrub
1787 : * context.
1788 : */
1789 12813 : error = xrep_dir_swap(rd);
1790 12811 : if (error)
1791 : return error;
1792 :
1793 : /*
1794 : * Release the old directory blocks and reset the data fork of the temp
1795 : * directory to an empty shortform directory because inactivation does
1796 : * nothing for directories.
1797 : */
1798 12811 : return xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
1799 : }
1800 :
1801 : /* Set up the filesystem scan so we can regenerate directory entries. */
1802 : STATIC int
1803 12819 : xrep_dir_setup_scan(
1804 : struct xrep_dir *rd)
1805 : {
1806 12819 : struct xfs_scrub *sc = rd->sc;
1807 12819 : char *descr;
1808 12819 : int error;
1809 :
1810 : /* Set up some staging memory for salvaging dirents. */
1811 12819 : descr = xchk_xfile_ino_descr(sc, "directory entries");
1812 12819 : error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
1813 : &rd->dir_entries);
1814 12819 : kfree(descr);
1815 12819 : if (error)
1816 : return error;
1817 :
1818 12819 : descr = xchk_xfile_ino_descr(sc, "directory entry names");
1819 12819 : error = xfblob_create(descr, &rd->dir_names);
1820 12819 : kfree(descr);
1821 12819 : if (error)
1822 0 : goto out_xfarray;
1823 :
1824 12819 : if (xfs_has_parent(sc->mp))
1825 12819 : error = __xrep_findparent_scan_start(sc, &rd->pscan,
1826 : xrep_dir_live_update);
1827 : else
1828 0 : error = xrep_findparent_scan_start(sc, &rd->pscan);
1829 12819 : if (error)
1830 0 : goto out_xfblob;
1831 :
1832 : return 0;
1833 :
1834 : out_xfblob:
1835 0 : xfblob_destroy(rd->dir_names);
1836 0 : rd->dir_names = NULL;
1837 0 : out_xfarray:
1838 0 : xfarray_destroy(rd->dir_entries);
1839 0 : rd->dir_entries = NULL;
1840 0 : return error;
1841 : }
1842 :
1843 : /*
1844 : * Move the current file to the orphanage.
1845 : *
1846 : * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon
1847 : * successful return, the scrub transaction will have enough extra reservation
1848 : * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
1849 : * orphanage; and both inodes will be ijoined.
1850 : */
1851 : STATIC int
1852 0 : xrep_dir_move_to_orphanage(
1853 : struct xrep_dir *rd)
1854 : {
1855 0 : struct xfs_scrub *sc = rd->sc;
1856 0 : xfs_ino_t orig_parent, new_parent;
1857 0 : int error;
1858 :
1859 : /* No orphanage? We can't fix this. */
1860 0 : if (!sc->orphanage)
1861 : return -EFSCORRUPTED;
1862 :
1863 : /*
1864 : * We are about to drop the ILOCK on sc->ip to lock the orphanage and
1865 : * prepare for the adoption. Therefore, look up the old dotdot entry
1866 : * for sc->ip so that we can compare it after we re-lock sc->ip.
1867 : */
1868 0 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
1869 0 : if (error)
1870 : return error;
1871 :
1872 : /*
1873 : * We hold ILOCK_EXCL on both the directory and the tempdir after a
1874 : * successful rebuild. Before we can move the directory to the
1875 : * orphanage, we must roll to a clean unjoined transaction.
1876 : */
1877 0 : error = xfs_trans_roll(&sc->tp);
1878 0 : if (error)
1879 : return error;
1880 :
1881 : /*
1882 : * Because the orphanage is just another directory in the filesystem,
1883 : * we must take its IOLOCK to coordinate with the VFS. We cannot take
1884 : * an IOLOCK while holding an ILOCK, so we must drop them all. We may
1885 : * have to drop the IOLOCK as well.
1886 : */
1887 0 : xrep_tempfile_iunlock_both(sc);
1888 :
1889 0 : error = xrep_adoption_init(sc, &rd->adoption);
1890 0 : if (error)
1891 : return error;
1892 :
1893 0 : if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
1894 0 : xchk_iunlock(sc, sc->ilock_flags);
1895 0 : error = xrep_orphanage_iolock_two(sc);
1896 0 : if (error)
1897 0 : goto err_adoption;
1898 : }
1899 :
1900 : /* Prepare for the adoption and lock both down. */
1901 0 : error = xrep_adoption_prep(&rd->adoption);
1902 0 : if (error)
1903 0 : goto err_adoption;
1904 :
1905 0 : error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name);
1906 0 : if (error)
1907 0 : goto err_adoption;
1908 :
1909 : /*
1910 : * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
1911 : * entry again. If the parent changed or the child was unlinked while
1912 : * the child directory was unlocked, we don't need to move the child to
1913 : * the orphanage after all.
1914 : */
1915 0 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
1916 0 : if (error)
1917 0 : goto err_adoption;
1918 0 : if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
1919 0 : error = 0;
1920 0 : goto err_adoption;
1921 : }
1922 :
1923 : /* Attach to the orphanage. */
1924 0 : return xrep_adoption_commit(&rd->adoption);
1925 0 : err_adoption:
1926 0 : xrep_adoption_cancel(&rd->adoption, error);
1927 0 : return error;
1928 : }
1929 :
1930 : /*
1931 : * Repair the directory metadata.
1932 : *
1933 : * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer
1934 : * cache in XFS can't handle aliased multiblock buffers, so this might
1935 : * misbehave if the directory blocks are crosslinked with other filesystem
1936 : * metadata.
1937 : *
1938 : * XXX: Is it necessary to check the dcache for this directory to make sure
1939 : * that we always recreate every cached entry?
1940 : */
1941 : int
1942 183695 : xrep_directory(
1943 : struct xfs_scrub *sc)
1944 : {
1945 183695 : struct xrep_dir *rd = sc->buf;
1946 183695 : int error;
1947 :
1948 : /* The rmapbt is required to reap the old data fork. */
1949 183695 : if (!xfs_has_rmapbt(sc->mp))
1950 : return -EOPNOTSUPP;
1951 :
1952 12819 : error = xrep_dir_setup_scan(rd);
1953 12819 : if (error)
1954 : return error;
1955 :
1956 12819 : if (xfs_has_parent(sc->mp))
1957 12819 : error = xrep_dir_scan_dirtree(rd);
1958 : else
1959 0 : error = xrep_dir_salvage_entries(rd);
1960 12819 : if (error)
1961 5 : goto out_teardown;
1962 :
1963 : /* Last chance to abort before we start committing fixes. */
1964 12814 : if (xchk_should_terminate(sc, &error))
1965 0 : goto out_teardown;
1966 :
1967 12810 : error = xrep_dir_rebuild_tree(rd);
1968 12807 : if (error)
1969 0 : goto out_teardown;
1970 :
1971 12807 : if (rd->needs_adoption) {
1972 0 : error = xrep_dir_move_to_orphanage(rd);
1973 0 : if (error)
1974 : goto out_teardown;
1975 : }
1976 :
1977 12807 : out_teardown:
1978 12812 : xrep_dir_teardown(sc);
1979 12812 : return error;
1980 : }
|