Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_sb.h"
17 : #include "xfs_inode.h"
18 : #include "xfs_icache.h"
19 : #include "xfs_da_format.h"
20 : #include "xfs_da_btree.h"
21 : #include "xfs_dir2.h"
22 : #include "xfs_dir2_priv.h"
23 : #include "xfs_bmap.h"
24 : #include "xfs_quota.h"
25 : #include "xfs_bmap_btree.h"
26 : #include "xfs_trans_space.h"
27 : #include "xfs_bmap_util.h"
28 : #include "xfs_swapext.h"
29 : #include "xfs_xchgrange.h"
30 : #include "xfs_ag.h"
31 : #include "xfs_parent.h"
32 : #include "scrub/xfs_scrub.h"
33 : #include "scrub/scrub.h"
34 : #include "scrub/common.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/tempfile.h"
38 : #include "scrub/tempswap.h"
39 : #include "scrub/xfile.h"
40 : #include "scrub/xfarray.h"
41 : #include "scrub/xfblob.h"
42 : #include "scrub/iscan.h"
43 : #include "scrub/readdir.h"
44 : #include "scrub/reap.h"
45 : #include "scrub/findparent.h"
46 : #include "scrub/orphanage.h"
47 : #include "scrub/listxattr.h"
48 :
49 : /*
50 : * Directory Repair
51 : * ================
52 : *
53 : * We repair directories by reading the directory data blocks looking for
54 : * directory entries that look salvageable (name passes verifiers, entry points
55 : * to a valid allocated inode, etc). Each entry worth salvaging is stashed in
56 : * memory, and the stashed entries are periodically replayed into a temporary
57 : * directory to constrain memory use. Batching the construction of the
58 : * temporary directory in this fashion reduces lock cycling of the directory
59 : * being repaired and the temporary directory, and will later become important
60 : * for parent pointer scanning.
61 : *
62 : * If parent pointers are enabled on this filesystem, we instead reconstruct
63 : * the directory by visiting each parent pointer of each file in the filesystem
64 : * and translating the relevant parent pointer records into dirents. In this
65 : * case, it is advantageous to stash all directory entries created from parent
66 : * pointers for a single child file before replaying them into the temporary
67 : * directory. To save memory, the live filesystem scan reuses the findparent
68 : * fields. Directory repair chooses either parent pointer scanning or
69 : * directory entry salvaging, but not both.
70 : *
71 : * Directory entries added to the temporary directory do not elevate the link
72 : * counts of the inodes found. When salvaging completes, the remaining stashed
73 : * entries are replayed to the temporary directory. An atomic extent swap is
74 : * used to commit the new directory blocks to the directory being repaired.
75 : * This will disrupt readdir cursors.
76 : *
77 : * Legacy Locking Issues
78 : * ---------------------
79 : *
80 : * Prior to Linux 6.5, if /a, /a/b, and /c were all directories, the VFS would
81 : * not take i_rwsem on /a/b for a "mv /a/b /c/" operation. This meant that
82 : * only b's ILOCK protected b's dotdot update. b's IOLOCK was not taken,
83 : * unlike every other dotdot update (link, remove, mkdir). If the repair code
84 : * dropped the ILOCK, it was required either to revalidate the dotdot entry
85 : * or to use dirent hooks to capture updates from other threads.
86 : */
87 :
88 : /* Create a dirent in the tempdir. */
89 : #define XREP_DIRENT_ADD (1)
90 :
91 : /* Remove a dirent from the tempdir. */
92 : #define XREP_DIRENT_REMOVE (2)
93 :
94 : /* Directory entry to be restored in the new directory. */
95 : struct xrep_dirent {
96 : /* Cookie for retrieval of the dirent name. */
97 : xfblob_cookie name_cookie;
98 :
99 : /* Target inode number. */
100 : xfs_ino_t ino;
101 :
102 : /* Length of the dirent name. */
103 : uint8_t namelen;
104 :
105 : /* File type of the dirent. */
106 : uint8_t ftype;
107 :
108 : /* XREP_DIRENT_{ADD,REMOVE} */
109 : uint8_t action;
110 : };
111 :
/*
 * Upper bound on the recovered dirent data (dir_entries plus dir_names) that
 * we hold in memory before writing it to the temp dir: 8 pages.
 */
#define XREP_DIR_MAX_STASH_BYTES	(8 * PAGE_SIZE)
117 :
118 : struct xrep_dir {
119 : struct xfs_scrub *sc;
120 :
121 : /* Fixed-size array of xrep_dirent structures. */
122 : struct xfarray *dir_entries;
123 :
124 : /* Blobs containing directory entry names. */
125 : struct xfblob *dir_names;
126 :
127 : /* Information for swapping data forks at the end. */
128 : struct xrep_tempswap tx;
129 :
130 : /* Preallocated args struct for performing dir operations */
131 : struct xfs_da_args args;
132 :
133 : /*
134 : * Information used to scan the filesystem to find the inumber of the
135 : * dotdot entry for this directory. For directory salvaging when
136 : * parent pointers are not enabled, we use the findparent_* functions
137 : * on this object and access only the parent_ino field directly.
138 : *
139 : * When parent pointers are enabled, however, the pptr scanner uses the
140 : * iscan, hooks, lock, and parent_ino fields of this object directly.
141 : * @pscan.lock coordinates access to dir_entries, dir_names,
142 : * parent_ino, subdirs, dirents, and args. This reduces the memory
143 : * requirements of this structure.
144 : */
145 : struct xrep_parent_scan_info pscan;
146 :
147 : /*
148 : * Context information for attaching this directory to the lost+found
149 : * if this directory does not have a parent.
150 : */
151 : struct xrep_adoption adoption;
152 :
153 : /* How many subdirectories did we find? */
154 : uint64_t subdirs;
155 :
156 : /* How many dirents did we find? */
157 : unsigned int dirents;
158 :
159 : /* Should we move this directory to the orphanage? */
160 : bool needs_adoption;
161 :
162 : /*
163 : * Scratch buffer for reading parent pointers from child files. The
164 : * p_name field is used to flush stashed dirents into the temporary
165 : * directory in between parent pointers. At the very end of the
166 : * repair, it can also be used to compute the lost+found filename
167 : * if we need to reparent the directory.
168 : */
169 : struct xfs_parent_name_irec pptr;
170 : };
171 :
172 : /* Tear down all the incore stuff we created. */
173 : static void
174 24752 : xrep_dir_teardown(
175 : struct xfs_scrub *sc)
176 : {
177 24752 : struct xrep_dir *rd = sc->buf;
178 :
179 24752 : xrep_findparent_scan_teardown(&rd->pscan);
180 24753 : xfblob_destroy(rd->dir_names);
181 24752 : xfarray_destroy(rd->dir_entries);
182 24753 : }
183 :
184 : /* Set up for a directory repair. */
185 : int
186 25089 : xrep_setup_directory(
187 : struct xfs_scrub *sc)
188 : {
189 25089 : struct xrep_dir *rd;
190 25089 : int error;
191 :
192 25089 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
193 :
194 25088 : error = xrep_orphanage_try_create(sc);
195 25089 : if (error)
196 : return error;
197 :
198 25089 : error = xrep_tempfile_create(sc, S_IFDIR);
199 25089 : if (error)
200 : return error;
201 :
202 25089 : rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
203 25086 : if (!rd)
204 : return -ENOMEM;
205 25086 : rd->sc = sc;
206 25086 : sc->buf = rd;
207 :
208 25086 : return 0;
209 : }
210 :
211 : /*
212 : * If we're the root of a directory tree, we are our own parent. If we're an
213 : * unlinked directory, the parent /won't/ have a link to us. Set the parent
214 : * directory to the root for both cases. Returns NULLFSINO if we don't know
215 : * what to do.
216 : */
217 : static inline xfs_ino_t
218 : xrep_dir_self_parent(
219 : struct xrep_dir *rd)
220 : {
221 : struct xfs_scrub *sc = rd->sc;
222 :
223 : if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
224 : return sc->mp->m_sb.sb_rootino;
225 :
226 : if (VFS_I(sc->ip)->i_nlink == 0)
227 : return sc->mp->m_sb.sb_rootino;
228 :
229 : return NULLFSINO;
230 : }
231 :
232 : /*
233 : * Look up the dotdot entry and confirm that it's really the parent.
234 : * Returns NULLFSINO if we don't know what to do.
235 : */
236 : static inline xfs_ino_t
237 0 : xrep_dir_lookup_parent(
238 : struct xrep_dir *rd)
239 : {
240 0 : struct xfs_scrub *sc = rd->sc;
241 0 : xfs_ino_t ino;
242 0 : int error;
243 :
244 0 : error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
245 0 : if (error)
246 : return NULLFSINO;
247 0 : if (!xfs_verify_dir_ino(sc->mp, ino))
248 : return NULLFSINO;
249 :
250 0 : error = xrep_findparent_confirm(sc, &ino);
251 0 : if (error)
252 : return NULLFSINO;
253 :
254 0 : return ino;
255 : }
256 :
257 : /*
258 : * Look up '..' in the dentry cache and confirm that it's really the parent.
259 : * Returns NULLFSINO if the dcache misses or if the hit is implausible.
260 : */
261 : static inline xfs_ino_t
262 0 : xrep_dir_dcache_parent(
263 : struct xrep_dir *rd)
264 : {
265 0 : struct xfs_scrub *sc = rd->sc;
266 0 : xfs_ino_t parent_ino;
267 0 : int error;
268 :
269 0 : parent_ino = xrep_findparent_from_dcache(sc);
270 0 : if (parent_ino == NULLFSINO)
271 : return parent_ino;
272 :
273 0 : error = xrep_findparent_confirm(sc, &parent_ino);
274 0 : if (error)
275 : return NULLFSINO;
276 :
277 0 : return parent_ino;
278 : }
279 :
280 : /* Try to find the parent of the directory being repaired. */
281 : STATIC int
282 0 : xrep_dir_find_parent(
283 : struct xrep_dir *rd)
284 : {
285 0 : xfs_ino_t ino;
286 :
287 0 : ino = xrep_findparent_self_reference(rd->sc);
288 0 : if (ino != NULLFSINO) {
289 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
290 0 : return 0;
291 : }
292 :
293 0 : ino = xrep_dir_dcache_parent(rd);
294 0 : if (ino != NULLFSINO) {
295 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
296 0 : return 0;
297 : }
298 :
299 0 : ino = xrep_dir_lookup_parent(rd);
300 0 : if (ino != NULLFSINO) {
301 0 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
302 0 : return 0;
303 : }
304 :
305 : /*
306 : * A full filesystem scan is the last resort. On a busy filesystem,
307 : * the scan can fail with -EBUSY if we cannot grab IOLOCKs. That means
308 : * that we don't know what who the parent is, so we should return to
309 : * userspace.
310 : */
311 0 : return xrep_findparent_scan(&rd->pscan);
312 : }
313 :
314 : /*
315 : * Decide if we want to salvage this entry. We don't bother with oversized
316 : * names or the dot entry.
317 : */
318 : STATIC int
319 0 : xrep_dir_want_salvage(
320 : struct xrep_dir *rd,
321 : const char *name,
322 : int namelen,
323 : xfs_ino_t ino)
324 : {
325 0 : struct xfs_mount *mp = rd->sc->mp;
326 :
327 : /* No pointers to ourselves or to garbage. */
328 0 : if (ino == rd->sc->ip->i_ino)
329 : return false;
330 0 : if (!xfs_verify_dir_ino(mp, ino))
331 : return false;
332 :
333 : /* No weird looking names or dot entries. */
334 0 : if (namelen >= MAXNAMELEN || namelen <= 0)
335 : return false;
336 0 : if (namelen == 1 && name[0] == '.')
337 0 : return false;
338 :
339 : return true;
340 : }
341 :
342 : /*
343 : * Remember that we want to create a dirent in the tempdir. These stashed
344 : * actions will be replayed later.
345 : */
346 : STATIC int
347 236466 : xrep_dir_stash_createname(
348 : struct xrep_dir *rd,
349 : const struct xfs_name *name,
350 : xfs_ino_t ino)
351 : {
352 236466 : struct xrep_dirent dirent = {
353 : .action = XREP_DIRENT_ADD,
354 : .ino = ino,
355 236466 : .namelen = name->len,
356 236466 : .ftype = name->type,
357 : };
358 236466 : int error;
359 :
360 236466 : trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
361 :
362 236466 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
363 236466 : name->len);
364 236463 : if (error)
365 : return error;
366 :
367 236463 : return xfarray_append(rd->dir_entries, &dirent);
368 : }
369 :
370 : /*
371 : * Remember that we want to remove a dirent from the tempdir. These stashed
372 : * actions will be replayed later.
373 : */
374 : STATIC int
375 0 : xrep_dir_stash_removename(
376 : struct xrep_dir *rd,
377 : const struct xfs_name *name,
378 : xfs_ino_t ino)
379 : {
380 0 : struct xrep_dirent dirent = {
381 : .action = XREP_DIRENT_REMOVE,
382 : .ino = ino,
383 0 : .namelen = name->len,
384 0 : .ftype = name->type,
385 : };
386 0 : int error;
387 :
388 0 : trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
389 :
390 0 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
391 0 : name->len);
392 0 : if (error)
393 : return error;
394 :
395 0 : return xfarray_append(rd->dir_entries, &dirent);
396 : }
397 :
398 : /* Allocate an in-core record to hold entries while we rebuild the dir data. */
399 : STATIC int
400 0 : xrep_dir_salvage_entry(
401 : struct xrep_dir *rd,
402 : unsigned char *name,
403 : unsigned int namelen,
404 : xfs_ino_t ino)
405 : {
406 0 : struct xfs_name xname = {
407 : .name = name,
408 : };
409 0 : struct xfs_scrub *sc = rd->sc;
410 0 : struct xfs_inode *ip;
411 0 : unsigned int i = 0;
412 0 : int error = 0;
413 :
414 0 : if (xchk_should_terminate(sc, &error))
415 0 : return error;
416 :
417 : /*
418 : * Truncate the name to the first character that would trip namecheck.
419 : * If we no longer have a name after that, ignore this entry.
420 : */
421 0 : while (i < namelen && name[i] != 0 && name[i] != '/')
422 0 : i++;
423 0 : if (i == 0)
424 : return 0;
425 0 : xname.len = i;
426 :
427 : /* Ignore '..' entries; we already picked the new parent. */
428 0 : if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
429 0 : trace_xrep_dir_salvaged_parent(sc->ip, ino);
430 0 : return 0;
431 : }
432 :
433 0 : trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
434 :
435 : /*
436 : * Compute the ftype or dump the entry if we can't. We don't lock the
437 : * inode because inodes can't change type while we have a reference.
438 : */
439 0 : error = xchk_iget(sc, ino, &ip);
440 0 : if (error)
441 : return 0;
442 :
443 : /* Don't mix metadata and regular directory trees. */
444 0 : if (xfs_is_metadir_inode(ip) ^ xfs_is_metadir_inode(rd->sc->ip)) {
445 0 : xchk_irele(sc, ip);
446 0 : return 0;
447 : }
448 :
449 0 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
450 0 : xchk_irele(sc, ip);
451 :
452 0 : return xrep_dir_stash_createname(rd, &xname, ino);
453 : }
454 :
455 : /* Record a shortform directory entry for later reinsertion. */
456 : STATIC int
457 0 : xrep_dir_salvage_sf_entry(
458 : struct xrep_dir *rd,
459 : struct xfs_dir2_sf_hdr *sfp,
460 : struct xfs_dir2_sf_entry *sfep)
461 : {
462 0 : xfs_ino_t ino;
463 :
464 0 : ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
465 0 : if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
466 : return 0;
467 :
468 0 : return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
469 : }
470 :
471 : /* Record a regular directory entry for later reinsertion. */
472 : STATIC int
473 0 : xrep_dir_salvage_data_entry(
474 : struct xrep_dir *rd,
475 : struct xfs_dir2_data_entry *dep)
476 : {
477 0 : xfs_ino_t ino;
478 :
479 0 : ino = be64_to_cpu(dep->inumber);
480 0 : if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
481 : return 0;
482 :
483 0 : return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
484 : }
485 :
486 : /* Try to recover block/data format directory entries. */
487 : STATIC int
488 0 : xrep_dir_recover_data(
489 : struct xrep_dir *rd,
490 : struct xfs_buf *bp)
491 : {
492 0 : struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo;
493 0 : unsigned int offset;
494 0 : unsigned int end;
495 0 : int error = 0;
496 :
497 : /*
498 : * Loop over the data portion of the block.
499 : * Each object is a real entry (dep) or an unused one (dup).
500 : */
501 0 : offset = geo->data_entry_offset;
502 0 : end = min_t(unsigned int, BBTOB(bp->b_length),
503 : xfs_dir3_data_end_offset(geo, bp->b_addr));
504 :
505 0 : while (offset < end) {
506 0 : struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
507 0 : struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
508 :
509 0 : if (xchk_should_terminate(rd->sc, &error))
510 0 : return error;
511 :
512 : /* Skip unused entries. */
513 0 : if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
514 0 : offset += be16_to_cpu(dup->length);
515 0 : continue;
516 : }
517 :
518 : /* Don't walk off the end of the block. */
519 0 : offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
520 0 : if (offset > end)
521 : break;
522 :
523 : /* Ok, let's save this entry. */
524 0 : error = xrep_dir_salvage_data_entry(rd, dep);
525 0 : if (error)
526 0 : return error;
527 :
528 : }
529 :
530 : return 0;
531 : }
532 :
533 : /* Try to recover shortform directory entries. */
534 : STATIC int
535 0 : xrep_dir_recover_sf(
536 : struct xrep_dir *rd)
537 : {
538 0 : struct xfs_dir2_sf_hdr *sfp;
539 0 : struct xfs_dir2_sf_entry *sfep;
540 0 : struct xfs_dir2_sf_entry *next;
541 0 : struct xfs_ifork *ifp;
542 0 : xfs_ino_t ino;
543 0 : unsigned char *end;
544 0 : int error = 0;
545 :
546 0 : ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
547 0 : sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
548 0 : end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
549 :
550 0 : ino = xfs_dir2_sf_get_parent_ino(sfp);
551 0 : trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
552 :
553 0 : sfep = xfs_dir2_sf_firstentry(sfp);
554 0 : while ((unsigned char *)sfep < end) {
555 0 : if (xchk_should_terminate(rd->sc, &error))
556 0 : return error;
557 :
558 0 : next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
559 0 : if ((unsigned char *)next > end)
560 : break;
561 :
562 : /* Ok, let's save this entry. */
563 0 : error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
564 0 : if (error)
565 0 : return error;
566 :
567 : sfep = next;
568 : }
569 :
570 : return 0;
571 : }
572 :
573 : /*
574 : * Try to figure out the format of this directory from the data fork mappings
575 : * and the directory size. If we can be reasonably sure of format, we can be
576 : * more aggressive in salvaging directory entries. On return, @magic_guess
577 : * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format"
578 : * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory,
579 : * and 0 if we can't tell.
580 : */
581 : STATIC void
582 0 : xrep_dir_guess_format(
583 : struct xrep_dir *rd,
584 : __be32 *magic_guess)
585 : {
586 0 : struct xfs_inode *dp = rd->sc->ip;
587 0 : struct xfs_mount *mp = rd->sc->mp;
588 0 : struct xfs_da_geometry *geo = mp->m_dir_geo;
589 0 : xfs_fileoff_t last;
590 0 : int error;
591 :
592 0 : ASSERT(xfs_has_crc(mp));
593 :
594 0 : *magic_guess = 0;
595 :
596 : /*
597 : * If there's a single directory block and the directory size is
598 : * exactly one block, this has to be a single block format directory.
599 : */
600 0 : error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
601 0 : if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
602 0 : dp->i_disk_size == geo->blksize) {
603 0 : *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
604 0 : return;
605 : }
606 :
607 : /*
608 : * If the last extent before the leaf offset matches the directory
609 : * size and the directory size is larger than 1 block, this is a
610 : * data format directory.
611 : */
612 0 : last = geo->leafblk;
613 0 : error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
614 0 : if (!error &&
615 0 : XFS_FSB_TO_B(mp, last) > geo->blksize &&
616 0 : XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
617 0 : *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
618 0 : return;
619 : }
620 : }
621 :
622 : /* Recover directory entries from a specific directory block. */
623 : STATIC int
624 0 : xrep_dir_recover_dirblock(
625 : struct xrep_dir *rd,
626 : __be32 magic_guess,
627 : xfs_dablk_t dabno)
628 : {
629 0 : struct xfs_dir2_data_hdr *hdr;
630 0 : struct xfs_buf *bp;
631 0 : __be32 oldmagic;
632 0 : int error;
633 :
634 : /*
635 : * Try to read buffer. We invalidate them in the next step so we don't
636 : * bother to set a buffer type or ops.
637 : */
638 0 : error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
639 : XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
640 0 : if (error || !bp)
641 : return error;
642 :
643 0 : hdr = bp->b_addr;
644 0 : oldmagic = hdr->magic;
645 :
646 0 : trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
647 0 : be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
648 :
649 : /*
650 : * If we're sure of the block's format, proceed with the salvage
651 : * operation using the specified magic number.
652 : */
653 0 : if (magic_guess) {
654 0 : hdr->magic = magic_guess;
655 0 : goto recover;
656 : }
657 :
658 : /*
659 : * If we couldn't guess what type of directory this is, then we will
660 : * only salvage entries from directory blocks that match the magic
661 : * number and pass verifiers.
662 : */
663 0 : switch (hdr->magic) {
664 0 : case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
665 : case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
666 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
667 0 : goto out;
668 0 : if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
669 0 : goto out;
670 : break;
671 0 : case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
672 : case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
673 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
674 0 : goto out;
675 0 : if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
676 0 : goto out;
677 : break;
678 0 : default:
679 0 : goto out;
680 : }
681 :
682 0 : recover:
683 0 : error = xrep_dir_recover_data(rd, bp);
684 :
685 0 : out:
686 0 : hdr->magic = oldmagic;
687 0 : xfs_trans_brelse(rd->sc->tp, bp);
688 0 : return error;
689 : }
690 :
691 : static inline void
692 282864 : xrep_dir_init_args(
693 : struct xrep_dir *rd,
694 : struct xfs_inode *dp,
695 : const struct xfs_name *name)
696 : {
697 282864 : memset(&rd->args, 0, sizeof(struct xfs_da_args));
698 282864 : rd->args.geo = rd->sc->mp->m_dir_geo;
699 282864 : rd->args.whichfork = XFS_DATA_FORK;
700 282864 : rd->args.owner = rd->sc->ip->i_ino;
701 282864 : rd->args.trans = rd->sc->tp;
702 282864 : rd->args.dp = dp;
703 282864 : if (!name)
704 : return;
705 258120 : rd->args.name = name->name;
706 258120 : rd->args.namelen = name->len;
707 258120 : rd->args.filetype = name->type;
708 258120 : rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
709 : }
710 :
711 : /* Replay a stashed createname into the temporary directory. */
712 : STATIC int
713 236438 : xrep_dir_replay_createname(
714 : struct xrep_dir *rd,
715 : const struct xfs_name *name,
716 : xfs_ino_t inum,
717 : xfs_extlen_t total)
718 : {
719 236438 : struct xfs_scrub *sc = rd->sc;
720 236438 : struct xfs_inode *dp = rd->sc->tempip;
721 236438 : bool is_block, is_leaf;
722 236438 : int error;
723 :
724 236438 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
725 :
726 236438 : error = xfs_dir_ino_validate(sc->mp, inum);
727 236431 : if (error)
728 : return error;
729 :
730 236428 : trace_xrep_dir_replay_createname(dp, name, inum);
731 :
732 236432 : xrep_dir_init_args(rd, dp, name);
733 236429 : rd->args.inumber = inum;
734 236429 : rd->args.total = total;
735 236429 : rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
736 :
737 236429 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
738 133307 : return xfs_dir2_sf_addname(&rd->args);
739 :
740 103122 : error = xfs_dir2_isblock(&rd->args, &is_block);
741 103122 : if (error)
742 : return error;
743 103122 : if (is_block)
744 8609 : return xfs_dir2_block_addname(&rd->args);
745 :
746 94513 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
747 94513 : if (error)
748 : return error;
749 94513 : if (is_leaf)
750 16176 : return xfs_dir2_leaf_addname(&rd->args);
751 :
752 78337 : return xfs_dir2_node_addname(&rd->args);
753 : }
754 :
755 : /* Replay a stashed removename onto the temporary directory. */
756 : STATIC int
757 0 : xrep_dir_replay_removename(
758 : struct xrep_dir *rd,
759 : const struct xfs_name *name,
760 : xfs_extlen_t total)
761 : {
762 0 : struct xfs_inode *dp = rd->args.dp;
763 0 : bool is_block, is_leaf;
764 0 : int error;
765 :
766 0 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
767 :
768 0 : xrep_dir_init_args(rd, dp, name);
769 0 : rd->args.op_flags = 0;
770 0 : rd->args.total = total;
771 :
772 0 : trace_xrep_dir_replay_removename(dp, name, 0);
773 :
774 0 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
775 0 : return xfs_dir2_sf_removename(&rd->args);
776 :
777 0 : error = xfs_dir2_isblock(&rd->args, &is_block);
778 0 : if (error)
779 : return error;
780 0 : if (is_block)
781 0 : return xfs_dir2_block_removename(&rd->args);
782 :
783 0 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
784 0 : if (error)
785 : return error;
786 0 : if (is_leaf)
787 0 : return xfs_dir2_leaf_removename(&rd->args);
788 :
789 0 : return xfs_dir2_node_removename(&rd->args);
790 : }
791 :
792 : /*
793 : * Add this stashed incore directory entry to the temporary directory.
794 : * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
795 : * must not be in transaction context.
796 : */
797 : STATIC int
798 236444 : xrep_dir_replay_update(
799 : struct xrep_dir *rd,
800 : const struct xrep_dirent *dirent)
801 : {
802 236444 : struct xfs_name name = {
803 236444 : .len = dirent->namelen,
804 236444 : .type = dirent->ftype,
805 236444 : .name = rd->pptr.p_name,
806 : };
807 236444 : struct xfs_mount *mp = rd->sc->mp;
808 : #ifdef DEBUG
809 236444 : xfs_ino_t ino;
810 : #endif
811 236444 : uint resblks;
812 236444 : int error;
813 :
814 236444 : resblks = xfs_link_space_res(mp, dirent->namelen);
815 236443 : error = xchk_trans_alloc(rd->sc, resblks);
816 236444 : if (error)
817 : return error;
818 :
819 : /* Lock the temporary directory and join it to the transaction */
820 236442 : xrep_tempfile_ilock(rd->sc);
821 236444 : xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);
822 :
823 236434 : switch (dirent->action) {
824 236434 : case XREP_DIRENT_ADD:
825 : /*
826 : * Create a replacement dirent in the temporary directory.
827 : * Note that _createname doesn't check for existing entries.
828 : * There shouldn't be any in the temporary dir, but we'll
829 : * verify this in debug mode.
830 : */
831 : #ifdef DEBUG
832 236434 : error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
833 236431 : if (error != -ENOENT) {
834 0 : ASSERT(error != -ENOENT);
835 0 : goto out_cancel;
836 : }
837 : #endif
838 :
839 236431 : error = xrep_dir_replay_createname(rd, &name, dirent->ino,
840 : resblks);
841 236442 : if (error)
842 0 : goto out_cancel;
843 :
844 236442 : if (name.type == XFS_DIR3_FT_DIR)
845 28121 : rd->subdirs++;
846 236442 : rd->dirents++;
847 236442 : break;
848 0 : case XREP_DIRENT_REMOVE:
849 : /*
850 : * Remove a dirent from the temporary directory. Note that
851 : * _removename doesn't check the inode target of the exist
852 : * entry. There should be a perfect match in the temporary
853 : * dir, but we'll verify this in debug mode.
854 : */
855 : #ifdef DEBUG
856 0 : error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
857 0 : if (error) {
858 0 : ASSERT(error != 0);
859 0 : goto out_cancel;
860 : }
861 0 : if (ino != dirent->ino) {
862 0 : ASSERT(ino == dirent->ino);
863 0 : error = -EIO;
864 0 : goto out_cancel;
865 : }
866 : #endif
867 :
868 0 : error = xrep_dir_replay_removename(rd, &name, resblks);
869 0 : if (error)
870 0 : goto out_cancel;
871 :
872 0 : if (name.type == XFS_DIR3_FT_DIR)
873 0 : rd->subdirs--;
874 0 : rd->dirents--;
875 0 : break;
876 0 : default:
877 0 : ASSERT(0);
878 0 : error = -EIO;
879 0 : goto out_cancel;
880 : }
881 :
882 : /* Commit and unlock. */
883 236442 : error = xrep_trans_commit(rd->sc);
884 236445 : if (error)
885 : return error;
886 :
887 236445 : xrep_tempfile_iunlock(rd->sc);
888 236445 : return 0;
889 0 : out_cancel:
890 0 : xchk_trans_cancel(rd->sc);
891 0 : xrep_tempfile_iunlock(rd->sc);
892 0 : return error;
893 : }
894 :
895 : /*
896 : * Flush stashed incore dirent updates that have been recorded by the scanner.
897 : * This is done to reduce the memory requirements of the directory rebuild,
898 : * since directories can contain up to 32GB of directory data.
899 : *
900 : * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir
901 : * IOLOCK.
902 : */
903 : STATIC int
904 24743 : xrep_dir_replay_updates(
905 : struct xrep_dir *rd)
906 : {
907 24743 : xfarray_idx_t array_cur;
908 24743 : int error;
909 :
910 : /* Add all the salvaged dirents to the temporary directory. */
911 261183 : foreach_xfarray_idx(rd->dir_entries, array_cur) {
912 236438 : struct xrep_dirent dirent;
913 :
914 236438 : error = xfarray_load(rd->dir_entries, array_cur, &dirent);
915 236444 : if (error)
916 0 : return error;
917 :
918 : /* The dirent name is stored in the in-core buffer. */
919 236444 : error = xfblob_load(rd->dir_names, dirent.name_cookie,
920 236444 : rd->pptr.p_name, dirent.namelen);
921 236445 : if (error)
922 0 : return error;
923 236445 : rd->pptr.p_name[MAXNAMELEN - 1] = 0;
924 :
925 236445 : error = xrep_dir_replay_update(rd, &dirent);
926 236440 : if (error)
927 0 : return error;
928 : }
929 :
930 : /* Empty out both arrays now that we've added the entries. */
931 24747 : xfarray_truncate(rd->dir_entries);
932 24746 : xfblob_truncate(rd->dir_names);
933 24746 : return 0;
934 : }
935 :
936 : /*
937 : * Periodically flush stashed directory entries to the temporary dir. This
938 : * is done to reduce the memory requirements of the directory rebuild, since
939 : * directories can contain up to 32GB of directory data.
940 : */
941 : STATIC int
942 0 : xrep_dir_flush_stashed(
943 : struct xrep_dir *rd)
944 : {
945 0 : int error;
946 :
947 : /*
948 : * Entering this function, the scrub context has a reference to the
949 : * inode being repaired, the temporary file, and a scrub transaction
950 : * that we use during dirent salvaging to avoid livelocking if there
951 : * are cycles in the directory structures. We hold ILOCK_EXCL on both
952 : * the inode being repaired and the temporary file, though they are
953 : * not ijoined to the scrub transaction.
954 : *
955 : * To constrain kernel memory use, we occasionally write salvaged
956 : * dirents from the xfarray and xfblob structures into the temporary
957 : * directory in preparation for swapping the directory structures at
958 : * the end. Updating the temporary file requires a transaction, so we
959 : * commit the scrub transaction and drop the two ILOCKs so that
960 : * we can allocate whatever transaction we want.
961 : *
962 : * We still hold IOLOCK_EXCL on the inode being repaired, which
963 : * prevents anyone from accessing the damaged directory data while we
964 : * repair it.
965 : */
966 0 : error = xrep_trans_commit(rd->sc);
967 0 : if (error)
968 : return error;
969 0 : xchk_iunlock(rd->sc, XFS_ILOCK_EXCL);
970 :
971 : /*
972 : * Take the IOLOCK of the temporary file while we modify dirents. This
973 : * isn't strictly required because the temporary file is never revealed
974 : * to userspace, but we follow the same locking rules. We still hold
975 : * sc->ip's IOLOCK.
976 : */
977 0 : error = xrep_tempfile_iolock_polled(rd->sc);
978 0 : if (error)
979 : return error;
980 :
981 : /* Write to the tempdir all the updates that we've stashed. */
982 0 : error = xrep_dir_replay_updates(rd);
983 0 : xrep_tempfile_iounlock(rd->sc);
984 0 : if (error)
985 : return error;
986 :
987 : /*
988 : * Recreate the salvage transaction and relock the dir we're salvaging.
989 : */
990 0 : error = xchk_trans_alloc(rd->sc, 0);
991 0 : if (error)
992 : return error;
993 0 : xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
994 0 : return 0;
995 : }
996 :
997 : /* Decide if we've stashed too much dirent data in memory. */
998 : static inline bool
999 662152814 : xrep_dir_want_flush_stashed(
1000 : struct xrep_dir *rd)
1001 : {
1002 662152814 : unsigned long long bytes;
1003 :
1004 662152814 : bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
1005 662093887 : return bytes > XREP_DIR_MAX_STASH_BYTES;
1006 : }
1007 :
1008 : /* Extract as many directory entries as we can. */
1009 : STATIC int
1010 0 : xrep_dir_recover(
1011 : struct xrep_dir *rd)
1012 : {
1013 0 : struct xfs_bmbt_irec got;
1014 0 : struct xfs_scrub *sc = rd->sc;
1015 0 : struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
1016 0 : xfs_fileoff_t offset;
1017 0 : xfs_dablk_t dabno;
1018 0 : __be32 magic_guess;
1019 0 : int nmap;
1020 0 : int error;
1021 :
1022 0 : xrep_dir_guess_format(rd, &magic_guess);
1023 :
1024 : /* Iterate each directory data block in the data fork. */
1025 0 : for (offset = 0;
1026 0 : offset < geo->leafblk;
1027 0 : offset = got.br_startoff + got.br_blockcount) {
1028 0 : nmap = 1;
1029 0 : error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
1030 : &got, &nmap, 0);
1031 0 : if (error)
1032 0 : return error;
1033 0 : if (nmap != 1)
1034 : return -EFSCORRUPTED;
1035 0 : if (!xfs_bmap_is_written_extent(&got))
1036 0 : continue;
1037 :
1038 0 : for (dabno = round_up(got.br_startoff, geo->fsbcount);
1039 0 : dabno < got.br_startoff + got.br_blockcount;
1040 0 : dabno += geo->fsbcount) {
1041 0 : if (xchk_should_terminate(rd->sc, &error))
1042 0 : return error;
1043 :
1044 0 : error = xrep_dir_recover_dirblock(rd,
1045 : magic_guess, dabno);
1046 0 : if (error)
1047 0 : return error;
1048 :
1049 : /* Flush dirents to constrain memory usage. */
1050 0 : if (xrep_dir_want_flush_stashed(rd)) {
1051 0 : error = xrep_dir_flush_stashed(rd);
1052 0 : if (error)
1053 0 : return error;
1054 : }
1055 : }
1056 : }
1057 :
1058 : return 0;
1059 : }
1060 :
1061 : /*
1062 : * Find all the directory entries for this inode by scraping them out of the
1063 : * directory leaf blocks by hand, and flushing them into the temp dir.
1064 : */
1065 : STATIC int
1066 0 : xrep_dir_find_entries(
1067 : struct xrep_dir *rd)
1068 : {
1069 0 : struct xfs_inode *dp = rd->sc->ip;
1070 0 : int error;
1071 :
1072 : /*
1073 : * Salvage directory entries from the old directory, and write them to
1074 : * the temporary directory.
1075 : */
1076 0 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
1077 0 : error = xrep_dir_recover_sf(rd);
1078 : } else {
1079 0 : error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
1080 0 : if (error)
1081 : return error;
1082 :
1083 0 : error = xrep_dir_recover(rd);
1084 : }
1085 0 : if (error)
1086 : return error;
1087 :
1088 0 : return xrep_dir_flush_stashed(rd);
1089 : }
1090 :
1091 : /* Scan all files in the filesystem for dirents. */
1092 : STATIC int
1093 0 : xrep_dir_salvage_entries(
1094 : struct xrep_dir *rd)
1095 : {
1096 0 : struct xfs_scrub *sc = rd->sc;
1097 0 : int error;
1098 :
1099 : /*
1100 : * Drop the ILOCK on this directory so that we can scan for this
1101 : * directory's parent. Figure out who is going to be the parent of
1102 : * this directory, then retake the ILOCK so that we can salvage
1103 : * directory entries.
1104 : */
1105 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1106 0 : error = xrep_dir_find_parent(rd);
1107 0 : xchk_ilock(sc, XFS_ILOCK_EXCL);
1108 0 : if (error)
1109 : return error;
1110 :
1111 : /*
1112 : * Collect directory entries by parsing raw leaf blocks to salvage
1113 : * whatever we can. When we're done, free the staging memory before
1114 : * swapping the directories to reduce memory usage.
1115 : */
1116 0 : error = xrep_dir_find_entries(rd);
1117 0 : if (error)
1118 : return error;
1119 :
1120 : /*
1121 : * Cancel the repair transaction and drop the ILOCK so that we can
1122 : * (later) use the atomic extent swap helper functions to compute the
1123 : * correct block reservations and re-lock the inodes.
1124 : *
1125 : * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
1126 : * modifications, but there's nothing to prevent userspace from reading
1127 : * the directory until we're ready for the swap operation. Reads will
1128 : * return -EIO without shutting down the fs, so we're ok with that.
1129 : *
1130 : * The VFS can change dotdot on us, but the findparent scan will keep
1131 : * our incore parent inode up to date. See the note on locking issues
1132 : * for more details.
1133 : */
1134 0 : error = xrep_trans_commit(sc);
1135 0 : if (error)
1136 : return error;
1137 :
1138 0 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1139 0 : return 0;
1140 : }
1141 :
1142 :
1143 : /*
1144 : * Examine a parent pointer of a file. If it leads us back to the directory
1145 : * that we're rebuilding, create an incore dirent from the parent pointer and
1146 : * stash it.
1147 : */
1148 : STATIC int
1149 835631259 : xrep_dir_scan_pptr(
1150 : struct xfs_scrub *sc,
1151 : struct xfs_inode *ip,
1152 : const struct xfs_parent_name_irec *pptr,
1153 : void *priv)
1154 : {
1155 835631259 : struct xfs_name xname;
1156 835631259 : struct xrep_dir *rd = priv;
1157 835631259 : int error;
1158 :
1159 : /* Ignore parent pointers that point back to a different dir. */
1160 835631259 : if (pptr->p_ino != sc->ip->i_ino ||
1161 236464 : pptr->p_gen != VFS_I(sc->ip)->i_generation)
1162 : return 0;
1163 :
1164 : /*
1165 : * Transform this parent pointer into a dirent and queue it for later
1166 : * addition to the temporary directory.
1167 : */
1168 236464 : xname.name = pptr->p_name;
1169 236464 : xname.len = pptr->p_namelen;
1170 236464 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
1171 :
1172 236466 : mutex_lock(&rd->pscan.lock);
1173 236467 : error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
1174 236461 : mutex_unlock(&rd->pscan.lock);
1175 236461 : return error;
1176 : }
1177 :
1178 : /*
1179 : * If this child dirent points to the directory being repaired, remember that
1180 : * fact so that we can reset the dotdot entry if necessary.
1181 : */
1182 : STATIC int
1183 1364399196 : xrep_dir_scan_dirent(
1184 : struct xfs_scrub *sc,
1185 : struct xfs_inode *dp,
1186 : xfs_dir2_dataptr_t dapos,
1187 : const struct xfs_name *name,
1188 : xfs_ino_t ino,
1189 : void *priv)
1190 : {
1191 1364399196 : struct xrep_dir *rd = priv;
1192 :
1193 : /* Dirent doesn't point to this directory. */
1194 1364399196 : if (ino != rd->sc->ip->i_ino)
1195 : return 0;
1196 :
1197 : /* Ignore garbage inum. */
1198 51417 : if (!xfs_verify_dir_ino(rd->sc->mp, ino))
1199 : return 0;
1200 :
1201 : /* No weird looking names. */
1202 51417 : if (name->len >= MAXNAMELEN || name->len <= 0)
1203 : return 0;
1204 :
1205 : /* Don't pick up dot or dotdot entries; we only want child dirents. */
1206 74709 : if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
1207 23292 : xfs_dir2_samename(name, &xfs_name_dot))
1208 28126 : return 0;
1209 :
1210 23292 : trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
1211 : dp->i_ino);
1212 :
1213 23292 : xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
1214 23292 : return 0;
1215 : }
1216 :
1217 : /*
1218 : * Decide if we want to look for child dirents or parent pointers in this file.
1219 : * Skip the dir being repaired and any files being used to stage repairs.
1220 : */
1221 : static inline bool
1222 1323388473 : xrep_dir_want_scan(
1223 : struct xrep_dir *rd,
1224 : const struct xfs_inode *ip)
1225 : {
1226 1323388473 : return ip != rd->sc->ip && !xrep_is_tempfile(ip);
1227 : }
1228 :
1229 : /*
1230 : * Take ILOCK on a file that we want to scan.
1231 : *
1232 : * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
1233 : * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED.
1234 : */
1235 : static inline unsigned int
1236 661944427 : xrep_dir_scan_ilock(
1237 : struct xrep_dir *rd,
1238 : struct xfs_inode *ip)
1239 : {
1240 661944427 : uint lock_mode = XFS_ILOCK_SHARED;
1241 :
1242 : /* Need to take the shared ILOCK to advance the iscan cursor. */
1243 661944427 : if (!xrep_dir_want_scan(rd, ip))
1244 72287 : goto lock;
1245 :
1246 926455861 : if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
1247 0 : lock_mode = XFS_ILOCK_EXCL;
1248 0 : goto lock;
1249 : }
1250 :
1251 1323752421 : if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
1252 0 : lock_mode = XFS_ILOCK_EXCL;
1253 :
1254 661970628 : lock:
1255 662042915 : xfs_ilock(ip, lock_mode);
1256 662020331 : return lock_mode;
1257 : }
1258 :
1259 : /*
1260 : * Scan this file for relevant child dirents or parent pointers that point to
1261 : * the directory we're rebuilding.
1262 : */
1263 : STATIC int
1264 661870829 : xrep_dir_scan_file(
1265 : struct xrep_dir *rd,
1266 : struct xfs_inode *ip)
1267 : {
1268 661870829 : unsigned int lock_mode;
1269 661870829 : int error = 0;
1270 :
1271 661870829 : lock_mode = xrep_dir_scan_ilock(rd, ip);
1272 :
1273 662044253 : if (!xrep_dir_want_scan(rd, ip))
1274 72287 : goto scan_done;
1275 :
1276 661991553 : error = xchk_pptr_walk(rd->sc, ip, xrep_dir_scan_pptr, &rd->pptr, rd);
1277 661992264 : if (error)
1278 0 : goto scan_done;
1279 :
1280 661992264 : if (S_ISDIR(VFS_I(ip)->i_mode)) {
1281 264714065 : error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
1282 264707235 : if (error)
1283 0 : goto scan_done;
1284 : }
1285 :
1286 661985434 : scan_done:
1287 662057721 : xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
1288 661904608 : xfs_iunlock(ip, lock_mode);
1289 662129654 : return error;
1290 : }
1291 :
1292 : /*
1293 : * Scan all files in the filesystem for parent pointers that we can turn into
1294 : * replacement dirents, and a dirent that we can use to set the dotdot pointer.
1295 : */
1296 : STATIC int
1297 24753 : xrep_dir_scan_dirtree(
1298 : struct xrep_dir *rd)
1299 : {
1300 24753 : struct xfs_scrub *sc = rd->sc;
1301 24753 : struct xfs_inode *ip;
1302 24753 : int error;
1303 :
1304 : /* Roots of directory trees are their own parents. */
1305 24753 : if (sc->ip == sc->mp->m_rootip || sc->ip == sc->mp->m_metadirip)
1306 1460 : xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
1307 :
1308 : /*
1309 : * Filesystem scans are time consuming. Drop the directory ILOCK and
1310 : * all other resources for the duration of the scan and hope for the
1311 : * best. The live update hooks will keep our scan information up to
1312 : * date even though we've dropped the locks.
1313 : */
1314 24753 : xchk_trans_cancel(sc);
1315 24752 : if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
1316 24751 : xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
1317 : XFS_ILOCK_EXCL));
1318 24754 : error = xchk_trans_alloc_empty(sc);
1319 24751 : if (error)
1320 : return error;
1321 :
1322 661905172 : while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
1323 662010725 : bool flush;
1324 :
1325 662010725 : error = xrep_dir_scan_file(rd, ip);
1326 662090768 : xchk_irele(sc, ip);
1327 662152945 : if (error)
1328 : break;
1329 :
1330 : /* Flush stashed dirent updates to constrain memory usage. */
1331 662152945 : mutex_lock(&rd->pscan.lock);
1332 662156615 : flush = xrep_dir_want_flush_stashed(rd);
1333 662148506 : mutex_unlock(&rd->pscan.lock);
1334 661791362 : if (flush) {
1335 0 : xchk_trans_cancel(sc);
1336 :
1337 0 : error = xrep_tempfile_iolock_polled(sc);
1338 0 : if (error)
1339 : break;
1340 :
1341 0 : mutex_lock(&rd->pscan.lock);
1342 0 : error = xrep_dir_replay_updates(rd);
1343 0 : mutex_unlock(&rd->pscan.lock);
1344 0 : xrep_tempfile_iounlock(sc);
1345 0 : if (error)
1346 : break;
1347 :
1348 0 : error = xchk_trans_alloc_empty(sc);
1349 0 : if (error)
1350 : break;
1351 : }
1352 :
1353 661791362 : if (xchk_should_terminate(sc, &error))
1354 : break;
1355 : }
1356 24753 : xchk_iscan_iter_finish(&rd->pscan.iscan);
1357 24752 : if (error) {
1358 : /*
1359 : * If we couldn't grab an inode that was busy with a state
1360 : * change, change the error code so that we exit to userspace
1361 : * as quickly as possible.
1362 : */
1363 6 : if (error == -EBUSY)
1364 : return -ECANCELED;
1365 6 : return error;
1366 : }
1367 :
1368 : /*
1369 : * Cancel the empty transaction so that we can (later) use the atomic
1370 : * extent swap helpers to lock files and commit the new directory.
1371 : */
1372 24746 : xchk_trans_cancel(rd->sc);
1373 24746 : return 0;
1374 : }
1375 :
1376 : /*
1377 : * Capture dirent updates being made by other threads which are relevant to the
1378 : * directory being repaired.
1379 : */
1380 : STATIC int
1381 13166476 : xrep_dir_live_update(
1382 : struct notifier_block *nb,
1383 : unsigned long action,
1384 : void *data)
1385 : {
1386 13166476 : struct xfs_dir_update_params *p = data;
1387 13166476 : struct xrep_dir *rd;
1388 13166476 : struct xfs_scrub *sc;
1389 13166476 : int error = 0;
1390 :
1391 13166476 : rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb);
1392 13166476 : sc = rd->sc;
1393 :
1394 : /*
1395 : * This thread updated a child dirent in the directory that we're
1396 : * rebuilding. Stash the update for replay against the temporary
1397 : * directory.
1398 : */
1399 13166476 : if (p->dp->i_ino == sc->ip->i_ino &&
1400 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
1401 0 : mutex_lock(&rd->pscan.lock);
1402 0 : if (p->delta > 0)
1403 0 : error = xrep_dir_stash_createname(rd, p->name,
1404 0 : p->ip->i_ino);
1405 : else
1406 0 : error = xrep_dir_stash_removename(rd, p->name,
1407 0 : p->ip->i_ino);
1408 0 : mutex_unlock(&rd->pscan.lock);
1409 0 : if (error)
1410 0 : goto out_abort;
1411 : }
1412 :
1413 : /*
1414 : * This thread updated another directory's child dirent that points to
1415 : * the directory that we're rebuilding, so remember the new dotdot
1416 : * target.
1417 : */
1418 13166476 : if (p->ip->i_ino == sc->ip->i_ino &&
1419 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
1420 0 : if (p->delta > 0) {
1421 0 : trace_xrep_dir_stash_createname(sc->tempip,
1422 : &xfs_name_dotdot,
1423 0 : p->dp->i_ino);
1424 :
1425 0 : xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
1426 : } else {
1427 0 : trace_xrep_dir_stash_removename(sc->tempip,
1428 : &xfs_name_dotdot,
1429 : rd->pscan.parent_ino);
1430 :
1431 0 : xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
1432 : }
1433 : }
1434 :
1435 : return NOTIFY_DONE;
1436 : out_abort:
1437 0 : xchk_iscan_abort(&rd->pscan.iscan);
1438 0 : return NOTIFY_DONE;
1439 : }
1440 :
1441 : /*
1442 : * Free all the directory blocks and reset the data fork. The caller must
1443 : * join the inode to the transaction. This function returns with the inode
1444 : * joined to a clean scrub transaction.
1445 : */
1446 : STATIC int
1447 24745 : xrep_dir_reset_fork(
1448 : struct xrep_dir *rd,
1449 : xfs_ino_t parent_ino)
1450 : {
1451 24745 : struct xfs_scrub *sc = rd->sc;
1452 24745 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
1453 24745 : int error;
1454 :
1455 : /* Unmap all the directory buffers. */
1456 24745 : if (xfs_ifork_has_extents(ifp)) {
1457 1701 : error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
1458 1701 : if (error)
1459 : return error;
1460 : }
1461 :
1462 24745 : trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
1463 :
1464 : /* Reset the data fork to an empty data fork. */
1465 24747 : xfs_idestroy_fork(ifp);
1466 24740 : ifp->if_bytes = 0;
1467 24740 : sc->tempip->i_disk_size = 0;
1468 :
1469 : /* Reinitialize the short form directory. */
1470 24740 : xrep_dir_init_args(rd, sc->tempip, NULL);
1471 24740 : error = xfs_dir2_sf_create(&rd->args, parent_ino);
1472 24743 : if (error)
1473 : return error;
1474 :
1475 24743 : return xrep_tempfile_roll_trans(sc);
1476 : }
1477 :
1478 : /*
1479 : * Prepare both inodes' directory forks for extent swapping. Promote the
1480 : * tempfile from short format to leaf format, and if the file being repaired
1481 : * has a short format data fork, turn it into an empty extent list.
1482 : */
1483 : STATIC int
1484 1701 : xrep_dir_swap_prep(
1485 : struct xfs_scrub *sc,
1486 : bool temp_local,
1487 : bool ip_local)
1488 : {
1489 1701 : int error;
1490 :
1491 : /*
1492 : * If the tempfile's directory is in shortform format, convert that
1493 : * to a single leaf extent so that we can use the atomic extent swap.
1494 : */
1495 1701 : if (temp_local) {
1496 1618 : struct xfs_da_args args = {
1497 1618 : .dp = sc->tempip,
1498 1618 : .geo = sc->mp->m_dir_geo,
1499 : .whichfork = XFS_DATA_FORK,
1500 1618 : .trans = sc->tp,
1501 : .total = 1,
1502 1618 : .owner = sc->ip->i_ino,
1503 : };
1504 :
1505 1618 : error = xfs_dir2_sf_to_block(&args);
1506 1618 : if (error)
1507 0 : return error;
1508 :
1509 : /*
1510 : * Roll the deferred log items to get us back to a clean
1511 : * transaction.
1512 : */
1513 1618 : error = xfs_defer_finish(&sc->tp);
1514 1618 : if (error)
1515 : return error;
1516 : }
1517 :
1518 : /*
1519 : * If the file being repaired had a shortform data fork, convert that
1520 : * to an empty extent list in preparation for the atomic extent swap.
1521 : */
1522 1701 : if (ip_local) {
1523 0 : struct xfs_ifork *ifp;
1524 :
1525 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1526 0 : xfs_idestroy_fork(ifp);
1527 0 : ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1528 0 : ifp->if_nextents = 0;
1529 0 : ifp->if_bytes = 0;
1530 0 : ifp->if_u1.if_root = NULL;
1531 0 : ifp->if_height = 0;
1532 :
1533 0 : xfs_trans_log_inode(sc->tp, sc->ip,
1534 : XFS_ILOG_CORE | XFS_ILOG_DDATA);
1535 : }
1536 :
1537 : return 0;
1538 : }
1539 :
1540 : /*
1541 : * Replace the inode number of a directory entry.
1542 : */
1543 : static int
1544 21681 : xrep_dir_replace(
1545 : struct xrep_dir *rd,
1546 : struct xfs_inode *dp,
1547 : const struct xfs_name *name,
1548 : xfs_ino_t inum,
1549 : xfs_extlen_t total)
1550 : {
1551 21681 : struct xfs_scrub *sc = rd->sc;
1552 21681 : bool is_block, is_leaf;
1553 21681 : int error;
1554 :
1555 21681 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
1556 :
1557 21681 : error = xfs_dir_ino_validate(sc->mp, inum);
1558 21681 : if (error)
1559 : return error;
1560 :
1561 21681 : xrep_dir_init_args(rd, dp, name);
1562 21681 : rd->args.inumber = inum;
1563 21681 : rd->args.total = total;
1564 :
1565 21681 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
1566 21674 : return xfs_dir2_sf_replace(&rd->args);
1567 :
1568 7 : error = xfs_dir2_isblock(&rd->args, &is_block);
1569 7 : if (error)
1570 : return error;
1571 7 : if (is_block)
1572 7 : return xfs_dir2_block_replace(&rd->args);
1573 :
1574 0 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
1575 0 : if (error)
1576 : return error;
1577 0 : if (is_leaf)
1578 0 : return xfs_dir2_leaf_replace(&rd->args);
1579 :
1580 0 : return xfs_dir2_node_replace(&rd->args);
1581 : }
1582 :
1583 : /*
1584 : * Reset the link count of this directory and adjust the unlinked list pointers
1585 : * as needed.
1586 : */
1587 : STATIC int
1588 24747 : xrep_dir_set_nlink(
1589 : struct xrep_dir *rd)
1590 : {
1591 24747 : struct xfs_scrub *sc = rd->sc;
1592 24747 : struct xfs_inode *dp = sc->ip;
1593 24747 : struct xfs_perag *pag;
1594 24747 : int error;
1595 :
1596 : /*
1597 : * The directory is not on the incore unlinked list, which means that
1598 : * it needs to be reachable via the directory tree. Update the nlink
1599 : * with our observed link count. If the directory has no parent, it
1600 : * will be moved to the orphanage.
1601 : */
1602 24747 : if (!xfs_inode_on_unlinked_list(dp)) {
1603 24747 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1604 24747 : return 0;
1605 : }
1606 :
1607 0 : xfs_emerg(dp->i_mount, "IUNLINK unlinked dir 0x%llx repair, dirents %u subdirs %llu curr_nlink %u orphan? %d", dp->i_ino, rd->dirents, rd->subdirs, VFS_I(dp)->i_nlink, rd->needs_adoption);
1608 :
1609 : /*
1610 : * The directory is on the unlinked list and we did not find any
1611 : * dirents. Set the link count to zero and let the directory
1612 : * inactivate when the last reference drops.
1613 : */
1614 0 : if (rd->dirents == 0) {
1615 0 : rd->needs_adoption = false;
1616 0 : xrep_set_nlink(sc->ip, 0);
1617 0 : return 0;
1618 : }
1619 :
1620 : /*
1621 : * The directory is on the unlinked list and we found dirents. This
1622 : * directory needs to be reachable via the directory tree. Remove the
1623 : * dir from the unlinked list and update nlink with the observed link
1624 : * count. If the directory has no parent, it will be moved to the
1625 : * orphanage.
1626 : */
1627 0 : pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
1628 0 : if (!pag) {
1629 0 : ASSERT(0);
1630 0 : return -EFSCORRUPTED;
1631 : }
1632 :
1633 0 : error = xfs_iunlink_remove(sc->tp, pag, dp);
1634 0 : xfs_perag_put(pag);
1635 0 : if (error)
1636 : return error;
1637 :
1638 0 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1639 0 : return 0;
1640 : }
1641 :
1642 : /*
1643 : * Finish replaying stashed dirent updates, allocate a transaction for swapping
1644 : * extents, and take the ILOCKs of both directories before we commit the new
1645 : * directory structure.
1646 : */
1647 : STATIC int
1648 24745 : xrep_dir_finalize_tempdir(
1649 : struct xrep_dir *rd)
1650 : {
1651 24745 : struct xfs_scrub *sc = rd->sc;
1652 24745 : int error;
1653 :
1654 24745 : if (!xfs_has_parent(sc->mp))
1655 0 : return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1656 :
1657 24745 : error = xrep_dir_replay_updates(rd);
1658 24747 : if (error)
1659 : return error;
1660 :
1661 24747 : error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1662 24745 : if (error)
1663 : return error;
1664 :
1665 : /*
1666 : * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
1667 : * possible directory updates during the time when we did not hold the
1668 : * ILOCK. There should not be any dirent updates to replay, but check
1669 : * anyway.
1670 : */
1671 24746 : if (xfarray_length(rd->dir_entries) != 0) {
1672 0 : ASSERT(xfarray_length(rd->dir_entries) == 0);
1673 0 : return -EFSCORRUPTED;
1674 : }
1675 :
1676 : return 0;
1677 : }
1678 :
1679 : /* Swap the temporary directory's data fork with the one being repaired. */
1680 : STATIC int
1681 24746 : xrep_dir_swap(
1682 : struct xrep_dir *rd)
1683 : {
1684 24746 : struct xfs_scrub *sc = rd->sc;
1685 24746 : bool ip_local, temp_local;
1686 24746 : int error = 0;
1687 :
1688 : /*
1689 : * If we never found the parent for this directory, temporarily assign
1690 : * the root dir as the parent; we'll move this to the orphanage after
1691 : * swapping the dir contents. We hold the ILOCK of the dir being
1692 : * repaired, so we're not worried about racy updates of dotdot.
1693 : */
1694 24746 : ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1695 24746 : if (rd->pscan.parent_ino == NULLFSINO) {
1696 0 : rd->needs_adoption = true;
1697 0 : rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
1698 : }
1699 :
1700 : /*
1701 : * Reset the temporary directory's '..' entry to point to the parent
1702 : * that we found. The temporary directory was created with the root
1703 : * directory as the parent, so we can skip this if repairing a
1704 : * subdirectory of the root.
1705 : *
1706 : * It's also possible that this replacement could also expand a sf
1707 : * tempdir into block format.
1708 : */
1709 24746 : if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
1710 21681 : error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
1711 21681 : rd->pscan.parent_ino, rd->tx.req.resblks);
1712 21681 : if (error)
1713 : return error;
1714 : }
1715 :
1716 : /*
1717 : * Changing the dot and dotdot entries could have changed the shape of
1718 : * the directory, so we recompute these.
1719 : */
1720 24746 : ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1721 24746 : temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1722 :
1723 : /*
1724 : * If the both files have a local format data fork and the rebuilt
1725 : * directory data would fit in the repaired file's data fork, copy
1726 : * the contents from the tempfile and update the directory link count.
1727 : * We're done now.
1728 : */
1729 24746 : if (ip_local && temp_local &&
1730 23045 : sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
1731 23041 : xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
1732 23046 : return xrep_dir_set_nlink(rd);
1733 : }
1734 :
1735 : /* Clean the transaction before we start working on the extent swap. */
1736 1705 : error = xrep_tempfile_roll_trans(rd->sc);
1737 1701 : if (error)
1738 : return error;
1739 :
1740 : /* Otherwise, make sure both data forks are in block-mapping mode. */
1741 1701 : error = xrep_dir_swap_prep(sc, temp_local, ip_local);
1742 1701 : if (error)
1743 : return error;
1744 :
1745 : /*
1746 : * Set nlink of the directory in the same transaction sequence that
1747 : * (atomically) commits the new directory data.
1748 : */
1749 1701 : error = xrep_dir_set_nlink(rd);
1750 1701 : if (error)
1751 : return error;
1752 :
1753 1701 : return xrep_tempswap_contents(sc, &rd->tx);
1754 : }
1755 :
1756 : /*
1757 : * Swap the new directory contents (which we created in the tempfile) into the
1758 : * directory being repaired.
1759 : */
1760 : STATIC int
1761 24746 : xrep_dir_rebuild_tree(
1762 : struct xrep_dir *rd)
1763 : {
1764 24746 : struct xfs_scrub *sc = rd->sc;
1765 24746 : int error;
1766 :
1767 24746 : trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
1768 :
1769 : /*
1770 : * Take the IOLOCK on the temporary file so that we can run dir
1771 : * operations with the same locks held as we would for a normal file.
1772 : * We still hold sc->ip's IOLOCK.
1773 : */
1774 24746 : error = xrep_tempfile_iolock_polled(rd->sc);
1775 24745 : if (error)
1776 : return error;
1777 :
1778 : /*
1779 : * Allocate transaction, lock inodes, and make sure that we've replayed
1780 : * all the stashed dirent updates to the tempdir. After this point,
1781 : * we're ready to swapext.
1782 : */
1783 24745 : error = xrep_dir_finalize_tempdir(rd);
1784 24746 : if (error)
1785 : return error;
1786 :
1787 24746 : if (xchk_iscan_aborted(&rd->pscan.iscan))
1788 : return -ECANCELED;
1789 :
1790 : /*
1791 : * Swap the tempdir's data fork with the file being repaired. This
1792 : * recreates the transaction and re-takes the ILOCK in the scrub
1793 : * context.
1794 : */
1795 24744 : error = xrep_dir_swap(rd);
1796 24745 : if (error)
1797 : return error;
1798 :
1799 : /*
1800 : * Release the old directory blocks and reset the data fork of the temp
1801 : * directory to an empty shortform directory because inactivation does
1802 : * nothing for directories.
1803 : */
1804 24745 : return xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
1805 : }
1806 :
1807 : /* Set up the filesystem scan so we can regenerate directory entries. */
1808 : STATIC int
1809 24753 : xrep_dir_setup_scan(
1810 : struct xrep_dir *rd)
1811 : {
1812 24753 : struct xfs_scrub *sc = rd->sc;
1813 24753 : char *descr;
1814 24753 : int error;
1815 :
1816 : /* Set up some staging memory for salvaging dirents. */
1817 24753 : descr = xchk_xfile_ino_descr(sc, "directory entries");
1818 24751 : error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
1819 : &rd->dir_entries);
1820 24753 : kfree(descr);
1821 24753 : if (error)
1822 : return error;
1823 :
1824 24753 : descr = xchk_xfile_ino_descr(sc, "directory entry names");
1825 24753 : error = xfblob_create(descr, &rd->dir_names);
1826 24752 : kfree(descr);
1827 24752 : if (error)
1828 0 : goto out_xfarray;
1829 :
1830 24752 : if (xfs_has_parent(sc->mp))
1831 24752 : error = __xrep_findparent_scan_start(sc, &rd->pscan,
1832 : xrep_dir_live_update);
1833 : else
1834 0 : error = xrep_findparent_scan_start(sc, &rd->pscan);
1835 24753 : if (error)
1836 0 : goto out_xfblob;
1837 :
1838 : return 0;
1839 :
1840 : out_xfblob:
1841 0 : xfblob_destroy(rd->dir_names);
1842 0 : rd->dir_names = NULL;
1843 0 : out_xfarray:
1844 0 : xfarray_destroy(rd->dir_entries);
1845 0 : rd->dir_entries = NULL;
1846 0 : return error;
1847 : }
1848 :
1849 : /*
1850 : * Move the current file to the orphanage.
1851 : *
1852 : * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon
1853 : * successful return, the scrub transaction will have enough extra reservation
1854 : * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
1855 : * orphanage; and both inodes will be ijoined.
1856 : */
1857 : STATIC int
1858 0 : xrep_dir_move_to_orphanage(
1859 : struct xrep_dir *rd)
1860 : {
1861 0 : struct xfs_scrub *sc = rd->sc;
1862 0 : xfs_ino_t orig_parent, new_parent;
1863 0 : int error;
1864 :
1865 : /*
1866 0 : * We are about to drop the ILOCK on sc->ip to lock the orphanage and
1867 : * prepare for the adoption. Therefore, look up the old dotdot entry
1868 : * for sc->ip so that we can compare it after we re-lock sc->ip.
1869 : */
1870 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
1871 : if (error)
1872 : return error;
1873 :
1874 0 : /*
1875 0 : * We hold ILOCK_EXCL on both the directory and the tempdir after a
1876 : * successful rebuild. Before we can move the directory to the
1877 : * orphanage, we must roll to a clean unjoined transaction.
1878 : */
1879 : error = xfs_trans_roll(&sc->tp);
1880 : if (error)
1881 : return error;
1882 :
1883 0 : /*
1884 0 : * Because the orphanage is just another directory in the filesystem,
1885 : * we must take its IOLOCK to coordinate with the VFS. We cannot take
1886 : * an IOLOCK while holding an ILOCK, so we must drop them all. We may
1887 : * have to drop the IOLOCK as well.
1888 : */
1889 : xrep_tempfile_iunlock_both(sc);
1890 :
1891 : error = xrep_adoption_init(sc, &rd->adoption);
1892 : if (error)
1893 0 : return error;
1894 :
1895 0 : if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
1896 0 : xchk_iunlock(sc, sc->ilock_flags);
1897 : error = xrep_orphanage_iolock_two(sc);
1898 : if (error)
1899 0 : goto err_adoption;
1900 0 : }
1901 0 :
1902 0 : /* Prepare for the adoption and lock both down. */
1903 0 : error = xrep_adoption_prep(&rd->adoption);
1904 : if (error)
1905 : goto err_adoption;
1906 :
1907 0 : error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name);
1908 0 : if (error)
1909 0 : goto err_adoption;
1910 :
1911 0 : /*
1912 0 : * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
1913 0 : * entry again. If the parent changed or the child was unlinked while
1914 : * the child directory was unlocked, we don't need to move the child to
1915 : * the orphanage after all.
1916 : */
1917 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
1918 : if (error)
1919 : goto err_adoption;
1920 : if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
1921 0 : error = 0;
1922 0 : goto err_adoption;
1923 0 : }
1924 0 :
1925 0 : /* Attach to the orphanage. */
1926 0 : return xrep_adoption_commit(&rd->adoption);
1927 : err_adoption:
1928 : xrep_adoption_cancel(&rd->adoption, error);
1929 : return error;
1930 0 : }
1931 0 :
1932 0 : /*
1933 0 : * Repair the directory metadata.
1934 : *
1935 : * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer
1936 : * cache in XFS can't handle aliased multiblock buffers, so this might
1937 : * misbehave if the directory blocks are crosslinked with other filesystem
1938 : * metadata.
1939 : *
1940 : * XXX: Is it necessary to check the dcache for this directory to make sure
1941 : * that we always recreate every cached entry?
1942 : */
1943 : int
1944 : xrep_directory(
1945 : struct xfs_scrub *sc)
1946 : {
1947 : struct xrep_dir *rd = sc->buf;
1948 24751 : int error;
1949 :
1950 : /* The rmapbt is required to reap the old data fork. */
1951 24751 : if (!xfs_has_rmapbt(sc->mp))
1952 24751 : return -EOPNOTSUPP;
1953 :
1954 : error = xrep_dir_setup_scan(rd);
1955 24751 : if (error)
1956 : return error;
1957 :
1958 24751 : if (xfs_has_parent(sc->mp))
1959 24751 : error = xrep_dir_scan_dirtree(rd);
1960 : else
1961 : error = xrep_dir_salvage_entries(rd);
1962 24751 : if (error)
1963 24751 : goto out_teardown;
1964 :
1965 0 : /* Last chance to abort before we start committing fixes. */
1966 24752 : if (xchk_should_terminate(sc, &error))
1967 5 : goto out_teardown;
1968 :
1969 : error = xrep_dir_rebuild_tree(rd);
1970 24747 : if (error)
1971 0 : goto out_teardown;
1972 :
1973 24747 : if (rd->needs_adoption) {
1974 24746 : if (!xrep_orphanage_can_adopt(rd->sc))
1975 0 : error = -EFSCORRUPTED;
1976 : else
1977 24746 : error = xrep_dir_move_to_orphanage(rd);
1978 0 : if (error)
1979 0 : goto out_teardown;
1980 : }
1981 :
1982 : out_teardown:
1983 24746 : xrep_dir_teardown(sc);
1984 24751 : return error;
1985 24753 : }
|