Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_sb.h"
17 : #include "xfs_inode.h"
18 : #include "xfs_icache.h"
19 : #include "xfs_da_format.h"
20 : #include "xfs_da_btree.h"
21 : #include "xfs_dir2.h"
22 : #include "xfs_dir2_priv.h"
23 : #include "xfs_bmap.h"
24 : #include "xfs_quota.h"
25 : #include "xfs_bmap_btree.h"
26 : #include "xfs_trans_space.h"
27 : #include "xfs_bmap_util.h"
28 : #include "xfs_swapext.h"
29 : #include "xfs_xchgrange.h"
30 : #include "xfs_ag.h"
31 : #include "xfs_parent.h"
32 : #include "scrub/xfs_scrub.h"
33 : #include "scrub/scrub.h"
34 : #include "scrub/common.h"
35 : #include "scrub/trace.h"
36 : #include "scrub/repair.h"
37 : #include "scrub/tempfile.h"
38 : #include "scrub/tempswap.h"
39 : #include "scrub/xfile.h"
40 : #include "scrub/xfarray.h"
41 : #include "scrub/xfblob.h"
42 : #include "scrub/iscan.h"
43 : #include "scrub/readdir.h"
44 : #include "scrub/reap.h"
45 : #include "scrub/findparent.h"
46 : #include "scrub/orphanage.h"
47 : #include "scrub/listxattr.h"
48 :
49 : /*
50 : * Directory Repair
51 : * ================
52 : *
53 : * We repair directories by reading the directory data blocks looking for
54 : * directory entries that look salvageable (name passes verifiers, entry points
55 : * to a valid allocated inode, etc). Each entry worth salvaging is stashed in
56 : * memory, and the stashed entries are periodically replayed into a temporary
57 : * directory to constrain memory use. Batching the construction of the
58 : * temporary directory in this fashion reduces lock cycling of the directory
59 : * being repaired and the temporary directory, and will later become important
60 : * for parent pointer scanning.
61 : *
62 : * If parent pointers are enabled on this filesystem, we instead reconstruct
63 : * the directory by visiting each parent pointer of each file in the filesystem
64 : * and translating the relevant parent pointer records into dirents. In this
65 : * case, it is advantageous to stash all directory entries created from parent
66 : * pointers for a single child file before replaying them into the temporary
67 : * directory. To save memory, the live filesystem scan reuses the findparent
68 : * fields. Directory repair chooses either parent pointer scanning or
69 : * directory entry salvaging, but not both.
70 : *
71 : * Directory entries added to the temporary directory do not elevate the link
72 : * counts of the inodes found. When salvaging completes, the remaining stashed
73 : * entries are replayed to the temporary directory. An atomic extent swap is
74 : * used to commit the new directory blocks to the directory being repaired.
75 : * This will disrupt readdir cursors.
76 : *
77 : * Legacy Locking Issues
78 : * ---------------------
79 : *
80 : * Prior to Linux 6.5, if /a, /a/b, and /c were all directories, the VFS would
81 : * not take i_rwsem on /a/b for a "mv /a/b /c/" operation. This meant that
82 : * only b's ILOCK protected b's dotdot update. b's IOLOCK was not taken,
83 : * unlike every other dotdot update (link, remove, mkdir). If the repair code
84 : * dropped the ILOCK, it was required either to revalidate the dotdot entry
85 : * or to use dirent hooks to capture updates from other threads.
86 : */
87 :
/*
 * Values stored in xrep_dirent.action.  They tell the replay code whether a
 * stashed directory entry is to be added to or removed from the tempdir.
 */

/* Create a dirent in the tempdir. */
#define XREP_DIRENT_ADD		(1)

/* Remove a dirent from the tempdir. */
#define XREP_DIRENT_REMOVE	(2)
93 :
/*
 * Directory entry to be restored in the new directory.  One of these records
 * is appended to xrep_dir.dir_entries for every stashed update; the name
 * bytes themselves are stored separately in xrep_dir.dir_names and are
 * retrieved again through @name_cookie.
 */
struct xrep_dirent {
	/* Cookie for retrieval of the dirent name. */
	xfblob_cookie		name_cookie;

	/* Target inode number. */
	xfs_ino_t		ino;

	/* Length of the dirent name, in bytes. */
	uint8_t			namelen;

	/* File type of the dirent (copied from xfs_name.type when stashed). */
	uint8_t			ftype;

	/* XREP_DIRENT_{ADD,REMOVE} */
	uint8_t			action;
};
111 :
/*
 * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
 * before we write them to the temp dir.  The combined byte usage of both
 * structures is compared against this limit to decide when to flush.
 */
#define XREP_DIR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
117 :
/*
 * In-core state for one directory repair.  Allocated by the setup function
 * and hung off the scrub context's buf pointer.
 */
struct xrep_dir {
	/* Scrub context that owns this repair. */
	struct xfs_scrub	*sc;

	/* Fixed-size array of xrep_dirent structures. */
	struct xfarray		*dir_entries;

	/* Blobs containing directory entry names. */
	struct xfblob		*dir_names;

	/* Information for swapping data forks at the end. */
	struct xrep_tempswap	tx;

	/* Preallocated args struct for performing dir operations */
	struct xfs_da_args	args;

	/*
	 * Information used to scan the filesystem to find the inumber of the
	 * dotdot entry for this directory.  For directory salvaging when
	 * parent pointers are not enabled, we use the findparent_* functions
	 * on this object and access only the parent_ino field directly.
	 *
	 * When parent pointers are enabled, however, the pptr scanner uses the
	 * iscan, hooks, lock, and parent_ino fields of this object directly.
	 * @pscan.lock coordinates access to dir_entries, dir_names,
	 * parent_ino, subdirs, dirents, and args.  This reduces the memory
	 * requirements of this structure.
	 */
	struct xrep_parent_scan_info pscan;

	/*
	 * Context information for attaching this directory to the lost+found
	 * if this directory does not have a parent.
	 */
	struct xrep_adoption	adoption;

	/*
	 * How many subdirectories did we find?  Adjusted as directory-type
	 * dirents are replayed into (or removed from) the tempdir.
	 */
	uint64_t		subdirs;

	/*
	 * How many dirents did we find?  Adjusted for every dirent replayed
	 * into (or removed from) the tempdir.
	 */
	unsigned int		dirents;

	/* Should we move this directory to the orphanage? */
	bool			needs_adoption;

	/*
	 * Scratch buffer for reading parent pointers from child files.  The
	 * p_name field is used to flush stashed dirents into the temporary
	 * directory in between parent pointers.  At the very end of the
	 * repair, it can also be used to compute the lost+found filename
	 * if we need to reparent the directory.
	 */
	struct xfs_parent_name_irec pptr;
};
171 :
172 : /* Tear down all the incore stuff we created. */
173 : static void
174 197896 : xrep_dir_teardown(
175 : struct xfs_scrub *sc)
176 : {
177 197896 : struct xrep_dir *rd = sc->buf;
178 :
179 197896 : xrep_findparent_scan_teardown(&rd->pscan);
180 197901 : xfblob_destroy(rd->dir_names);
181 197896 : xfarray_destroy(rd->dir_entries);
182 197894 : }
183 :
184 : /* Set up for a directory repair. */
185 : int
186 242643 : xrep_setup_directory(
187 : struct xfs_scrub *sc)
188 : {
189 242643 : struct xrep_dir *rd;
190 242643 : int error;
191 :
192 242643 : xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
193 :
194 242638 : error = xrep_orphanage_try_create(sc);
195 242643 : if (error)
196 : return error;
197 :
198 242643 : error = xrep_tempfile_create(sc, S_IFDIR);
199 242599 : if (error)
200 : return error;
201 :
202 242117 : rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
203 242130 : if (!rd)
204 : return -ENOMEM;
205 242130 : rd->sc = sc;
206 242130 : sc->buf = rd;
207 :
208 242130 : return 0;
209 : }
210 :
211 : /*
212 : * If we're the root of a directory tree, we are our own parent. If we're an
213 : * unlinked directory, the parent /won't/ have a link to us. Set the parent
214 : * directory to the root for both cases. Returns NULLFSINO if we don't know
215 : * what to do.
216 : */
217 : static inline xfs_ino_t
218 : xrep_dir_self_parent(
219 : struct xrep_dir *rd)
220 : {
221 : struct xfs_scrub *sc = rd->sc;
222 :
223 : if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
224 : return sc->mp->m_sb.sb_rootino;
225 :
226 : if (VFS_I(sc->ip)->i_nlink == 0)
227 : return sc->mp->m_sb.sb_rootino;
228 :
229 : return NULLFSINO;
230 : }
231 :
232 : /*
233 : * Look up the dotdot entry and confirm that it's really the parent.
234 : * Returns NULLFSINO if we don't know what to do.
235 : */
236 : static inline xfs_ino_t
237 4 : xrep_dir_lookup_parent(
238 : struct xrep_dir *rd)
239 : {
240 4 : struct xfs_scrub *sc = rd->sc;
241 4 : xfs_ino_t ino;
242 4 : int error;
243 :
244 4 : error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
245 4 : if (error)
246 : return NULLFSINO;
247 4 : if (!xfs_verify_dir_ino(sc->mp, ino))
248 : return NULLFSINO;
249 :
250 4 : error = xrep_findparent_confirm(sc, &ino);
251 4 : if (error)
252 : return NULLFSINO;
253 :
254 4 : return ino;
255 : }
256 :
257 : /*
258 : * Look up '..' in the dentry cache and confirm that it's really the parent.
259 : * Returns NULLFSINO if the dcache misses or if the hit is implausible.
260 : */
261 : static inline xfs_ino_t
262 980 : xrep_dir_dcache_parent(
263 : struct xrep_dir *rd)
264 : {
265 980 : struct xfs_scrub *sc = rd->sc;
266 980 : xfs_ino_t parent_ino;
267 980 : int error;
268 :
269 980 : parent_ino = xrep_findparent_from_dcache(sc);
270 982 : if (parent_ino == NULLFSINO)
271 : return parent_ino;
272 :
273 978 : error = xrep_findparent_confirm(sc, &parent_ino);
274 978 : if (error)
275 : return NULLFSINO;
276 :
277 978 : return parent_ino;
278 : }
279 :
280 : /* Try to find the parent of the directory being repaired. */
281 : STATIC int
282 992 : xrep_dir_find_parent(
283 : struct xrep_dir *rd)
284 : {
285 992 : xfs_ino_t ino;
286 :
287 992 : ino = xrep_findparent_self_reference(rd->sc);
288 990 : if (ino != NULLFSINO) {
289 11 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
290 11 : return 0;
291 : }
292 :
293 979 : ino = xrep_dir_dcache_parent(rd);
294 981 : if (ino != NULLFSINO) {
295 977 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
296 977 : return 0;
297 : }
298 :
299 4 : ino = xrep_dir_lookup_parent(rd);
300 4 : if (ino != NULLFSINO) {
301 4 : xrep_findparent_scan_finish_early(&rd->pscan, ino);
302 4 : return 0;
303 : }
304 :
305 : /*
306 : * A full filesystem scan is the last resort. On a busy filesystem,
307 : * the scan can fail with -EBUSY if we cannot grab IOLOCKs. That means
308 : * that we don't know what who the parent is, so we should return to
309 : * userspace.
310 : */
311 0 : return xrep_findparent_scan(&rd->pscan);
312 : }
313 :
314 : /*
315 : * Decide if we want to salvage this entry. We don't bother with oversized
316 : * names or the dot entry.
317 : */
318 : STATIC int
319 7195 : xrep_dir_want_salvage(
320 : struct xrep_dir *rd,
321 : const char *name,
322 : int namelen,
323 : xfs_ino_t ino)
324 : {
325 7195 : struct xfs_mount *mp = rd->sc->mp;
326 :
327 : /* No pointers to ourselves or to garbage. */
328 7195 : if (ino == rd->sc->ip->i_ino)
329 : return false;
330 7189 : if (!xfs_verify_dir_ino(mp, ino))
331 : return false;
332 :
333 : /* No weird looking names or dot entries. */
334 7187 : if (namelen >= MAXNAMELEN || namelen <= 0)
335 : return false;
336 7187 : if (namelen == 1 && name[0] == '.')
337 0 : return false;
338 :
339 : return true;
340 : }
341 :
342 : /*
343 : * Remember that we want to create a dirent in the tempdir. These stashed
344 : * actions will be replayed later.
345 : */
346 : STATIC int
347 2490000 : xrep_dir_stash_createname(
348 : struct xrep_dir *rd,
349 : const struct xfs_name *name,
350 : xfs_ino_t ino)
351 : {
352 2490000 : struct xrep_dirent dirent = {
353 : .action = XREP_DIRENT_ADD,
354 : .ino = ino,
355 2490000 : .namelen = name->len,
356 2490000 : .ftype = name->type,
357 : };
358 2490000 : int error;
359 :
360 2490000 : trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
361 :
362 2489996 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
363 2489996 : name->len);
364 2489983 : if (error)
365 : return error;
366 :
367 2489983 : return xfarray_append(rd->dir_entries, &dirent);
368 : }
369 :
370 : /*
371 : * Remember that we want to remove a dirent from the tempdir. These stashed
372 : * actions will be replayed later.
373 : */
374 : STATIC int
375 0 : xrep_dir_stash_removename(
376 : struct xrep_dir *rd,
377 : const struct xfs_name *name,
378 : xfs_ino_t ino)
379 : {
380 0 : struct xrep_dirent dirent = {
381 : .action = XREP_DIRENT_REMOVE,
382 : .ino = ino,
383 0 : .namelen = name->len,
384 0 : .ftype = name->type,
385 : };
386 0 : int error;
387 :
388 0 : trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
389 :
390 0 : error = xfblob_store(rd->dir_names, &dirent.name_cookie, name->name,
391 0 : name->len);
392 0 : if (error)
393 : return error;
394 :
395 0 : return xfarray_append(rd->dir_entries, &dirent);
396 : }
397 :
398 : /* Allocate an in-core record to hold entries while we rebuild the dir data. */
399 : STATIC int
400 7188 : xrep_dir_salvage_entry(
401 : struct xrep_dir *rd,
402 : unsigned char *name,
403 : unsigned int namelen,
404 : xfs_ino_t ino)
405 : {
406 7188 : struct xfs_name xname = {
407 : .name = name,
408 : };
409 7188 : struct xfs_scrub *sc = rd->sc;
410 7188 : struct xfs_inode *ip;
411 7188 : unsigned int i = 0;
412 7188 : int error = 0;
413 :
414 7188 : if (xchk_should_terminate(sc, &error))
415 0 : return error;
416 :
417 : /*
418 : * Truncate the name to the first character that would trip namecheck.
419 : * If we no longer have a name after that, ignore this entry.
420 : */
421 62994 : while (i < namelen && name[i] != 0 && name[i] != '/')
422 55806 : i++;
423 7188 : if (i == 0)
424 : return 0;
425 7188 : xname.len = i;
426 :
427 : /* Ignore '..' entries; we already picked the new parent. */
428 7188 : if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
429 6 : trace_xrep_dir_salvaged_parent(sc->ip, ino);
430 6 : return 0;
431 : }
432 :
433 7182 : trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
434 :
435 : /*
436 : * Compute the ftype or dump the entry if we can't. We don't lock the
437 : * inode because inodes can't change type while we have a reference.
438 : */
439 7182 : error = xchk_iget(sc, ino, &ip);
440 7183 : if (error)
441 : return 0;
442 :
443 : /* Don't mix metadata and regular directory trees. */
444 7183 : if (xfs_is_metadir_inode(ip) ^ xfs_is_metadir_inode(rd->sc->ip)) {
445 0 : xchk_irele(sc, ip);
446 0 : return 0;
447 : }
448 :
449 7183 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
450 7183 : xchk_irele(sc, ip);
451 :
452 7183 : return xrep_dir_stash_createname(rd, &xname, ino);
453 : }
454 :
455 : /* Record a shortform directory entry for later reinsertion. */
456 : STATIC int
457 5395 : xrep_dir_salvage_sf_entry(
458 : struct xrep_dir *rd,
459 : struct xfs_dir2_sf_hdr *sfp,
460 : struct xfs_dir2_sf_entry *sfep)
461 : {
462 5395 : xfs_ino_t ino;
463 :
464 5395 : ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
465 5393 : if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
466 : return 0;
467 :
468 5393 : return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
469 : }
470 :
471 : /* Record a regular directory entry for later reinsertion. */
472 : STATIC int
473 1800 : xrep_dir_salvage_data_entry(
474 : struct xrep_dir *rd,
475 : struct xfs_dir2_data_entry *dep)
476 : {
477 1800 : xfs_ino_t ino;
478 :
479 1800 : ino = be64_to_cpu(dep->inumber);
480 1800 : if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
481 : return 0;
482 :
483 1794 : return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
484 : }
485 :
486 : /* Try to recover block/data format directory entries. */
487 : STATIC int
488 12 : xrep_dir_recover_data(
489 : struct xrep_dir *rd,
490 : struct xfs_buf *bp)
491 : {
492 12 : struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo;
493 12 : unsigned int offset;
494 12 : unsigned int end;
495 12 : int error = 0;
496 :
497 : /*
498 : * Loop over the data portion of the block.
499 : * Each object is a real entry (dep) or an unused one (dup).
500 : */
501 12 : offset = geo->data_entry_offset;
502 12 : end = min_t(unsigned int, BBTOB(bp->b_length),
503 : xfs_dir3_data_end_offset(geo, bp->b_addr));
504 :
505 1848 : while (offset < end) {
506 1836 : struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
507 1836 : struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
508 :
509 1836 : if (xchk_should_terminate(rd->sc, &error))
510 0 : return error;
511 :
512 : /* Skip unused entries. */
513 1836 : if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
514 36 : offset += be16_to_cpu(dup->length);
515 36 : continue;
516 : }
517 :
518 : /* Don't walk off the end of the block. */
519 1800 : offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
520 1800 : if (offset > end)
521 : break;
522 :
523 : /* Ok, let's save this entry. */
524 1800 : error = xrep_dir_salvage_data_entry(rd, dep);
525 1800 : if (error)
526 0 : return error;
527 :
528 : }
529 :
530 : return 0;
531 : }
532 :
533 : /* Try to recover shortform directory entries. */
534 : STATIC int
535 984 : xrep_dir_recover_sf(
536 : struct xrep_dir *rd)
537 : {
538 984 : struct xfs_dir2_sf_hdr *sfp;
539 984 : struct xfs_dir2_sf_entry *sfep;
540 984 : struct xfs_dir2_sf_entry *next;
541 984 : struct xfs_ifork *ifp;
542 984 : xfs_ino_t ino;
543 984 : unsigned char *end;
544 984 : int error = 0;
545 :
546 984 : ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
547 984 : sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data;
548 984 : end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
549 :
550 984 : ino = xfs_dir2_sf_get_parent_ino(sfp);
551 984 : trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
552 :
553 985 : sfep = xfs_dir2_sf_firstentry(sfp);
554 6380 : while ((unsigned char *)sfep < end) {
555 5393 : if (xchk_should_terminate(rd->sc, &error))
556 0 : return error;
557 :
558 5394 : next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep);
559 5394 : if ((unsigned char *)next > end)
560 : break;
561 :
562 : /* Ok, let's save this entry. */
563 5394 : error = xrep_dir_salvage_sf_entry(rd, sfp, sfep);
564 5395 : if (error)
565 0 : return error;
566 :
567 : sfep = next;
568 : }
569 :
570 : return 0;
571 : }
572 :
573 : /*
574 : * Try to figure out the format of this directory from the data fork mappings
575 : * and the directory size. If we can be reasonably sure of format, we can be
576 : * more aggressive in salvaging directory entries. On return, @magic_guess
577 : * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format"
578 : * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory,
579 : * and 0 if we can't tell.
580 : */
581 : STATIC void
582 6 : xrep_dir_guess_format(
583 : struct xrep_dir *rd,
584 : __be32 *magic_guess)
585 : {
586 6 : struct xfs_inode *dp = rd->sc->ip;
587 6 : struct xfs_mount *mp = rd->sc->mp;
588 6 : struct xfs_da_geometry *geo = mp->m_dir_geo;
589 6 : xfs_fileoff_t last;
590 6 : int error;
591 :
592 6 : ASSERT(xfs_has_crc(mp));
593 :
594 6 : *magic_guess = 0;
595 :
596 : /*
597 : * If there's a single directory block and the directory size is
598 : * exactly one block, this has to be a single block format directory.
599 : */
600 6 : error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
601 6 : if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
602 5 : dp->i_disk_size == geo->blksize) {
603 5 : *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
604 6 : return;
605 : }
606 :
607 : /*
608 : * If the last extent before the leaf offset matches the directory
609 : * size and the directory size is larger than 1 block, this is a
610 : * data format directory.
611 : */
612 1 : last = geo->leafblk;
613 1 : error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
614 1 : if (!error &&
615 1 : XFS_FSB_TO_B(mp, last) > geo->blksize &&
616 1 : XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
617 1 : *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
618 1 : return;
619 : }
620 : }
621 :
622 : /* Recover directory entries from a specific directory block. */
623 : STATIC int
624 12 : xrep_dir_recover_dirblock(
625 : struct xrep_dir *rd,
626 : __be32 magic_guess,
627 : xfs_dablk_t dabno)
628 : {
629 12 : struct xfs_dir2_data_hdr *hdr;
630 12 : struct xfs_buf *bp;
631 12 : __be32 oldmagic;
632 12 : int error;
633 :
634 : /*
635 : * Try to read buffer. We invalidate them in the next step so we don't
636 : * bother to set a buffer type or ops.
637 : */
638 12 : error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
639 : XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
640 12 : if (error || !bp)
641 : return error;
642 :
643 12 : hdr = bp->b_addr;
644 12 : oldmagic = hdr->magic;
645 :
646 12 : trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
647 : be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
648 :
649 : /*
650 : * If we're sure of the block's format, proceed with the salvage
651 : * operation using the specified magic number.
652 : */
653 12 : if (magic_guess) {
654 12 : hdr->magic = magic_guess;
655 12 : goto recover;
656 : }
657 :
658 : /*
659 : * If we couldn't guess what type of directory this is, then we will
660 : * only salvage entries from directory blocks that match the magic
661 : * number and pass verifiers.
662 : */
663 0 : switch (hdr->magic) {
664 0 : case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
665 : case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
666 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
667 0 : goto out;
668 0 : if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
669 0 : goto out;
670 : break;
671 0 : case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
672 : case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
673 0 : if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
674 0 : goto out;
675 0 : if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
676 0 : goto out;
677 : break;
678 0 : default:
679 0 : goto out;
680 : }
681 :
682 12 : recover:
683 12 : error = xrep_dir_recover_data(rd, bp);
684 :
685 12 : out:
686 12 : hdr->magic = oldmagic;
687 12 : xfs_trans_brelse(rd->sc->tp, bp);
688 12 : return error;
689 : }
690 :
691 : static inline void
692 2862939 : xrep_dir_init_args(
693 : struct xrep_dir *rd,
694 : struct xfs_inode *dp,
695 : const struct xfs_name *name)
696 : {
697 2862939 : memset(&rd->args, 0, sizeof(struct xfs_da_args));
698 2862939 : rd->args.geo = rd->sc->mp->m_dir_geo;
699 2862939 : rd->args.whichfork = XFS_DATA_FORK;
700 2862939 : rd->args.owner = rd->sc->ip->i_ino;
701 2862939 : rd->args.trans = rd->sc->tp;
702 2862939 : rd->args.dp = dp;
703 2862939 : if (!name)
704 : return;
705 2665120 : rd->args.name = name->name;
706 2665120 : rd->args.namelen = name->len;
707 2665120 : rd->args.filetype = name->type;
708 2665120 : rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
709 : }
710 :
711 : /* Replay a stashed createname into the temporary directory. */
712 : STATIC int
713 2489351 : xrep_dir_replay_createname(
714 : struct xrep_dir *rd,
715 : const struct xfs_name *name,
716 : xfs_ino_t inum,
717 : xfs_extlen_t total)
718 : {
719 2489351 : struct xfs_scrub *sc = rd->sc;
720 2489351 : struct xfs_inode *dp = rd->sc->tempip;
721 2489351 : bool is_block, is_leaf;
722 2489351 : int error;
723 :
724 2489351 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
725 :
726 2489351 : error = xfs_dir_ino_validate(sc->mp, inum);
727 2489086 : if (error)
728 : return error;
729 :
730 2489103 : trace_xrep_dir_replay_createname(dp, name, inum);
731 :
732 2489064 : xrep_dir_init_args(rd, dp, name);
733 2489090 : rd->args.inumber = inum;
734 2489090 : rd->args.total = total;
735 2489090 : rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
736 :
737 2489090 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
738 908958 : return xfs_dir2_sf_addname(&rd->args);
739 :
740 1580132 : error = xfs_dir2_isblock(&rd->args, &is_block);
741 1580382 : if (error)
742 : return error;
743 1580382 : if (is_block)
744 278367 : return xfs_dir2_block_addname(&rd->args);
745 :
746 1302015 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
747 1302052 : if (error)
748 : return error;
749 1302052 : if (is_leaf)
750 488321 : return xfs_dir2_leaf_addname(&rd->args);
751 :
752 813731 : return xfs_dir2_node_addname(&rd->args);
753 : }
754 :
755 : /* Replay a stashed removename onto the temporary directory. */
756 : STATIC int
757 0 : xrep_dir_replay_removename(
758 : struct xrep_dir *rd,
759 : const struct xfs_name *name,
760 : xfs_extlen_t total)
761 : {
762 0 : struct xfs_inode *dp = rd->args.dp;
763 0 : bool is_block, is_leaf;
764 0 : int error;
765 :
766 0 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
767 :
768 0 : xrep_dir_init_args(rd, dp, name);
769 0 : rd->args.op_flags = 0;
770 0 : rd->args.total = total;
771 :
772 0 : trace_xrep_dir_replay_removename(dp, name, 0);
773 :
774 0 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
775 0 : return xfs_dir2_sf_removename(&rd->args);
776 :
777 0 : error = xfs_dir2_isblock(&rd->args, &is_block);
778 0 : if (error)
779 : return error;
780 0 : if (is_block)
781 0 : return xfs_dir2_block_removename(&rd->args);
782 :
783 0 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
784 0 : if (error)
785 : return error;
786 0 : if (is_leaf)
787 0 : return xfs_dir2_leaf_removename(&rd->args);
788 :
789 0 : return xfs_dir2_node_removename(&rd->args);
790 : }
791 :
/*
 * Add this stashed incore directory entry to the temporary directory.
 * The caller must hold the tempdir's IOLOCK, must not hold any ILOCKs, and
 * must not be in transaction context.
 *
 * The name bytes are not part of @dirent; they are read from
 * rd->pptr.p_name, which the caller must have loaded beforehand.
 *
 * Each update runs in its own transaction: allocate it, take the tempdir
 * ILOCK, join the tempdir, apply the add or remove, then commit and unlock.
 * rd->subdirs and rd->dirents are adjusted to match the update.
 *
 * Returns 0 on success or a negative errno.  On the out_cancel path the
 * transaction is cancelled and the tempdir ILOCK is dropped.
 */
STATIC int
xrep_dir_replay_update(
	struct xrep_dir			*rd,
	const struct xrep_dirent	*dirent)
{
	struct xfs_name			name = {
		.len			= dirent->namelen,
		.type			= dirent->ftype,
		.name			= rd->pptr.p_name,
	};
	struct xfs_mount		*mp = rd->sc->mp;
#ifdef DEBUG
	xfs_ino_t			ino;
#endif
	uint				resblks;
	int				error;

	/* The same reservation is used for adds and removes. */
	resblks = xfs_link_space_res(mp, dirent->namelen);
	error = xchk_trans_alloc(rd->sc, resblks);
	if (error)
		return error;

	/* Lock the temporary directory and join it to the transaction */
	xrep_tempfile_ilock(rd->sc);
	xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);

	switch (dirent->action) {
	case XREP_DIRENT_ADD:
		/*
		 * Create a replacement dirent in the temporary directory.
		 * Note that _createname doesn't check for existing entries.
		 * There shouldn't be any in the temporary dir, but we'll
		 * verify this in debug mode.
		 */
#ifdef DEBUG
		/*
		 * NOTE(review): inside this branch error is never -ENOENT, so
		 * the ASSERT below can never fire.  Also, if the lookup
		 * succeeds (error == 0) the update is cancelled and 0 is
		 * returned, i.e. the dirent is skipped without reporting a
		 * failure.  Confirm whether that is intended.
		 */
		error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
		if (error != -ENOENT) {
			ASSERT(error != -ENOENT);
			goto out_cancel;
		}
#endif

		error = xrep_dir_replay_createname(rd, &name, dirent->ino,
				resblks);
		if (error)
			goto out_cancel;

		if (name.type == XFS_DIR3_FT_DIR)
			rd->subdirs++;
		rd->dirents++;
		break;
	case XREP_DIRENT_REMOVE:
		/*
		 * Remove a dirent from the temporary directory.  Note that
		 * _removename doesn't check the inode target of the existing
		 * entry.  There should be a perfect match in the temporary
		 * dir, but we'll verify this in debug mode.
		 */
#ifdef DEBUG
		/*
		 * NOTE(review): inside this branch error is always nonzero, so
		 * the ASSERT below can never fire.
		 */
		error = xchk_dir_lookup(rd->sc, rd->sc->tempip, &name, &ino);
		if (error) {
			ASSERT(error != 0);
			goto out_cancel;
		}
		if (ino != dirent->ino) {
			ASSERT(ino == dirent->ino);
			error = -EIO;
			goto out_cancel;
		}
#endif

		error = xrep_dir_replay_removename(rd, &name, resblks);
		if (error)
			goto out_cancel;

		if (name.type == XFS_DIR3_FT_DIR)
			rd->subdirs--;
		rd->dirents--;
		break;
	default:
		/* Unknown action code in a stashed record. */
		ASSERT(0);
		error = -EIO;
		goto out_cancel;
	}

	/* Commit and unlock. */
	error = xrep_trans_commit(rd->sc);
	/*
	 * NOTE(review): a failed commit returns with the tempdir ILOCK still
	 * held; presumably scrub teardown releases it -- confirm.
	 */
	if (error)
		return error;

	xrep_tempfile_iunlock(rd->sc);
	return 0;
out_cancel:
	xchk_trans_cancel(rd->sc);
	xrep_tempfile_iunlock(rd->sc);
	return error;
}
894 :
895 : /*
896 : * Flush stashed incore dirent updates that have been recorded by the scanner.
897 : * This is done to reduce the memory requirements of the directory rebuild,
898 : * since directories can contain up to 32GB of directory data.
899 : *
900 : * Caller must not hold transactions or ILOCKs. Caller must hold the tempdir
901 : * IOLOCK.
902 : */
903 : STATIC int
904 199919 : xrep_dir_replay_updates(
905 : struct xrep_dir *rd)
906 : {
907 199919 : xfarray_idx_t array_cur;
908 199919 : int error;
909 :
910 : /* Add all the salvaged dirents to the temporary directory. */
911 2689180 : foreach_xfarray_idx(rd->dir_entries, array_cur) {
912 2489275 : struct xrep_dirent dirent;
913 :
914 2489275 : error = xfarray_load(rd->dir_entries, array_cur, &dirent);
915 2489420 : if (error)
916 33 : return error;
917 :
918 : /* The dirent name is stored in the in-core buffer. */
919 2489420 : error = xfblob_load(rd->dir_names, dirent.name_cookie,
920 2489420 : rd->pptr.p_name, dirent.namelen);
921 2489409 : if (error)
922 0 : return error;
923 2489409 : rd->pptr.p_name[MAXNAMELEN - 1] = 0;
924 :
925 2489409 : error = xrep_dir_replay_update(rd, &dirent);
926 2489294 : if (error)
927 33 : return error;
928 : }
929 :
930 : /* Empty out both arrays now that we've added the entries. */
931 199898 : xfarray_truncate(rd->dir_entries);
932 199891 : xfblob_truncate(rd->dir_names);
933 199891 : return 0;
934 : }
935 :
936 : /*
937 : * Periodically flush stashed directory entries to the temporary dir. This
938 : * is done to reduce the memory requirements of the directory rebuild, since
939 : * directories can contain up to 32GB of directory data.
940 : */
941 : STATIC int
942 994 : xrep_dir_flush_stashed(
943 : struct xrep_dir *rd)
944 : {
945 994 : int error;
946 :
947 : /*
948 : * Entering this function, the scrub context has a reference to the
949 : * inode being repaired, the temporary file, and a scrub transaction
950 : * that we use during dirent salvaging to avoid livelocking if there
951 : * are cycles in the directory structures. We hold ILOCK_EXCL on both
952 : * the inode being repaired and the temporary file, though they are
953 : * not ijoined to the scrub transaction.
954 : *
955 : * To constrain kernel memory use, we occasionally write salvaged
956 : * dirents from the xfarray and xfblob structures into the temporary
957 : * directory in preparation for swapping the directory structures at
958 : * the end. Updating the temporary file requires a transaction, so we
959 : * commit the scrub transaction and drop the two ILOCKs so that
960 : * we can allocate whatever transaction we want.
961 : *
962 : * We still hold IOLOCK_EXCL on the inode being repaired, which
963 : * prevents anyone from accessing the damaged directory data while we
964 : * repair it.
965 : */
966 994 : error = xrep_trans_commit(rd->sc);
967 993 : if (error)
968 : return error;
969 993 : xchk_iunlock(rd->sc, XFS_ILOCK_EXCL);
970 :
971 : /*
972 : * Take the IOLOCK of the temporary file while we modify dirents. This
973 : * isn't strictly required because the temporary file is never revealed
974 : * to userspace, but we follow the same locking rules. We still hold
975 : * sc->ip's IOLOCK.
976 : */
977 995 : error = xrep_tempfile_iolock_polled(rd->sc);
978 993 : if (error)
979 : return error;
980 :
981 : /* Write to the tempdir all the updates that we've stashed. */
982 993 : error = xrep_dir_replay_updates(rd);
983 995 : xrep_tempfile_iounlock(rd->sc);
984 994 : if (error)
985 : return error;
986 :
987 : /*
988 : * Recreate the salvage transaction and relock the dir we're salvaging.
989 : */
990 994 : error = xchk_trans_alloc(rd->sc, 0);
991 994 : if (error)
992 : return error;
993 994 : xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
994 994 : return 0;
995 : }
996 :
997 : /* Decide if we've stashed too much dirent data in memory. */
998 : static inline bool
999 4447984924 : xrep_dir_want_flush_stashed(
1000 : struct xrep_dir *rd)
1001 : {
1002 4447984924 : unsigned long long bytes;
1003 :
1004 4447984924 : bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
1005 4446419212 : return bytes > XREP_DIR_MAX_STASH_BYTES;
1006 : }
1007 :
1008 : /* Extract as many directory entries as we can. */
1009 : STATIC int
1010 6 : xrep_dir_recover(
1011 : struct xrep_dir *rd)
1012 : {
1013 6 : struct xfs_bmbt_irec got;
1014 6 : struct xfs_scrub *sc = rd->sc;
1015 6 : struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
1016 6 : xfs_fileoff_t offset;
1017 6 : xfs_dablk_t dabno;
1018 6 : __be32 magic_guess;
1019 6 : int nmap;
1020 6 : int error;
1021 :
1022 6 : xrep_dir_guess_format(rd, &magic_guess);
1023 :
1024 : /* Iterate each directory data block in the data fork. */
1025 6 : for (offset = 0;
1026 21 : offset < geo->leafblk;
1027 15 : offset = got.br_startoff + got.br_blockcount) {
1028 15 : nmap = 1;
1029 15 : error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
1030 : &got, &nmap, 0);
1031 15 : if (error)
1032 0 : return error;
1033 15 : if (nmap != 1)
1034 : return -EFSCORRUPTED;
1035 15 : if (!xfs_bmap_is_written_extent(&got))
1036 6 : continue;
1037 :
1038 9 : for (dabno = round_up(got.br_startoff, geo->fsbcount);
1039 21 : dabno < got.br_startoff + got.br_blockcount;
1040 12 : dabno += geo->fsbcount) {
1041 12 : if (xchk_should_terminate(rd->sc, &error))
1042 0 : return error;
1043 :
1044 12 : error = xrep_dir_recover_dirblock(rd,
1045 : magic_guess, dabno);
1046 12 : if (error)
1047 0 : return error;
1048 :
1049 : /* Flush dirents to constrain memory usage. */
1050 12 : if (xrep_dir_want_flush_stashed(rd)) {
1051 2 : error = xrep_dir_flush_stashed(rd);
1052 2 : if (error)
1053 0 : return error;
1054 : }
1055 : }
1056 : }
1057 :
1058 : return 0;
1059 : }
1060 :
1061 : /*
1062 : * Find all the directory entries for this inode by scraping them out of the
1063 : * directory leaf blocks by hand, and flushing them into the temp dir.
1064 : */
1065 : STATIC int
1066 993 : xrep_dir_find_entries(
1067 : struct xrep_dir *rd)
1068 : {
1069 993 : struct xfs_inode *dp = rd->sc->ip;
1070 993 : int error;
1071 :
1072 : /*
1073 : * Salvage directory entries from the old directory, and write them to
1074 : * the temporary directory.
1075 : */
1076 993 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
1077 987 : error = xrep_dir_recover_sf(rd);
1078 : } else {
1079 6 : error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
1080 6 : if (error)
1081 : return error;
1082 :
1083 6 : error = xrep_dir_recover(rd);
1084 : }
1085 993 : if (error)
1086 : return error;
1087 :
1088 993 : return xrep_dir_flush_stashed(rd);
1089 : }
1090 :
1091 : /* Scan all files in the filesystem for dirents. */
1092 : STATIC int
1093 992 : xrep_dir_salvage_entries(
1094 : struct xrep_dir *rd)
1095 : {
1096 992 : struct xfs_scrub *sc = rd->sc;
1097 992 : int error;
1098 :
1099 : /*
1100 : * Drop the ILOCK on this directory so that we can scan for this
1101 : * directory's parent. Figure out who is going to be the parent of
1102 : * this directory, then retake the ILOCK so that we can salvage
1103 : * directory entries.
1104 : */
1105 992 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1106 993 : error = xrep_dir_find_parent(rd);
1107 993 : xchk_ilock(sc, XFS_ILOCK_EXCL);
1108 990 : if (error)
1109 : return error;
1110 :
1111 : /*
1112 : * Collect directory entries by parsing raw leaf blocks to salvage
1113 : * whatever we can. When we're done, free the staging memory before
1114 : * swapping the directories to reduce memory usage.
1115 : */
1116 990 : error = xrep_dir_find_entries(rd);
1117 992 : if (error)
1118 : return error;
1119 :
1120 : /*
1121 : * Cancel the repair transaction and drop the ILOCK so that we can
1122 : * (later) use the atomic extent swap helper functions to compute the
1123 : * correct block reservations and re-lock the inodes.
1124 : *
1125 : * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
1126 : * modifications, but there's nothing to prevent userspace from reading
1127 : * the directory until we're ready for the swap operation. Reads will
1128 : * return -EIO without shutting down the fs, so we're ok with that.
1129 : *
1130 : * The VFS can change dotdot on us, but the findparent scan will keep
1131 : * our incore parent inode up to date. See the note on locking issues
1132 : * for more details.
1133 : */
1134 992 : error = xrep_trans_commit(sc);
1135 993 : if (error)
1136 : return error;
1137 :
1138 993 : xchk_iunlock(sc, XFS_ILOCK_EXCL);
1139 993 : return 0;
1140 : }
1141 :
1142 :
1143 : /*
1144 : * Examine a parent pointer of a file. If it leads us back to the directory
1145 : * that we're rebuilding, create an incore dirent from the parent pointer and
1146 : * stash it.
1147 : */
1148 : STATIC int
1149 4859319552 : xrep_dir_scan_pptr(
1150 : struct xfs_scrub *sc,
1151 : struct xfs_inode *ip,
1152 : const struct xfs_parent_name_irec *pptr,
1153 : void *priv)
1154 : {
1155 4859319552 : struct xfs_name xname;
1156 4859319552 : struct xrep_dir *rd = priv;
1157 4859319552 : int error;
1158 :
1159 : /* Ignore parent pointers that point back to a different dir. */
1160 4859319552 : if (pptr->p_ino != sc->ip->i_ino ||
1161 2482809 : pptr->p_gen != VFS_I(sc->ip)->i_generation)
1162 : return 0;
1163 :
1164 : /*
1165 : * Transform this parent pointer into a dirent and queue it for later
1166 : * addition to the temporary directory.
1167 : */
1168 2482809 : xname.name = pptr->p_name;
1169 2482809 : xname.len = pptr->p_namelen;
1170 2482809 : xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
1171 :
1172 2482811 : mutex_lock(&rd->pscan.lock);
1173 2482817 : error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
1174 2482808 : mutex_unlock(&rd->pscan.lock);
1175 2482808 : return error;
1176 : }
1177 :
1178 : /*
1179 : * If this child dirent points to the directory being repaired, remember that
1180 : * fact so that we can reset the dotdot entry if necessary.
1181 : */
1182 : STATIC int
1183 7823360040 : xrep_dir_scan_dirent(
1184 : struct xfs_scrub *sc,
1185 : struct xfs_inode *dp,
1186 : xfs_dir2_dataptr_t dapos,
1187 : const struct xfs_name *name,
1188 : xfs_ino_t ino,
1189 : void *priv)
1190 : {
1191 7823360040 : struct xrep_dir *rd = priv;
1192 :
1193 : /* Dirent doesn't point to this directory. */
1194 7823360040 : if (ino != rd->sc->ip->i_ino)
1195 : return 0;
1196 :
1197 : /* Ignore garbage inum. */
1198 398288 : if (!xfs_verify_dir_ino(rd->sc->mp, ino))
1199 : return 0;
1200 :
1201 : /* No weird looking names. */
1202 398287 : if (name->len >= MAXNAMELEN || name->len <= 0)
1203 : return 0;
1204 :
1205 : /* Don't pick up dot or dotdot entries; we only want child dirents. */
1206 591607 : if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
1207 193321 : xfs_dir2_samename(name, &xfs_name_dot))
1208 204967 : return 0;
1209 :
1210 193321 : trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
1211 : dp->i_ino);
1212 :
1213 193321 : xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
1214 193321 : return 0;
1215 : }
1216 :
1217 : /*
1218 : * Decide if we want to look for child dirents or parent pointers in this file.
1219 : * Skip the dir being repaired and any files being used to stage repairs.
1220 : */
1221 : static inline bool
1222 8879649129 : xrep_dir_want_scan(
1223 : struct xrep_dir *rd,
1224 : const struct xfs_inode *ip)
1225 : {
1226 8879649129 : return ip != rd->sc->ip && !xrep_is_tempfile(ip);
1227 : }
1228 :
1229 : /*
1230 : * Take ILOCK on a file that we want to scan.
1231 : *
1232 : * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
1233 : * has an unloaded attr bmbt. Otherwise, take ILOCK_SHARED.
1234 : */
1235 : static inline unsigned int
1236 4444490974 : xrep_dir_scan_ilock(
1237 : struct xrep_dir *rd,
1238 : struct xfs_inode *ip)
1239 : {
1240 4444490974 : uint lock_mode = XFS_ILOCK_SHARED;
1241 :
1242 : /* Need to take the shared ILOCK to advance the iscan cursor. */
1243 4444490974 : if (!xrep_dir_want_scan(rd, ip))
1244 942687 : goto lock;
1245 :
1246 5931136630 : if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
1247 0 : lock_mode = XFS_ILOCK_EXCL;
1248 0 : goto lock;
1249 : }
1250 :
1251 8885246801 : if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
1252 0 : lock_mode = XFS_ILOCK_EXCL;
1253 :
1254 4442601509 : lock:
1255 4443544196 : xfs_ilock(ip, lock_mode);
1256 4444598384 : return lock_mode;
1257 : }
1258 :
1259 : /*
1260 : * Scan this file for relevant child dirents or parent pointers that point to
1261 : * the directory we're rebuilding.
1262 : */
1263 : STATIC int
1264 4444572786 : xrep_dir_scan_file(
1265 : struct xrep_dir *rd,
1266 : struct xfs_inode *ip)
1267 : {
1268 4444572786 : unsigned int lock_mode;
1269 4444572786 : int error = 0;
1270 :
1271 4444572786 : lock_mode = xrep_dir_scan_ilock(rd, ip);
1272 :
1273 4443436389 : if (!xrep_dir_want_scan(rd, ip))
1274 942684 : goto scan_done;
1275 :
1276 4441072830 : error = xchk_pptr_walk(rd->sc, ip, xrep_dir_scan_pptr, &rd->pptr, rd);
1277 4444863735 : if (error)
1278 0 : goto scan_done;
1279 :
1280 4444863735 : if (S_ISDIR(VFS_I(ip)->i_mode)) {
1281 1487779740 : error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
1282 1488061894 : if (error)
1283 0 : goto scan_done;
1284 : }
1285 :
1286 4445145889 : scan_done:
1287 4446088573 : xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
1288 4443668643 : xfs_iunlock(ip, lock_mode);
1289 4440385228 : return error;
1290 : }
1291 :
1292 : /*
1293 : * Scan all files in the filesystem for parent pointers that we can turn into
1294 : * replacement dirents, and a dirent that we can use to set the dotdot pointer.
1295 : */
1296 : STATIC int
1297 196914 : xrep_dir_scan_dirtree(
1298 : struct xrep_dir *rd)
1299 : {
1300 196914 : struct xfs_scrub *sc = rd->sc;
1301 196914 : struct xfs_inode *ip;
1302 196914 : int error;
1303 :
1304 : /* Roots of directory trees are their own parents. */
1305 196914 : if (sc->ip == sc->mp->m_rootip || sc->ip == sc->mp->m_metadirip)
1306 3581 : xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
1307 :
1308 : /*
1309 : * Filesystem scans are time consuming. Drop the directory ILOCK and
1310 : * all other resources for the duration of the scan and hope for the
1311 : * best. The live update hooks will keep our scan information up to
1312 : * date even though we've dropped the locks.
1313 : */
1314 196914 : xchk_trans_cancel(sc);
1315 196909 : if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
1316 196904 : xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
1317 : XFS_ILOCK_EXCL));
1318 196903 : error = xchk_trans_alloc_empty(sc);
1319 196881 : if (error)
1320 : return error;
1321 :
1322 4445503612 : while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
1323 4444791499 : bool flush;
1324 :
1325 4444791499 : error = xrep_dir_scan_file(rd, ip);
1326 4441256577 : xchk_irele(sc, ip);
1327 4446476937 : if (error)
1328 : break;
1329 :
1330 : /* Flush stashed dirent updates to constrain memory usage. */
1331 4446476937 : mutex_lock(&rd->pscan.lock);
1332 4449177162 : flush = xrep_dir_want_flush_stashed(rd);
1333 4446147101 : mutex_unlock(&rd->pscan.lock);
1334 4449940308 : if (flush) {
1335 2052 : xchk_trans_cancel(sc);
1336 :
1337 2052 : error = xrep_tempfile_iolock_polled(sc);
1338 2052 : if (error)
1339 : break;
1340 :
1341 2052 : mutex_lock(&rd->pscan.lock);
1342 2052 : error = xrep_dir_replay_updates(rd);
1343 2052 : mutex_unlock(&rd->pscan.lock);
1344 2052 : xrep_tempfile_iounlock(sc);
1345 2052 : if (error)
1346 : break;
1347 :
1348 2052 : error = xchk_trans_alloc_empty(sc);
1349 2052 : if (error)
1350 : break;
1351 : }
1352 :
1353 4449940308 : if (xchk_should_terminate(sc, &error))
1354 : break;
1355 : }
1356 196914 : xchk_iscan_iter_finish(&rd->pscan.iscan);
1357 196913 : if (error) {
1358 : /*
1359 : * If we couldn't grab an inode that was busy with a state
1360 : * change, change the error code so that we exit to userspace
1361 : * as quickly as possible.
1362 : */
1363 29 : if (error == -EBUSY)
1364 : return -ECANCELED;
1365 29 : return error;
1366 : }
1367 :
1368 : /*
1369 : * Cancel the empty transaction so that we can (later) use the atomic
1370 : * extent swap helpers to lock files and commit the new directory.
1371 : */
1372 196884 : xchk_trans_cancel(rd->sc);
1373 196884 : return 0;
1374 : }
1375 :
1376 : /*
1377 : * Capture dirent updates being made by other threads which are relevant to the
1378 : * directory being repaired.
1379 : */
1380 : STATIC int
1381 27818258 : xrep_dir_live_update(
1382 : struct notifier_block *nb,
1383 : unsigned long action,
1384 : void *data)
1385 : {
1386 27818258 : struct xfs_dir_update_params *p = data;
1387 27818258 : struct xrep_dir *rd;
1388 27818258 : struct xfs_scrub *sc;
1389 27818258 : int error = 0;
1390 :
1391 27818258 : rd = container_of(nb, struct xrep_dir, pscan.hooks.dirent_hook.nb);
1392 27818258 : sc = rd->sc;
1393 :
1394 : /*
1395 : * This thread updated a child dirent in the directory that we're
1396 : * rebuilding. Stash the update for replay against the temporary
1397 : * directory.
1398 : */
1399 27818258 : if (p->dp->i_ino == sc->ip->i_ino &&
1400 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
1401 0 : mutex_lock(&rd->pscan.lock);
1402 0 : if (p->delta > 0)
1403 0 : error = xrep_dir_stash_createname(rd, p->name,
1404 0 : p->ip->i_ino);
1405 : else
1406 0 : error = xrep_dir_stash_removename(rd, p->name,
1407 0 : p->ip->i_ino);
1408 0 : mutex_unlock(&rd->pscan.lock);
1409 0 : if (error)
1410 0 : goto out_abort;
1411 : }
1412 :
1413 : /*
1414 : * This thread updated another directory's child dirent that points to
1415 : * the directory that we're rebuilding, so remember the new dotdot
1416 : * target.
1417 : */
1418 27818258 : if (p->ip->i_ino == sc->ip->i_ino &&
1419 0 : xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
1420 0 : if (p->delta > 0) {
1421 0 : trace_xrep_dir_stash_createname(sc->tempip,
1422 : &xfs_name_dotdot,
1423 0 : p->dp->i_ino);
1424 :
1425 0 : xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
1426 : } else {
1427 0 : trace_xrep_dir_stash_removename(sc->tempip,
1428 : &xfs_name_dotdot,
1429 : rd->pscan.parent_ino);
1430 :
1431 0 : xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
1432 : }
1433 : }
1434 :
1435 : return NOTIFY_DONE;
1436 : out_abort:
1437 0 : xchk_iscan_abort(&rd->pscan.iscan);
1438 0 : return NOTIFY_DONE;
1439 : }
1440 :
1441 : /*
1442 : * Free all the directory blocks and reset the data fork. The caller must
1443 : * join the inode to the transaction. This function returns with the inode
1444 : * joined to a clean scrub transaction.
1445 : */
1446 : STATIC int
1447 197807 : xrep_dir_reset_fork(
1448 : struct xrep_dir *rd,
1449 : xfs_ino_t parent_ino)
1450 : {
1451 197807 : struct xfs_scrub *sc = rd->sc;
1452 197807 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
1453 197807 : int error;
1454 :
1455 : /* Unmap all the directory buffers. */
1456 197807 : if (xfs_ifork_has_extents(ifp)) {
1457 10638 : error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
1458 10638 : if (error)
1459 : return error;
1460 : }
1461 :
1462 197807 : trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
1463 :
1464 : /* Reset the data fork to an empty data fork. */
1465 197788 : xfs_idestroy_fork(ifp);
1466 197822 : ifp->if_bytes = 0;
1467 197822 : sc->tempip->i_disk_size = 0;
1468 :
1469 : /* Reinitialize the short form directory. */
1470 197822 : xrep_dir_init_args(rd, sc->tempip, NULL);
1471 197817 : error = xfs_dir2_sf_create(&rd->args, parent_ino);
1472 197841 : if (error)
1473 : return error;
1474 :
1475 197834 : return xrep_tempfile_roll_trans(sc);
1476 : }
1477 :
1478 : /*
1479 : * Prepare both inodes' directory forks for extent swapping. Promote the
1480 : * tempfile from short format to leaf format, and if the file being repaired
1481 : * has a short format data fork, turn it into an empty extent list.
1482 : */
1483 : STATIC int
1484 10638 : xrep_dir_swap_prep(
1485 : struct xfs_scrub *sc,
1486 : bool temp_local,
1487 : bool ip_local)
1488 : {
1489 10638 : int error;
1490 :
1491 : /*
1492 : * If the tempfile's directory is in shortform format, convert that
1493 : * to a single leaf extent so that we can use the atomic extent swap.
1494 : */
1495 10638 : if (temp_local) {
1496 6924 : struct xfs_da_args args = {
1497 6924 : .dp = sc->tempip,
1498 6924 : .geo = sc->mp->m_dir_geo,
1499 : .whichfork = XFS_DATA_FORK,
1500 6924 : .trans = sc->tp,
1501 : .total = 1,
1502 6924 : .owner = sc->ip->i_ino,
1503 : };
1504 :
1505 6924 : error = xfs_dir2_sf_to_block(&args);
1506 6924 : if (error)
1507 0 : return error;
1508 :
1509 : /*
1510 : * Roll the deferred log items to get us back to a clean
1511 : * transaction.
1512 : */
1513 6924 : error = xfs_defer_finish(&sc->tp);
1514 6924 : if (error)
1515 : return error;
1516 : }
1517 :
1518 : /*
1519 : * If the file being repaired had a shortform data fork, convert that
1520 : * to an empty extent list in preparation for the atomic extent swap.
1521 : */
1522 10638 : if (ip_local) {
1523 0 : struct xfs_ifork *ifp;
1524 :
1525 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1526 0 : xfs_idestroy_fork(ifp);
1527 0 : ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1528 0 : ifp->if_nextents = 0;
1529 0 : ifp->if_bytes = 0;
1530 0 : ifp->if_u1.if_root = NULL;
1531 0 : ifp->if_height = 0;
1532 :
1533 0 : xfs_trans_log_inode(sc->tp, sc->ip,
1534 : XFS_ILOG_CORE | XFS_ILOG_DDATA);
1535 : }
1536 :
1537 : return 0;
1538 : }
1539 :
1540 : /*
1541 : * Replace the inode number of a directory entry.
1542 : */
1543 : static int
1544 175928 : xrep_dir_replace(
1545 : struct xrep_dir *rd,
1546 : struct xfs_inode *dp,
1547 : const struct xfs_name *name,
1548 : xfs_ino_t inum,
1549 : xfs_extlen_t total)
1550 : {
1551 175928 : struct xfs_scrub *sc = rd->sc;
1552 175928 : bool is_block, is_leaf;
1553 175928 : int error;
1554 :
1555 175928 : ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
1556 :
1557 175928 : error = xfs_dir_ino_validate(sc->mp, inum);
1558 175939 : if (error)
1559 : return error;
1560 :
1561 175939 : xrep_dir_init_args(rd, dp, name);
1562 175924 : rd->args.inumber = inum;
1563 175924 : rd->args.total = total;
1564 :
1565 175924 : if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
1566 174123 : return xfs_dir2_sf_replace(&rd->args);
1567 :
1568 1801 : error = xfs_dir2_isblock(&rd->args, &is_block);
1569 1801 : if (error)
1570 : return error;
1571 1801 : if (is_block)
1572 1542 : return xfs_dir2_block_replace(&rd->args);
1573 :
1574 259 : error = xfs_dir2_isleaf(&rd->args, &is_leaf);
1575 259 : if (error)
1576 : return error;
1577 259 : if (is_leaf)
1578 150 : return xfs_dir2_leaf_replace(&rd->args);
1579 :
1580 109 : return xfs_dir2_node_replace(&rd->args);
1581 : }
1582 :
1583 : /*
1584 : * Reset the link count of this directory and adjust the unlinked list pointers
1585 : * as needed.
1586 : */
1587 : STATIC int
1588 197822 : xrep_dir_set_nlink(
1589 : struct xrep_dir *rd)
1590 : {
1591 197822 : struct xfs_scrub *sc = rd->sc;
1592 197822 : struct xfs_inode *dp = sc->ip;
1593 197822 : struct xfs_perag *pag;
1594 197822 : int error;
1595 :
1596 : /*
1597 : * The directory is not on the incore unlinked list, which means that
1598 : * it needs to be reachable via the directory tree. Update the nlink
1599 : * with our observed link count. If the directory has no parent, it
1600 : * will be moved to the orphanage.
1601 : */
1602 197822 : if (!xfs_inode_on_unlinked_list(dp)) {
1603 197822 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1604 197822 : return 0;
1605 : }
1606 :
1607 0 : xfs_emerg(dp->i_mount, "IUNLINK unlinked dir 0x%llx repair, dirents %u subdirs %llu curr_nlink %u orphan? %d", dp->i_ino, rd->dirents, rd->subdirs, VFS_I(dp)->i_nlink, rd->needs_adoption);
1608 :
1609 : /*
1610 : * The directory is on the unlinked list and we did not find any
1611 : * dirents. Set the link count to zero and let the directory
1612 : * inactivate when the last reference drops.
1613 : */
1614 0 : if (rd->dirents == 0) {
1615 0 : rd->needs_adoption = false;
1616 0 : xrep_set_nlink(sc->ip, 0);
1617 0 : return 0;
1618 : }
1619 :
1620 : /*
1621 : * The directory is on the unlinked list and we found dirents. This
1622 : * directory needs to be reachable via the directory tree. Remove the
1623 : * dir from the unlinked list and update nlink with the observed link
1624 : * count. If the directory has no parent, it will be moved to the
1625 : * orphanage.
1626 : */
1627 0 : pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
1628 0 : if (!pag) {
1629 0 : ASSERT(0);
1630 0 : return -EFSCORRUPTED;
1631 : }
1632 :
1633 0 : error = xfs_iunlink_remove(sc->tp, pag, dp);
1634 0 : xfs_perag_put(pag);
1635 0 : if (error)
1636 : return error;
1637 :
1638 0 : xrep_set_nlink(sc->ip, rd->subdirs + 2);
1639 0 : return 0;
1640 : }
1641 :
1642 : /*
1643 : * Finish replaying stashed dirent updates, allocate a transaction for swapping
1644 : * extents, and take the ILOCKs of both directories before we commit the new
1645 : * directory structure.
1646 : */
1647 : STATIC int
1648 197874 : xrep_dir_finalize_tempdir(
1649 : struct xrep_dir *rd)
1650 : {
1651 197874 : struct xfs_scrub *sc = rd->sc;
1652 197874 : int error;
1653 :
1654 197874 : if (!xfs_has_parent(sc->mp))
1655 992 : return xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1656 :
1657 196882 : error = xrep_dir_replay_updates(rd);
1658 196885 : if (error)
1659 : return error;
1660 :
1661 196848 : error = xrep_tempswap_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1662 196838 : if (error)
1663 : return error;
1664 :
1665 : /*
1666 : * We rely on the caller's hold on @sc->ip's IOLOCK_EXCL to quiesce all
1667 : * possible directory updates during the time when we did not hold the
1668 : * ILOCK. There should not be any dirent updates to replay, but check
1669 : * anyway.
1670 : */
1671 196836 : if (xfarray_length(rd->dir_entries) != 0) {
1672 0 : ASSERT(xfarray_length(rd->dir_entries) == 0);
1673 0 : return -EFSCORRUPTED;
1674 : }
1675 :
1676 : return 0;
1677 : }
1678 :
1679 : /* Swap the temporary directory's data fork with the one being repaired. */
1680 : STATIC int
1681 197816 : xrep_dir_swap(
1682 : struct xrep_dir *rd)
1683 : {
1684 197816 : struct xfs_scrub *sc = rd->sc;
1685 197816 : bool ip_local, temp_local;
1686 197816 : int error = 0;
1687 :
1688 : /*
1689 : * If we never found the parent for this directory, temporarily assign
1690 : * the root dir as the parent; we'll move this to the orphanage after
1691 : * swapping the dir contents. We hold the ILOCK of the dir being
1692 : * repaired, so we're not worried about racy updates of dotdot.
1693 : */
1694 197816 : ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1695 197816 : if (rd->pscan.parent_ino == NULLFSINO) {
1696 0 : rd->needs_adoption = true;
1697 0 : rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
1698 : }
1699 :
1700 : /*
1701 : * Reset the temporary directory's '..' entry to point to the parent
1702 : * that we found. The temporary directory was created with the root
1703 : * directory as the parent, so we can skip this if repairing a
1704 : * subdirectory of the root.
1705 : *
1706 : * It's also possible that this replacement could also expand a sf
1707 : * tempdir into block format.
1708 : */
1709 197816 : if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
1710 175923 : error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
1711 175923 : rd->pscan.parent_ino, rd->tx.req.resblks);
1712 175938 : if (error)
1713 : return error;
1714 : }
1715 :
1716 : /*
1717 : * Changing the dot and dotdot entries could have changed the shape of
1718 : * the directory, so we recompute these.
1719 : */
1720 197831 : ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1721 197831 : temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1722 :
1723 : /*
1724 : * If the both files have a local format data fork and the rebuilt
1725 : * directory data would fit in the repaired file's data fork, copy
1726 : * the contents from the tempfile and update the directory link count.
1727 : * We're done now.
1728 : */
1729 197831 : if (ip_local && temp_local &&
1730 187193 : sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
1731 187190 : xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
1732 187203 : return xrep_dir_set_nlink(rd);
1733 : }
1734 :
1735 : /* Clean the transaction before we start working on the extent swap. */
1736 10641 : error = xrep_tempfile_roll_trans(rd->sc);
1737 10638 : if (error)
1738 : return error;
1739 :
1740 : /* Otherwise, make sure both data forks are in block-mapping mode. */
1741 10638 : error = xrep_dir_swap_prep(sc, temp_local, ip_local);
1742 10638 : if (error)
1743 : return error;
1744 :
1745 : /*
1746 : * Set nlink of the directory in the same transaction sequence that
1747 : * (atomically) commits the new directory data.
1748 : */
1749 10638 : error = xrep_dir_set_nlink(rd);
1750 10638 : if (error)
1751 : return error;
1752 :
1753 10638 : return xrep_tempswap_contents(sc, &rd->tx);
1754 : }
1755 :
1756 : /*
1757 : * Swap the new directory contents (which we created in the tempfile) into the
1758 : * directory being repaired.
1759 : */
1760 : STATIC int
1761 197874 : xrep_dir_rebuild_tree(
1762 : struct xrep_dir *rd)
1763 : {
1764 197874 : struct xfs_scrub *sc = rd->sc;
1765 197874 : int error;
1766 :
1767 197874 : trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
1768 :
1769 : /*
1770 : * Take the IOLOCK on the temporary file so that we can run dir
1771 : * operations with the same locks held as we would for a normal file.
1772 : * We still hold sc->ip's IOLOCK.
1773 : */
1774 197859 : error = xrep_tempfile_iolock_polled(rd->sc);
1775 197865 : if (error)
1776 : return error;
1777 :
1778 : /*
1779 : * Allocate transaction, lock inodes, and make sure that we've replayed
1780 : * all the stashed dirent updates to the tempdir. After this point,
1781 : * we're ready to swapext.
1782 : */
1783 197862 : error = xrep_dir_finalize_tempdir(rd);
1784 197865 : if (error)
1785 : return error;
1786 :
1787 197833 : if (xchk_iscan_aborted(&rd->pscan.iscan))
1788 : return -ECANCELED;
1789 :
1790 : /*
1791 : * Swap the tempdir's data fork with the file being repaired. This
1792 : * recreates the transaction and re-takes the ILOCK in the scrub
1793 : * context.
1794 : */
1795 197812 : error = xrep_dir_swap(rd);
1796 197811 : if (error)
1797 : return error;
1798 :
1799 : /*
1800 : * Release the old directory blocks and reset the data fork of the temp
1801 : * directory to an empty shortform directory because inactivation does
1802 : * nothing for directories.
1803 : */
1804 197810 : return xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
1805 : }
1806 :
1807 : /* Set up the filesystem scan so we can regenerate directory entries. */
1808 : STATIC int
1809 197890 : xrep_dir_setup_scan(
1810 : struct xrep_dir *rd)
1811 : {
1812 197890 : struct xfs_scrub *sc = rd->sc;
1813 197890 : char *descr;
1814 197890 : int error;
1815 :
1816 : /* Set up some staging memory for salvaging dirents. */
1817 197890 : descr = xchk_xfile_ino_descr(sc, "directory entries");
1818 197892 : error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
1819 : &rd->dir_entries);
1820 197907 : kfree(descr);
1821 197905 : if (error)
1822 : return error;
1823 :
1824 197905 : descr = xchk_xfile_ino_descr(sc, "directory entry names");
1825 197891 : error = xfblob_create(descr, &rd->dir_names);
1826 197907 : kfree(descr);
1827 197908 : if (error)
1828 0 : goto out_xfarray;
1829 :
1830 197908 : if (xfs_has_parent(sc->mp))
1831 196915 : error = __xrep_findparent_scan_start(sc, &rd->pscan,
1832 : xrep_dir_live_update);
1833 : else
1834 993 : error = xrep_findparent_scan_start(sc, &rd->pscan);
1835 197909 : if (error)
1836 0 : goto out_xfblob;
1837 :
1838 : return 0;
1839 :
1840 : out_xfblob:
1841 0 : xfblob_destroy(rd->dir_names);
1842 0 : rd->dir_names = NULL;
1843 0 : out_xfarray:
1844 0 : xfarray_destroy(rd->dir_entries);
1845 0 : rd->dir_entries = NULL;
1846 0 : return error;
1847 : }
1848 :
1849 : /*
1850 : * Move the current file to the orphanage.
1851 : *
1852 : * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks. Upon
1853 : * successful return, the scrub transaction will have enough extra reservation
1854 : * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
1855 : * orphanage; and both inodes will be ijoined.
1856 : */
1857 : STATIC int
1858 0 : xrep_dir_move_to_orphanage(
1859 : struct xrep_dir *rd)
1860 : {
1861 0 : struct xfs_scrub *sc = rd->sc;
1862 0 : xfs_ino_t orig_parent, new_parent;
1863 0 : int error;
1864 :
1865 : /*
1866 0 : * We are about to drop the ILOCK on sc->ip to lock the orphanage and
1867 : * prepare for the adoption. Therefore, look up the old dotdot entry
1868 : * for sc->ip so that we can compare it after we re-lock sc->ip.
1869 : */
1870 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
1871 : if (error)
1872 : return error;
1873 :
1874 0 : /*
1875 0 : * We hold ILOCK_EXCL on both the directory and the tempdir after a
1876 : * successful rebuild. Before we can move the directory to the
1877 : * orphanage, we must roll to a clean unjoined transaction.
1878 : */
1879 : error = xfs_trans_roll(&sc->tp);
1880 : if (error)
1881 : return error;
1882 :
1883 0 : /*
1884 0 : * Because the orphanage is just another directory in the filesystem,
1885 : * we must take its IOLOCK to coordinate with the VFS. We cannot take
1886 : * an IOLOCK while holding an ILOCK, so we must drop them all. We may
1887 : * have to drop the IOLOCK as well.
1888 : */
1889 : xrep_tempfile_iunlock_both(sc);
1890 :
1891 : error = xrep_adoption_init(sc, &rd->adoption);
1892 : if (error)
1893 0 : return error;
1894 :
1895 0 : if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
1896 0 : xchk_iunlock(sc, sc->ilock_flags);
1897 : error = xrep_orphanage_iolock_two(sc);
1898 : if (error)
1899 0 : goto err_adoption;
1900 0 : }
1901 0 :
1902 0 : /* Prepare for the adoption and lock both down. */
1903 0 : error = xrep_adoption_prep(&rd->adoption);
1904 : if (error)
1905 : goto err_adoption;
1906 :
1907 0 : error = xrep_adoption_compute_name(&rd->adoption, rd->pptr.p_name);
1908 0 : if (error)
1909 0 : goto err_adoption;
1910 :
1911 0 : /*
1912 0 : * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
1913 0 : * entry again. If the parent changed or the child was unlinked while
1914 : * the child directory was unlocked, we don't need to move the child to
1915 : * the orphanage after all.
1916 : */
1917 : error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
1918 : if (error)
1919 : goto err_adoption;
1920 : if (orig_parent != new_parent || VFS_I(sc->ip)->i_nlink == 0) {
1921 0 : error = 0;
1922 0 : goto err_adoption;
1923 0 : }
1924 0 :
1925 0 : /* Attach to the orphanage. */
1926 0 : return xrep_adoption_commit(&rd->adoption);
1927 : err_adoption:
1928 : xrep_adoption_cancel(&rd->adoption, error);
1929 : return error;
1930 0 : }
1931 0 :
1932 0 : /*
1933 0 : * Repair the directory metadata.
1934 : *
1935 : * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer
1936 : * cache in XFS can't handle aliased multiblock buffers, so this might
1937 : * misbehave if the directory blocks are crosslinked with other filesystem
1938 : * metadata.
1939 : *
1940 : * XXX: Is it necessary to check the dcache for this directory to make sure
1941 : * that we always recreate every cached entry?
1942 : */
1943 : int
1944 : xrep_directory(
1945 : struct xfs_scrub *sc)
1946 : {
1947 : struct xrep_dir *rd = sc->buf;
1948 241365 : int error;
1949 :
1950 : /* The rmapbt is required to reap the old data fork. */
1951 241365 : if (!xfs_has_rmapbt(sc->mp))
1952 241365 : return -EOPNOTSUPP;
1953 :
1954 : error = xrep_dir_setup_scan(rd);
1955 241365 : if (error)
1956 : return error;
1957 :
1958 197897 : if (xfs_has_parent(sc->mp))
1959 197909 : error = xrep_dir_scan_dirtree(rd);
1960 : else
1961 : error = xrep_dir_salvage_entries(rd);
1962 197909 : if (error)
1963 196916 : goto out_teardown;
1964 :
1965 993 : /* Last chance to abort before we start committing fixes. */
1966 197903 : if (xchk_should_terminate(sc, &error))
1967 30 : goto out_teardown;
1968 :
1969 : error = xrep_dir_rebuild_tree(rd);
1970 197873 : if (error)
1971 0 : goto out_teardown;
1972 :
1973 197874 : if (rd->needs_adoption) {
1974 197871 : if (!xrep_orphanage_can_adopt(rd->sc))
1975 33 : error = -EFSCORRUPTED;
1976 : else
1977 197838 : error = xrep_dir_move_to_orphanage(rd);
1978 0 : if (error)
1979 0 : goto out_teardown;
1980 : }
1981 :
1982 : out_teardown:
1983 197838 : xrep_dir_teardown(sc);
1984 197901 : return error;
1985 197894 : }
|