Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_log_format.h"
13 : #include "xfs_trans.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_ialloc.h"
16 : #include "xfs_quota.h"
17 : #include "xfs_trans_space.h"
18 : #include "xfs_dir2.h"
19 : #include "xfs_icache.h"
20 : #include "xfs_bmap.h"
21 : #include "xfs_bmap_btree.h"
22 : #include "xfs_parent.h"
23 : #include "xfs_da_format.h"
24 : #include "xfs_da_btree.h"
25 : #include "xfs_xattr.h"
26 : #include "scrub/scrub.h"
27 : #include "scrub/common.h"
28 : #include "scrub/repair.h"
29 : #include "scrub/trace.h"
30 : #include "scrub/orphanage.h"
31 : #include "scrub/readdir.h"
32 :
33 : #include <linux/namei.h>
34 :
35 : /* Make the orphanage owned by root. */
36 : STATIC int
37 3772611 : xrep_chown_orphanage(
38 : struct xfs_scrub *sc,
39 : struct xfs_inode *dp)
40 : {
41 3772611 : struct xfs_trans *tp;
42 3772611 : struct xfs_mount *mp = sc->mp;
43 3772611 : struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL;
44 3772611 : struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL;
45 3772611 : struct inode *inode = VFS_I(dp);
46 3772611 : int error;
47 :
48 3772611 : error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
49 : XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
50 3772611 : if (error)
51 : return error;
52 :
53 3772611 : error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp);
54 3772611 : if (error)
55 0 : goto out_dqrele;
56 :
57 : /*
58 : * Always clear setuid/setgid on the orphanage since we don't normally
59 : * want that functionality on this directory and xfs_repair doesn't
60 : * create it this way either. Leave the other access bits unchanged.
61 : */
62 3772611 : inode->i_mode &= ~(S_ISUID | S_ISGID);
63 :
64 : /*
65 : * Change the ownerships and register quota modifications
66 : * in the transaction.
67 : */
68 3772611 : if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) {
69 0 : if (XFS_IS_UQUOTA_ON(mp))
70 0 : oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp);
71 0 : inode->i_uid = GLOBAL_ROOT_UID;
72 : }
73 3772611 : if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) {
74 0 : if (XFS_IS_GQUOTA_ON(mp))
75 0 : oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp);
76 0 : inode->i_gid = GLOBAL_ROOT_GID;
77 : }
78 3772611 : if (dp->i_projid != 0) {
79 0 : if (XFS_IS_PQUOTA_ON(mp))
80 0 : oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp);
81 0 : dp->i_projid = 0;
82 : }
83 :
84 3772611 : dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
85 3772611 : xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
86 :
87 3772611 : XFS_STATS_INC(mp, xs_ig_attrchg);
88 :
89 3772611 : if (xfs_has_wsync(mp))
90 0 : xfs_trans_set_sync(tp);
91 3772611 : error = xfs_trans_commit(tp);
92 :
93 3772611 : xfs_qm_dqrele(oldu);
94 3772611 : xfs_qm_dqrele(oldg);
95 3772611 : xfs_qm_dqrele(oldp);
96 :
97 3772611 : out_dqrele:
98 3772611 : xfs_qm_dqrele(udqp);
99 3772611 : xfs_qm_dqrele(gdqp);
100 3772611 : xfs_qm_dqrele(pdqp);
101 3772611 : return error;
102 : }
103 :
104 : /*
105 : * Enable logged extended attributes for parent pointers. This must get done
106 : * before we create transactions and start making changes.
107 : */
108 : STATIC int
109 3772611 : xrep_adoption_grab_log_assist(
110 : struct xfs_scrub *sc)
111 : {
112 3772611 : int error;
113 :
114 3772611 : if (!xfs_has_parent(sc->mp))
115 : return 0;
116 :
117 3772611 : ASSERT(!(sc->flags & XREP_FSGATES_LARP));
118 :
119 3772611 : error = xfs_attr_grab_log_assist(sc->mp);
120 3772610 : if (error)
121 : return error;
122 :
123 3772611 : trace_xchk_fsgates_enable(sc, XREP_FSGATES_LARP);
124 :
125 3772611 : sc->flags |= XREP_FSGATES_LARP;
126 3772611 : return 0;
127 : }
128 :
129 : #define ORPHANAGE "lost+found"
130 :
131 : /* Create the orphanage directory, and set sc->orphanage to it. */
132 : int
133 3772374 : xrep_orphanage_create(
134 : struct xfs_scrub *sc)
135 : {
136 3772374 : struct xfs_mount *mp = sc->mp;
137 3772374 : struct dentry *root_dentry, *orphanage_dentry;
138 3772374 : struct inode *root_inode = VFS_I(sc->mp->m_rootip);
139 3772374 : struct inode *orphanage_inode;
140 3772374 : int error;
141 :
142 7544748 : if (xfs_is_shutdown(mp))
143 : return -EIO;
144 7544748 : if (xfs_is_readonly(mp)) {
145 0 : sc->orphanage = NULL;
146 0 : return 0;
147 : }
148 :
149 3772374 : ASSERT(sc->tp == NULL);
150 3772374 : ASSERT(sc->orphanage == NULL);
151 :
152 : /* Find the dentry for the root directory... */
153 3772374 : root_dentry = d_find_alias(root_inode);
154 3772588 : if (!root_dentry) {
155 0 : error = -EFSCORRUPTED;
156 0 : goto out;
157 : }
158 :
159 : /* ...which is a directory, right? */
160 3772588 : if (!d_is_dir(root_dentry)) {
161 0 : error = -EFSCORRUPTED;
162 0 : goto out_dput_root;
163 : }
164 :
165 : /* Try to find the orphanage directory. */
166 3772588 : inode_lock_nested(root_inode, I_MUTEX_PARENT);
167 3772611 : orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry,
168 : strlen(ORPHANAGE));
169 3772611 : if (IS_ERR(orphanage_dentry)) {
170 0 : error = PTR_ERR(orphanage_dentry);
171 0 : goto out_unlock_root;
172 : }
173 :
174 : /*
175 : * Nothing found? Call mkdir to create the orphanage. Create the
176 : * directory without group or other-user access because we're live and
177 : * someone could have been relying partly on minimal access to a parent
178 : * directory to control access to a file we put in here.
179 : */
180 3772611 : if (d_really_is_negative(orphanage_dentry)) {
181 48 : error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry,
182 : 0700);
183 48 : if (error)
184 0 : goto out_dput_orphanage;
185 : }
186 :
187 : /* Not a directory? Bail out. */
188 3772611 : if (!d_is_dir(orphanage_dentry)) {
189 0 : error = -ENOTDIR;
190 0 : goto out_dput_orphanage;
191 : }
192 :
193 : /*
194 : * Grab a reference to the orphanage. This /should/ succeed since
195 : * we hold the root directory locked and therefore nobody can delete
196 : * the orphanage.
197 : */
198 3772611 : orphanage_inode = igrab(d_inode(orphanage_dentry));
199 3772611 : if (!orphanage_inode) {
200 0 : error = -ENOENT;
201 0 : goto out_dput_orphanage;
202 : }
203 :
204 : /* Make sure the orphanage is owned by root. */
205 3772611 : error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode));
206 3772611 : if (error)
207 0 : goto out_dput_orphanage;
208 :
209 : /* Stash the reference for later and bail out. */
210 3772611 : sc->orphanage = XFS_I(orphanage_inode);
211 3772611 : sc->orphanage_ilock_flags = 0;
212 :
213 3772611 : out_dput_orphanage:
214 3772611 : dput(orphanage_dentry);
215 3772611 : out_unlock_root:
216 3772611 : inode_unlock(VFS_I(sc->mp->m_rootip));
217 3772610 : out_dput_root:
218 3772610 : dput(root_dentry);
219 : out:
220 : /*
221 : * Turn on whatever log features are required for an adoption to be
222 : * committed correctly.
223 : */
224 3772597 : if (!error)
225 3772596 : error = xrep_adoption_grab_log_assist(sc);
226 : return error;
227 : }
228 :
229 : void
230 0 : xrep_orphanage_ilock(
231 : struct xfs_scrub *sc,
232 : unsigned int ilock_flags)
233 : {
234 0 : sc->orphanage_ilock_flags |= ilock_flags;
235 0 : xfs_ilock(sc->orphanage, ilock_flags);
236 0 : }
237 :
238 : bool
239 91368846 : xrep_orphanage_ilock_nowait(
240 : struct xfs_scrub *sc,
241 : unsigned int ilock_flags)
242 : {
243 91368846 : if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) {
244 91367777 : sc->orphanage_ilock_flags |= ilock_flags;
245 91367777 : return true;
246 : }
247 :
248 : return false;
249 : }
250 :
251 : void
252 182721012 : xrep_orphanage_iunlock(
253 : struct xfs_scrub *sc,
254 : unsigned int ilock_flags)
255 : {
256 182721012 : xfs_iunlock(sc->orphanage, ilock_flags);
257 182728282 : sc->orphanage_ilock_flags &= ~ilock_flags;
258 7270 : }
259 :
260 : /* Grab the IOLOCK of the orphanage and sc->ip. */
261 : int
262 91360507 : xrep_orphanage_iolock_two(
263 : struct xfs_scrub *sc)
264 : {
265 91360507 : int error = 0;
266 :
267 91368845 : while (true) {
268 91368845 : if (xchk_should_terminate(sc, &error))
269 0 : return error;
270 :
271 : /*
272 : * Normal XFS takes the IOLOCK before grabbing a transaction.
273 : * Scrub holds a transaction, which means that we can't block
274 : * on either IOLOCK.
275 : */
276 91368845 : if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
277 91367777 : if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL))
278 : break;
279 7270 : xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
280 : }
281 8338 : delay(1);
282 : }
283 :
284 : return 0;
285 : }
286 :
287 : /*
288 : * Set up the adoption structure and compute the block reservations needed to
289 : * add sc->ip to the orphanage.
290 : */
291 : int
292 91360507 : xrep_adoption_init(
293 : struct xfs_scrub *sc,
294 : struct xrep_adoption *adopt)
295 : {
296 91360507 : struct xfs_mount *mp = sc->mp;
297 91360507 : unsigned int child_blkres = 0;
298 :
299 91360507 : adopt->sc = sc;
300 91360507 : adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN);
301 91360507 : if (S_ISDIR(VFS_I(sc->ip)->i_mode))
302 33255294 : child_blkres = xfs_rename_space_res(mp, 0, false,
303 33255294 : xfs_name_dotdot.len, false);
304 91360507 : adopt->child_blkres = child_blkres;
305 :
306 91360507 : if (xfs_has_parent(mp)) {
307 91360507 : ASSERT(sc->flags & XREP_FSGATES_LARP);
308 91360507 : return xfs_parent_start_locked(mp, &adopt->parent);
309 : } else {
310 0 : adopt->parent = NULL;
311 : }
312 :
313 0 : return 0;
314 : }
315 :
316 : /*
317 : * Compute the xfs_name for the directory entry that we're adding to the
318 : * orphanage. Caller must hold ILOCKs of sc->ip and the orphanage and must not
319 : * reuse namebuf until the adoption completes or is cancelled.
320 : */
321 : int
322 1 : xrep_adoption_compute_name(
323 : struct xrep_adoption *adopt,
324 : unsigned char *namebuf)
325 : {
326 1 : struct xfs_name *xname = &adopt->xname;
327 1 : struct xfs_scrub *sc = adopt->sc;
328 1 : xfs_ino_t ino;
329 1 : unsigned int incr = 0;
330 1 : int error = 0;
331 :
332 1 : xname->name = namebuf;
333 1 : xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino);
334 1 : xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode);
335 :
336 : /* Make sure the filename is unique in the lost+found. */
337 1 : error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
338 1 : while (error == 0 && incr < 10000) {
339 0 : xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u",
340 0 : sc->ip->i_ino, ++incr);
341 0 : error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino);
342 : }
343 1 : if (error == 0) {
344 : /* We already have 10,000 entries in the orphanage? */
345 : return -EFSCORRUPTED;
346 : }
347 :
348 1 : if (error != -ENOENT)
349 0 : return error;
350 : return 0;
351 : }
352 :
353 : /*
354 : * Prepare to send a child to the orphanage.
355 : *
356 : * Reserve more space in the transaction, take the ILOCKs of the orphanage and
357 : * sc->ip, join them to the transaction, and reserve quota to reparent the
358 : * latter. Caller must hold the IOLOCK of the orphanage and sc->ip.
359 : */
360 : int
361 91360507 : xrep_adoption_prep(
362 : struct xrep_adoption *adopt)
363 : {
364 91360507 : struct xfs_scrub *sc = adopt->sc;
365 91360507 : int error;
366 :
367 : /*
368 : * Reserve space to the transaction to handle expansion of both the
369 : * orphanage and the child directory.
370 : */
371 91360507 : error = xfs_trans_reserve_more(sc->tp,
372 91360507 : adopt->orphanage_blkres + adopt->child_blkres, 0);
373 91360507 : if (error)
374 : return error;
375 :
376 91360507 : xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL,
377 : sc->ip, XFS_ILOCK_EXCL);
378 91360507 : sc->ilock_flags |= XFS_ILOCK_EXCL;
379 91360507 : sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL;
380 :
381 91360507 : xfs_trans_ijoin(sc->tp, sc->orphanage, 0);
382 91360507 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
383 :
384 : /*
385 : * Reserve enough quota in the orphan directory to add the new name.
386 : * Normally the orphanage should have user/group/project ids of zero
387 : * and hence is not subject to quota enforcement, but we're allowed to
388 : * exceed quota to reattach disconnected parts of the directory tree.
389 : */
390 91360507 : error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage,
391 91360507 : adopt->orphanage_blkres, 0, true);
392 91360507 : if (error)
393 : return error;
394 :
395 : /*
396 : * Reserve enough quota in the child directory to change dotdot.
397 : * Here we're also allowed to exceed file quota to repair inconsistent
398 : * metadata.
399 : */
400 91360507 : if (adopt->child_blkres) {
401 33255294 : error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip,
402 : adopt->child_blkres, 0, true);
403 33255294 : if (error)
404 0 : return error;
405 : }
406 :
407 : return 0;
408 : }
409 :
410 : /*
411 : * Make sure the dcache does not have a positive dentry for the name we've
412 : * chosen. The caller should have checked with the ondisk directory, so any
413 : * discrepancy is a sign that something is seriously wrong.
414 : */
415 : static int
416 0 : xrep_orphanage_check_dcache(
417 : struct xrep_adoption *adopt)
418 : {
419 0 : struct qstr qname = QSTR_INIT(adopt->xname.name,
420 : adopt->xname.len);
421 0 : struct dentry *d_orphanage, *d_child;
422 0 : int error = 0;
423 :
424 0 : d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage));
425 0 : if (!d_orphanage)
426 : return 0;
427 :
428 0 : d_child = d_hash_and_lookup(d_orphanage, &qname);
429 0 : if (d_child) {
430 0 : trace_xrep_orphanage_check_child(adopt->sc->mp, d_child);
431 :
432 0 : if (d_is_positive(d_child)) {
433 0 : ASSERT(d_is_negative(d_child));
434 0 : error = -EFSCORRUPTED;
435 : }
436 :
437 0 : dput(d_child);
438 : }
439 :
440 0 : dput(d_orphanage);
441 0 : if (error)
442 : return error;
443 :
444 : /*
445 : * Do we need to update d_parent of the dentry for the file being
446 : * repaired? In theory there shouldn't be one since the file had
447 : * nonzero nlink but wasn't connected to any parent dir.
448 : */
449 0 : d_child = d_find_alias(VFS_I(adopt->sc->ip));
450 0 : if (d_child) {
451 0 : trace_xrep_orphanage_check_alias(adopt->sc->mp, d_child);
452 0 : ASSERT(d_child->d_parent == NULL);
453 :
454 0 : dput(d_child);
455 0 : return -EFSCORRUPTED;
456 : }
457 :
458 : return 0;
459 : }
460 :
461 : /*
462 : * Remove all negative dentries from the dcache. There should not be any
463 : * positive entries, since we've maintained our lock on the orphanage
464 : * directory.
465 : */
466 : static void
467 0 : xrep_orphanage_zap_dcache(
468 : struct xrep_adoption *adopt)
469 : {
470 0 : struct qstr qname = QSTR_INIT(adopt->xname.name,
471 : adopt->xname.len);
472 0 : struct dentry *d_orphanage, *d_child;
473 :
474 0 : d_orphanage = d_find_alias(VFS_I(adopt->sc->orphanage));
475 0 : if (!d_orphanage)
476 0 : return;
477 :
478 0 : d_child = d_hash_and_lookup(d_orphanage, &qname);
479 0 : while (d_child != NULL) {
480 0 : trace_xrep_orphanage_invalidate_child(adopt->sc->mp, d_child);
481 :
482 0 : ASSERT(d_is_negative(d_child));
483 0 : d_invalidate(d_child);
484 0 : dput(d_child);
485 0 : d_child = d_lookup(d_orphanage, &qname);
486 : }
487 :
488 0 : dput(d_orphanage);
489 : }
490 :
491 : /*
492 : * Move the current file to the orphanage.
493 : *
494 : * The caller must hold the IOLOCKs and the ILOCKs for both sc->ip and the
495 : * orphanage. The directory entry name must have been computed, and quota
496 : * reserved. The function returns with both inodes joined and ILOCKed to the
497 : * transaction.
498 : */
499 : int
500 0 : xrep_adoption_commit(
501 : struct xrep_adoption *adopt)
502 : {
503 0 : struct xfs_scrub *sc = adopt->sc;
504 0 : struct xfs_name *xname = &adopt->xname;
505 0 : bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode);
506 0 : int error;
507 :
508 0 : trace_xrep_adoption_commit(sc->orphanage, &adopt->xname, sc->ip->i_ino);
509 :
510 0 : error = xrep_orphanage_check_dcache(adopt);
511 0 : if (error)
512 0 : goto out_parent;
513 :
514 : /* Create the new name in the orphanage. */
515 0 : error = xfs_dir_createname(sc->tp, sc->orphanage, xname, sc->ip->i_ino,
516 : adopt->orphanage_blkres);
517 0 : if (error)
518 0 : goto out_parent;
519 :
520 : /*
521 : * Bump the link count of the orphanage if we just added a
522 : * subdirectory, and update its timestamps.
523 : */
524 0 : xfs_trans_ichgtime(sc->tp, sc->orphanage,
525 : XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
526 0 : if (isdir)
527 0 : xfs_bumplink(sc->tp, sc->orphanage);
528 0 : xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE);
529 :
530 : /* Bump the link count of the child. */
531 0 : xfs_bumplink(sc->tp, sc->ip);
532 0 : xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
533 :
534 : /* Replace the dotdot entry if the child is a subdirectory. */
535 0 : if (isdir) {
536 0 : error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot,
537 0 : sc->orphanage->i_ino, adopt->child_blkres);
538 0 : if (error)
539 0 : goto out_parent;
540 : }
541 :
542 : /* Add a parent pointer from the file back to the lost+found. */
543 0 : if (adopt->parent) {
544 0 : error = xfs_parent_add(sc->tp, adopt->parent, sc->orphanage,
545 : xname, sc->ip);
546 0 : if (error)
547 0 : goto out_parent;
548 : }
549 :
550 : /*
551 : * Notify dirent hooks that we moved the file to /lost+found, and
552 : * finish all the deferred work so that we know the adoption is fully
553 : * recorded in the log.
554 : */
555 0 : xfs_dir_update_hook(sc->orphanage, sc->ip, 1, xname);
556 0 : error = xrep_defer_finish(sc);
557 0 : if (error)
558 0 : goto out_parent;
559 :
560 : /* Remove negative dentries from the lost+found's dcache */
561 0 : xrep_orphanage_zap_dcache(adopt);
562 0 : out_parent:
563 0 : xfs_parent_finish(sc->mp, adopt->parent);
564 0 : adopt->parent = NULL;
565 0 : return error;
566 : }
567 :
568 : /* Cancel a proposed relocation of a file to the orphanage. */
569 : void
570 91360507 : xrep_adoption_cancel(
571 : struct xrep_adoption *adopt,
572 : int error)
573 : {
574 91360507 : struct xfs_scrub *sc = adopt->sc;
575 :
576 : /*
577 : * Setting up (and hence cancelling) an adoption is the last thing that
578 : * repair code does. Hence we don't bother giving back the quota or
579 : * space reservations or unlock the inodes. Later when we have incore
580 : * state to manage, we'll need to give that back.
581 : */
582 91360507 : trace_xrep_adoption_cancel(sc->orphanage, sc->ip, error);
583 91360507 : xfs_parent_finish(sc->mp, adopt->parent);
584 91360507 : adopt->parent = NULL;
585 91360507 : }
586 :
587 : /* Release the orphanage. */
588 : void
589 650054893 : xrep_orphanage_rele(
590 : struct xfs_scrub *sc)
591 : {
592 650054893 : if (!sc->orphanage)
593 : return;
594 :
595 3772382 : if (sc->orphanage_ilock_flags)
596 1 : xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags);
597 :
598 3772382 : xchk_irele(sc, sc->orphanage);
599 3772218 : sc->orphanage = NULL;
600 : }
|