Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_bit.h"
15 : #include "xfs_log_format.h"
16 : #include "xfs_trans.h"
17 : #include "xfs_sb.h"
18 : #include "xfs_inode.h"
19 : #include "xfs_icache.h"
20 : #include "xfs_inode_buf.h"
21 : #include "xfs_inode_fork.h"
22 : #include "xfs_ialloc.h"
23 : #include "xfs_da_format.h"
24 : #include "xfs_reflink.h"
25 : #include "xfs_alloc.h"
26 : #include "xfs_rmap.h"
27 : #include "xfs_rmap_btree.h"
28 : #include "xfs_bmap.h"
29 : #include "xfs_bmap_btree.h"
30 : #include "xfs_bmap_util.h"
31 : #include "xfs_dir2.h"
32 : #include "xfs_dir2_priv.h"
33 : #include "xfs_quota_defs.h"
34 : #include "xfs_quota.h"
35 : #include "xfs_ag.h"
36 : #include "xfs_attr_leaf.h"
37 : #include "xfs_log_priv.h"
38 : #include "xfs_symlink_remote.h"
39 : #include "scrub/xfs_scrub.h"
40 : #include "scrub/scrub.h"
41 : #include "scrub/common.h"
42 : #include "scrub/btree.h"
43 : #include "scrub/trace.h"
44 : #include "scrub/repair.h"
45 :
46 : /*
47 : * Inode Repair
48 : *
49 : * Roughly speaking, inode problems can be classified based on whether or not
50 : * they trip the dinode verifiers. If those trip, then we won't be able to
51 : * _iget ourselves the inode.
52 : *
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier to fail.  The xrep_inode_*
 * functions fix things on live incore inodes.
56 : */
57 :
/*
 * All the information we need to repair the ondisk inode if we can't iget the
 * incore inode.  We don't allocate this buffer unless we're going to perform
 * a repair to the ondisk inode cluster buffer.
 *
 * The block and extent counters start out zeroed (the structure is allocated
 * with kzalloc) and are accumulated while walking the reverse mapping
 * records owned by the inode being repaired.
 */
struct xrep_inode {
	/* Inode mapping that we saved from the initial lookup attempt. */
	struct xfs_imap		imap;

	/* Scrub context that this repair state belongs to. */
	struct xfs_scrub	*sc;

	/* Blocks in use on the data device by data extents or bmbt blocks. */
	xfs_rfsblock_t		data_blocks;

	/* Blocks in use on the rt device. */
	xfs_rfsblock_t		rt_blocks;

	/* Blocks in use by the attr fork. */
	xfs_rfsblock_t		attr_blocks;

	/*
	 * Physical block containing data block 0, or NULLFSBLOCK if no
	 * written mapping for file offset zero has been seen.
	 */
	xfs_fsblock_t		block0;

	/* Number of data device extents for the data fork. */
	xfs_extnum_t		data_extents;

	/*
	 * Number of realtime device extents for the data fork.  If
	 * data_extents and rt_extents indicate that the data fork has extents
	 * on both devices, we'll just back away slowly.
	 */
	xfs_extnum_t		rt_extents;

	/* Number of (data device) extents for the attr fork. */
	xfs_aextnum_t		attr_extents;
};
94 :
95 : /* Setup function for inode repair. */
96 : int
97 0 : xrep_setup_inode(
98 : struct xfs_scrub *sc,
99 : struct xfs_imap *imap)
100 : {
101 0 : struct xrep_inode *ri;
102 :
103 : /*
104 : * The only information that needs to be passed between inode scrub and
105 : * repair is the location of the ondisk metadata if iget fails. The
106 : * rest of struct xrep_inode is context data that we need to massage
107 : * the ondisk inode to the point that iget will work, which means that
108 : * we don't allocate anything at all if the incore inode is loaded.
109 : */
110 0 : if (!imap)
111 : return 0;
112 :
113 0 : sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
114 0 : if (!sc->buf)
115 : return -ENOMEM;
116 :
117 0 : ri = sc->buf;
118 0 : memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
119 0 : ri->sc = sc;
120 0 : ri->block0 = NULLFSBLOCK;
121 0 : return 0;
122 : }
123 :
124 : /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
125 : STATIC void
126 0 : xrep_dinode_buf(
127 : struct xfs_scrub *sc,
128 : struct xfs_buf *bp)
129 : {
130 0 : struct xfs_mount *mp = sc->mp;
131 0 : struct xfs_trans *tp = sc->tp;
132 0 : struct xfs_perag *pag;
133 0 : struct xfs_dinode *dip;
134 0 : xfs_agnumber_t agno;
135 0 : xfs_agino_t agino;
136 0 : int ioff;
137 0 : int i;
138 0 : int ni;
139 0 : bool crc_ok;
140 0 : bool magic_ok;
141 0 : bool unlinked_ok;
142 :
143 0 : ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
144 0 : agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp));
145 0 : pag = xfs_perag_get(mp, agno);
146 0 : for (i = 0; i < ni; i++) {
147 0 : ioff = i << mp->m_sb.sb_inodelog;
148 0 : dip = xfs_buf_offset(bp, ioff);
149 0 : agino = be32_to_cpu(dip->di_next_unlinked);
150 :
151 0 : unlinked_ok = magic_ok = crc_ok = false;
152 :
153 0 : if (xfs_verify_agino_or_null(pag, agino))
154 : unlinked_ok = true;
155 :
156 0 : if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
157 0 : xfs_dinode_good_version(mp, dip->di_version))
158 0 : magic_ok = true;
159 :
160 0 : if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
161 : XFS_DINODE_CRC_OFF))
162 0 : crc_ok = true;
163 :
164 0 : if (magic_ok && unlinked_ok && crc_ok)
165 0 : continue;
166 :
167 0 : if (!magic_ok) {
168 0 : dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
169 0 : dip->di_version = 3;
170 : }
171 0 : if (!unlinked_ok) {
172 0 : xfs_emerg(mp, "IUNLINK ino 0x%llx next_unlinked %x -> NULLAGINO", sc->sm->sm_ino, be32_to_cpu(dip->di_next_unlinked));
173 0 : dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
174 : }
175 0 : xfs_dinode_calc_crc(mp, dip);
176 0 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
177 0 : xfs_trans_log_buf(tp, bp, ioff, ioff + sizeof(*dip) - 1);
178 : }
179 0 : xfs_perag_put(pag);
180 0 : }
181 :
182 : /* Reinitialize things that never change in an inode. */
183 : STATIC void
184 0 : xrep_dinode_header(
185 : struct xfs_scrub *sc,
186 : struct xfs_dinode *dip)
187 : {
188 0 : trace_xrep_dinode_header(sc, dip);
189 :
190 0 : dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
191 0 : if (!xfs_dinode_good_version(sc->mp, dip->di_version))
192 0 : dip->di_version = 3;
193 0 : dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
194 0 : uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
195 0 : dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
196 0 : }
197 :
198 : /* Parse enough of the directory block header to guess if this is a dir. */
199 : static inline bool
200 0 : xrep_dinode_is_dir(
201 : xfs_ino_t ino,
202 : xfs_daddr_t daddr,
203 : struct xfs_buf *bp)
204 : {
205 0 : struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
206 0 : struct xfs_dir2_data_free *bf;
207 0 : struct xfs_mount *mp = bp->b_mount;
208 0 : xfs_lsn_t lsn = be64_to_cpu(hdr3->lsn);
209 :
210 : /* Does the dir3 header match the filesystem? */
211 0 : if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC) &&
212 : hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
213 : return false;
214 :
215 0 : if (be64_to_cpu(hdr3->owner) != ino)
216 : return false;
217 :
218 0 : if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
219 : return false;
220 :
221 0 : if (be64_to_cpu(hdr3->blkno) != daddr)
222 : return false;
223 :
224 : /* Directory blocks are always logged and must have a valid LSN. */
225 0 : if (lsn == NULLCOMMITLSN)
226 : return false;
227 0 : if (!xlog_valid_lsn(mp->m_log, lsn))
228 : return false;
229 :
230 : /*
231 : * bestfree information lives immediately after the end of the header,
232 : * so we won't run off the end of the buffer.
233 : */
234 0 : bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
235 0 : if (!bf[0].length && bf[0].offset)
236 : return false;
237 0 : if (!bf[1].length && bf[1].offset)
238 : return false;
239 0 : if (!bf[2].length && bf[2].offset)
240 : return false;
241 :
242 0 : if (be16_to_cpu(bf[0].length) < be16_to_cpu(bf[1].length))
243 : return false;
244 0 : if (be16_to_cpu(bf[1].length) < be16_to_cpu(bf[2].length))
245 0 : return false;
246 :
247 : return true;
248 : }
249 :
250 : /* Guess the mode of this file from the contents. */
251 : STATIC uint16_t
252 0 : xrep_dinode_guess_mode(
253 : struct xrep_inode *ri,
254 : struct xfs_dinode *dip)
255 : {
256 0 : struct xfs_buf *bp;
257 0 : struct xfs_mount *mp = ri->sc->mp;
258 0 : xfs_daddr_t daddr;
259 0 : uint64_t fsize = be64_to_cpu(dip->di_size);
260 0 : unsigned int dfork_sz = XFS_DFORK_DSIZE(dip, mp);
261 0 : uint16_t mode = S_IFREG;
262 0 : int error;
263 :
264 0 : switch (dip->di_format) {
265 0 : case XFS_DINODE_FMT_LOCAL:
266 : /*
267 : * If the data fork is local format, the size of the data area
268 : * is reasonable and is big enough to contain the entire file,
269 : * we can guess the file type from the local data.
270 : *
271 : * If there are no nulls, guess this is a symbolic link.
272 : * Otherwise, this is probably a shortform directory.
273 : */
274 0 : if (dfork_sz <= XFS_LITINO(mp) && dfork_sz >= fsize) {
275 0 : if (!memchr(XFS_DFORK_DPTR(dip), 0, fsize))
276 : return S_IFLNK;
277 0 : return S_IFDIR;
278 : }
279 :
280 : /* By default, we guess regular file. */
281 : return S_IFREG;
282 0 : case XFS_DINODE_FMT_DEV:
283 : /*
284 : * If the data fork is dev format, the size of the data area is
285 : * reasonable and large enough to store a dev_t, and the file
286 : * size is zero, this could be a blockdev, a chardev, a fifo,
287 : * or a socket. There is no solid way to distinguish between
288 : * those choices, so we guess blockdev if the device number is
289 : * nonzero and chardev if it's zero (aka whiteout).
290 : */
291 0 : if (dfork_sz <= XFS_LITINO(mp) &&
292 0 : dfork_sz >= sizeof(__be32) && fsize == 0) {
293 0 : xfs_dev_t dev = xfs_dinode_get_rdev(dip);
294 :
295 0 : return dev != 0 ? S_IFBLK : S_IFCHR;
296 : }
297 :
298 : /* By default, we guess regular file. */
299 : return S_IFREG;
300 : case XFS_DINODE_FMT_EXTENTS:
301 : case XFS_DINODE_FMT_BTREE:
302 : /* There are data blocks to examine below. */
303 0 : break;
304 : default:
305 : /* Everything else is considered a regular file. */
306 : return S_IFREG;
307 : }
308 :
309 : /* There are no zero-length directories. */
310 0 : if (fsize == 0)
311 : return S_IFREG;
312 :
313 : /*
314 : * If we didn't find a written mapping for file block zero, we'll guess
315 : * that it's a sparse regular file.
316 : */
317 0 : if (ri->block0 == NULLFSBLOCK)
318 : return S_IFREG;
319 :
320 : /* Directories can't have rt extents. */
321 0 : if (ri->rt_extents > 0)
322 : return S_IFREG;
323 :
324 : /*
325 : * Read the first block of the file. Since we have no idea what kind
326 : * of file geometry (e.g. dirblock size) we might be reading into, use
327 : * an uncached buffer so that we don't pollute the buffer cache. We
328 : * can't do uncached mapped buffers, so the best we can do is guess
329 : * from the directory header.
330 : */
331 0 : daddr = XFS_FSB_TO_DADDR(mp, ri->block0);
332 0 : error = xfs_buf_read_uncached(mp->m_ddev_targp, daddr,
333 0 : XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
334 0 : if (error)
335 : return S_IFREG;
336 :
337 0 : if (xrep_dinode_is_dir(ri->sc->sm->sm_ino, daddr, bp))
338 0 : mode = S_IFDIR;
339 :
340 0 : xfs_buf_relse(bp);
341 0 : return mode;
342 : }
343 :
344 : /* Turn di_mode into /something/ recognizable. */
345 : STATIC void
346 0 : xrep_dinode_mode(
347 : struct xrep_inode *ri,
348 : struct xfs_dinode *dip)
349 : {
350 0 : struct xfs_scrub *sc = ri->sc;
351 0 : uint16_t mode;
352 :
353 0 : trace_xrep_dinode_mode(sc, dip);
354 :
355 0 : mode = be16_to_cpu(dip->di_mode);
356 0 : if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
357 0 : return;
358 :
359 : /* bad mode, so we set it to a file that only root can read */
360 0 : mode = xrep_dinode_guess_mode(ri, dip);
361 0 : dip->di_mode = cpu_to_be16(mode);
362 0 : dip->di_uid = 0;
363 0 : dip->di_gid = 0;
364 : }
365 :
366 : /* Fix any conflicting flags that the verifiers complain about. */
367 : STATIC void
368 0 : xrep_dinode_flags(
369 : struct xfs_scrub *sc,
370 : struct xfs_dinode *dip,
371 : bool isrt)
372 : {
373 0 : struct xfs_mount *mp = sc->mp;
374 0 : uint64_t flags2;
375 0 : uint16_t mode;
376 0 : uint16_t flags;
377 :
378 0 : trace_xrep_dinode_flags(sc, dip);
379 :
380 0 : mode = be16_to_cpu(dip->di_mode);
381 0 : flags = be16_to_cpu(dip->di_flags);
382 0 : flags2 = be64_to_cpu(dip->di_flags2);
383 :
384 0 : if (isrt)
385 0 : flags |= XFS_DIFLAG_REALTIME;
386 : else
387 0 : flags &= ~XFS_DIFLAG_REALTIME;
388 :
389 0 : if (xfs_has_reflink(mp) && S_ISREG(mode))
390 0 : flags2 |= XFS_DIFLAG2_REFLINK;
391 : else
392 0 : flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
393 0 : if (flags & XFS_DIFLAG_REALTIME)
394 0 : flags2 &= ~XFS_DIFLAG2_REFLINK;
395 0 : if (flags2 & XFS_DIFLAG2_REFLINK)
396 0 : flags2 &= ~XFS_DIFLAG2_DAX;
397 0 : if (!xfs_has_bigtime(mp))
398 0 : flags2 &= ~XFS_DIFLAG2_BIGTIME;
399 0 : if (!xfs_has_large_extent_counts(mp))
400 0 : flags2 &= ~XFS_DIFLAG2_NREXT64;
401 0 : if (flags2 & XFS_DIFLAG2_NREXT64)
402 0 : dip->di_nrext64_pad = 0;
403 0 : else if (dip->di_version >= 3)
404 0 : dip->di_v3_pad = 0;
405 0 : dip->di_flags = cpu_to_be16(flags);
406 0 : dip->di_flags2 = cpu_to_be64(flags2);
407 0 : }
408 :
409 : /*
410 : * Blow out symlink; now it points to the current dir. We don't have to worry
411 : * about incore state because this inode is failing the verifiers.
412 : */
413 : STATIC void
414 0 : xrep_dinode_zap_symlink(
415 : struct xfs_scrub *sc,
416 : struct xfs_dinode *dip)
417 : {
418 0 : char *p;
419 :
420 0 : trace_xrep_dinode_zap_symlink(sc, dip);
421 :
422 0 : dip->di_format = XFS_DINODE_FMT_LOCAL;
423 0 : dip->di_size = cpu_to_be64(1);
424 0 : p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
425 0 : *p = '.';
426 0 : }
427 :
428 : /*
429 : * Blow out dir, make it point to the root. In the future repair will
430 : * reconstruct this directory for us. Note that there's no in-core directory
431 : * inode because the sf verifier tripped, so we don't have to worry about the
432 : * dentry cache.
433 : */
434 : STATIC void
435 0 : xrep_dinode_zap_dir(
436 : struct xfs_scrub *sc,
437 : struct xfs_dinode *dip)
438 : {
439 0 : struct xfs_mount *mp = sc->mp;
440 0 : struct xfs_dir2_sf_hdr *sfp;
441 0 : int i8count;
442 :
443 0 : trace_xrep_dinode_zap_dir(sc, dip);
444 :
445 0 : dip->di_format = XFS_DINODE_FMT_LOCAL;
446 0 : i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
447 0 : sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
448 0 : sfp->count = 0;
449 0 : sfp->i8count = i8count;
450 0 : xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
451 0 : dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
452 0 : }
453 :
454 : /* Make sure we don't have a garbage file size. */
455 : STATIC void
456 0 : xrep_dinode_size(
457 : struct xfs_scrub *sc,
458 : struct xfs_dinode *dip)
459 : {
460 0 : uint64_t size;
461 0 : uint16_t mode;
462 :
463 0 : trace_xrep_dinode_size(sc, dip);
464 :
465 0 : mode = be16_to_cpu(dip->di_mode);
466 0 : size = be64_to_cpu(dip->di_size);
467 0 : switch (mode & S_IFMT) {
468 0 : case S_IFIFO:
469 : case S_IFCHR:
470 : case S_IFBLK:
471 : case S_IFSOCK:
472 : /* di_size can't be nonzero for special files */
473 0 : dip->di_size = 0;
474 0 : break;
475 0 : case S_IFREG:
476 : /* Regular files can't be larger than 2^63-1 bytes. */
477 0 : dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
478 0 : break;
479 0 : case S_IFLNK:
480 : /*
481 : * Truncate ridiculously oversized symlinks. If the size is
482 : * zero, reset it to point to the current directory. Both of
483 : * these conditions trigger dinode verifier errors, so there
484 : * is no in-core state to reset.
485 : */
486 0 : if (size > XFS_SYMLINK_MAXLEN)
487 0 : dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
488 0 : else if (size == 0)
489 0 : xrep_dinode_zap_symlink(sc, dip);
490 : break;
491 0 : case S_IFDIR:
492 : /*
493 : * Directories can't have a size larger than 32G. If the size
494 : * is zero, reset it to an empty directory. Both of these
495 : * conditions trigger dinode verifier errors, so there is no
496 : * in-core state to reset.
497 : */
498 0 : if (size > XFS_DIR2_SPACE_SIZE)
499 0 : dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
500 0 : else if (size == 0)
501 0 : xrep_dinode_zap_dir(sc, dip);
502 : break;
503 : }
504 0 : }
505 :
506 : /* Fix extent size hints. */
507 : STATIC void
508 0 : xrep_dinode_extsize_hints(
509 : struct xfs_scrub *sc,
510 : struct xfs_dinode *dip)
511 : {
512 0 : struct xfs_mount *mp = sc->mp;
513 0 : uint64_t flags2;
514 0 : uint16_t flags;
515 0 : uint16_t mode;
516 0 : xfs_failaddr_t fa;
517 :
518 0 : trace_xrep_dinode_extsize_hints(sc, dip);
519 :
520 0 : mode = be16_to_cpu(dip->di_mode);
521 0 : flags = be16_to_cpu(dip->di_flags);
522 0 : flags2 = be64_to_cpu(dip->di_flags2);
523 :
524 0 : fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
525 : mode, flags);
526 0 : if (fa) {
527 0 : dip->di_extsize = 0;
528 0 : dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
529 : XFS_DIFLAG_EXTSZINHERIT);
530 : }
531 :
532 0 : if (dip->di_version < 3)
533 : return;
534 :
535 0 : fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
536 : mode, flags, flags2);
537 0 : if (fa) {
538 0 : dip->di_cowextsize = 0;
539 0 : dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
540 : }
541 : }
542 :
543 : /* Count extents and blocks for an inode given an rmap. */
544 : STATIC int
545 0 : xrep_dinode_walk_rmap(
546 : struct xfs_btree_cur *cur,
547 : const struct xfs_rmap_irec *rec,
548 : void *priv)
549 : {
550 0 : struct xrep_inode *ri = priv;
551 0 : int error = 0;
552 :
553 0 : if (xchk_should_terminate(ri->sc, &error))
554 0 : return error;
555 :
556 : /* We only care about this inode. */
557 0 : if (rec->rm_owner != ri->sc->sm->sm_ino)
558 : return 0;
559 :
560 0 : if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
561 0 : ri->attr_blocks += rec->rm_blockcount;
562 0 : if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
563 0 : ri->attr_extents++;
564 :
565 0 : return 0;
566 : }
567 :
568 0 : ri->data_blocks += rec->rm_blockcount;
569 0 : if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) {
570 0 : ri->data_extents++;
571 :
572 0 : if (rec->rm_offset == 0 &&
573 0 : !(rec->rm_flags & XFS_RMAP_UNWRITTEN)) {
574 0 : if (ri->block0 != NULLFSBLOCK)
575 : return -EFSCORRUPTED;
576 0 : ri->block0 = rec->rm_startblock;
577 : }
578 : }
579 :
580 : return 0;
581 : }
582 :
583 : /* Count extents and blocks for an inode from all AG rmap data. */
584 : STATIC int
585 0 : xrep_dinode_count_ag_rmaps(
586 : struct xrep_inode *ri,
587 : struct xfs_perag *pag)
588 : {
589 0 : struct xfs_btree_cur *cur;
590 0 : struct xfs_buf *agf;
591 0 : int error;
592 :
593 0 : error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
594 0 : if (error)
595 : return error;
596 :
597 0 : cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
598 0 : error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
599 0 : xfs_btree_del_cursor(cur, error);
600 0 : xfs_trans_brelse(ri->sc->tp, agf);
601 0 : return error;
602 : }
603 :
604 : /* Count extents and blocks for a given inode from all rmap data. */
605 : STATIC int
606 0 : xrep_dinode_count_rmaps(
607 : struct xrep_inode *ri)
608 : {
609 0 : struct xfs_perag *pag;
610 0 : xfs_agnumber_t agno;
611 0 : int error;
612 :
613 0 : if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
614 : return -EOPNOTSUPP;
615 :
616 0 : for_each_perag(ri->sc->mp, agno, pag) {
617 0 : error = xrep_dinode_count_ag_rmaps(ri, pag);
618 0 : if (error) {
619 0 : xfs_perag_rele(pag);
620 0 : return error;
621 : }
622 : }
623 :
624 : /* Can't have extents on both the rt and the data device. */
625 0 : if (ri->data_extents && ri->rt_extents)
626 : return -EFSCORRUPTED;
627 :
628 0 : trace_xrep_dinode_count_rmaps(ri->sc,
629 : ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
630 : ri->data_extents, ri->rt_extents, ri->attr_extents,
631 : ri->block0);
632 0 : return 0;
633 : }
634 :
635 : /* Return true if this extents-format ifork looks like garbage. */
636 : STATIC bool
637 0 : xrep_dinode_bad_extents_fork(
638 : struct xfs_scrub *sc,
639 : struct xfs_dinode *dip,
640 : int dfork_size,
641 : int whichfork)
642 : {
643 0 : struct xfs_bmbt_irec new;
644 0 : struct xfs_bmbt_rec *dp;
645 0 : bool isrt;
646 0 : int i;
647 0 : int nex;
648 0 : int fork_size;
649 :
650 0 : nex = xfs_dfork_nextents(dip, whichfork);
651 0 : fork_size = nex * sizeof(struct xfs_bmbt_rec);
652 0 : if (fork_size < 0 || fork_size > dfork_size)
653 : return true;
654 0 : if (whichfork == XFS_ATTR_FORK && nex > ((uint16_t)-1U))
655 : return true;
656 0 : dp = XFS_DFORK_PTR(dip, whichfork);
657 :
658 0 : isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
659 0 : for (i = 0; i < nex; i++, dp++) {
660 0 : xfs_failaddr_t fa;
661 :
662 0 : xfs_bmbt_disk_get_all(dp, &new);
663 0 : fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
664 : &new);
665 0 : if (fa)
666 : return true;
667 : }
668 :
669 : return false;
670 : }
671 :
672 : /* Return true if this btree-format ifork looks like garbage. */
673 : STATIC bool
674 0 : xrep_dinode_bad_bmbt_fork(
675 : struct xfs_scrub *sc,
676 : struct xfs_dinode *dip,
677 : int dfork_size,
678 : int whichfork)
679 : {
680 0 : struct xfs_bmdr_block *dfp;
681 0 : int i;
682 0 : int dmxr;
683 0 : int nrecs;
684 0 : int level;
685 :
686 0 : if (xfs_dfork_nextents(dip, whichfork) <=
687 0 : dfork_size / sizeof(struct xfs_bmbt_rec))
688 : return true;
689 :
690 0 : if (dfork_size < sizeof(struct xfs_bmdr_block))
691 : return true;
692 :
693 0 : dfp = XFS_DFORK_PTR(dip, whichfork);
694 0 : nrecs = be16_to_cpu(dfp->bb_numrecs);
695 0 : level = be16_to_cpu(dfp->bb_level);
696 :
697 0 : if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size)
698 : return true;
699 0 : if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
700 0 : return true;
701 :
702 0 : dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
703 0 : for (i = 1; i <= nrecs; i++) {
704 0 : struct xfs_bmbt_key *fkp;
705 0 : xfs_bmbt_ptr_t *fpp;
706 0 : xfs_fileoff_t fileoff;
707 0 : xfs_fsblock_t fsbno;
708 :
709 0 : fkp = XFS_BMDR_KEY_ADDR(dfp, i);
710 0 : fileoff = be64_to_cpu(fkp->br_startoff);
711 0 : if (!xfs_verify_fileoff(sc->mp, fileoff))
712 : return true;
713 :
714 0 : fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr);
715 0 : fsbno = be64_to_cpu(*fpp);
716 0 : if (!xfs_verify_fsbno(sc->mp, fsbno))
717 : return true;
718 : }
719 :
720 : return false;
721 : }
722 :
723 : /*
724 : * Check the data fork for things that will fail the ifork verifiers or the
725 : * ifork formatters.
726 : */
727 : STATIC bool
728 0 : xrep_dinode_check_dfork(
729 : struct xfs_scrub *sc,
730 : struct xfs_dinode *dip,
731 : uint16_t mode)
732 : {
733 0 : void *dfork_ptr;
734 0 : int64_t data_size;
735 0 : unsigned int fmt;
736 0 : int dfork_size;
737 :
738 : /*
739 : * Verifier functions take signed int64_t, so check for bogus negative
740 : * values first.
741 : */
742 0 : data_size = be64_to_cpu(dip->di_size);
743 0 : if (data_size < 0)
744 : return true;
745 :
746 0 : fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
747 0 : switch (mode & S_IFMT) {
748 0 : case S_IFIFO:
749 : case S_IFCHR:
750 : case S_IFBLK:
751 : case S_IFSOCK:
752 0 : if (fmt != XFS_DINODE_FMT_DEV)
753 : return true;
754 : break;
755 0 : case S_IFREG:
756 0 : if (fmt == XFS_DINODE_FMT_LOCAL)
757 : return true;
758 0 : fallthrough;
759 : case S_IFLNK:
760 : case S_IFDIR:
761 0 : switch (fmt) {
762 : case XFS_DINODE_FMT_LOCAL:
763 : case XFS_DINODE_FMT_EXTENTS:
764 : case XFS_DINODE_FMT_BTREE:
765 : break;
766 : default:
767 : return true;
768 : }
769 : break;
770 : default:
771 : return true;
772 : }
773 :
774 0 : dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
775 0 : dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
776 :
777 0 : switch (fmt) {
778 : case XFS_DINODE_FMT_DEV:
779 : break;
780 0 : case XFS_DINODE_FMT_LOCAL:
781 : /* dir/symlink structure cannot be larger than the fork */
782 0 : if (data_size > dfork_size)
783 : return true;
784 : /* directory structure must pass verification. */
785 0 : if (S_ISDIR(mode) && xfs_dir2_sf_verify_struct(sc->mp,
786 : dfork_ptr, data_size) != NULL)
787 : return true;
788 : /* symlink structure must pass verification. */
789 0 : if (S_ISLNK(mode) && xfs_symlink_sf_verify_struct(dfork_ptr,
790 : data_size) != NULL)
791 0 : return true;
792 : break;
793 0 : case XFS_DINODE_FMT_EXTENTS:
794 0 : if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
795 : XFS_DATA_FORK))
796 0 : return true;
797 : break;
798 0 : case XFS_DINODE_FMT_BTREE:
799 0 : if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
800 : XFS_DATA_FORK))
801 0 : return true;
802 : break;
803 : default:
804 : return true;
805 : }
806 :
807 : return false;
808 : }
809 :
810 : static void
811 0 : xrep_dinode_set_data_nextents(
812 : struct xfs_dinode *dip,
813 : xfs_extnum_t nextents)
814 : {
815 0 : if (xfs_dinode_has_large_extent_counts(dip))
816 0 : dip->di_big_nextents = cpu_to_be64(nextents);
817 : else
818 0 : dip->di_nextents = cpu_to_be32(nextents);
819 0 : }
820 :
821 : static void
822 0 : xrep_dinode_set_attr_nextents(
823 : struct xfs_dinode *dip,
824 : xfs_extnum_t nextents)
825 : {
826 0 : if (xfs_dinode_has_large_extent_counts(dip))
827 0 : dip->di_big_anextents = cpu_to_be32(nextents);
828 : else
829 0 : dip->di_anextents = cpu_to_be16(nextents);
830 0 : }
831 :
832 : /* Reset the data fork to something sane. */
833 : STATIC void
834 0 : xrep_dinode_zap_dfork(
835 : struct xrep_inode *ri,
836 : struct xfs_dinode *dip,
837 : uint16_t mode)
838 : {
839 0 : struct xfs_scrub *sc = ri->sc;
840 :
841 0 : trace_xrep_dinode_zap_dfork(sc, dip);
842 :
843 0 : xrep_dinode_set_data_nextents(dip, 0);
844 :
845 : /* Special files always get reset to DEV */
846 0 : switch (mode & S_IFMT) {
847 0 : case S_IFIFO:
848 : case S_IFCHR:
849 : case S_IFBLK:
850 : case S_IFSOCK:
851 0 : dip->di_format = XFS_DINODE_FMT_DEV;
852 0 : dip->di_size = 0;
853 0 : return;
854 : }
855 :
856 : /*
857 : * If we have data extents, reset to an empty map and hope the user
858 : * will run the bmapbtd checker next.
859 : */
860 0 : if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
861 0 : dip->di_format = XFS_DINODE_FMT_EXTENTS;
862 0 : return;
863 : }
864 :
865 : /* Otherwise, reset the local format to the minimum. */
866 0 : switch (mode & S_IFMT) {
867 0 : case S_IFLNK:
868 0 : xrep_dinode_zap_symlink(sc, dip);
869 0 : break;
870 0 : case S_IFDIR:
871 0 : xrep_dinode_zap_dir(sc, dip);
872 0 : break;
873 : }
874 : }
875 :
876 : /*
877 : * Check the attr fork for things that will fail the ifork verifiers or the
878 : * ifork formatters.
879 : */
880 : STATIC bool
881 0 : xrep_dinode_check_afork(
882 : struct xfs_scrub *sc,
883 : struct xfs_dinode *dip)
884 : {
885 0 : struct xfs_attr_shortform *afork_ptr;
886 0 : size_t attr_size;
887 0 : int afork_size;
888 :
889 0 : if (XFS_DFORK_BOFF(dip) == 0)
890 0 : return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
891 0 : xfs_dfork_attr_extents(dip) != 0;
892 :
893 0 : afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
894 0 : afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
895 :
896 0 : switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
897 0 : case XFS_DINODE_FMT_LOCAL:
898 : /* Fork has to be large enough to extract the xattr size. */
899 0 : if (afork_size < sizeof(struct xfs_attr_sf_hdr))
900 : return true;
901 :
902 : /* xattr structure cannot be larger than the fork */
903 0 : attr_size = be16_to_cpu(afork_ptr->hdr.totsize);
904 0 : if (attr_size > afork_size)
905 : return true;
906 :
907 : /* xattr structure must pass verification. */
908 0 : return xfs_attr_shortform_verify_struct(afork_ptr,
909 0 : attr_size) != NULL;
910 0 : case XFS_DINODE_FMT_EXTENTS:
911 0 : if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
912 : XFS_ATTR_FORK))
913 0 : return true;
914 : break;
915 0 : case XFS_DINODE_FMT_BTREE:
916 0 : if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
917 : XFS_ATTR_FORK))
918 0 : return true;
919 : break;
920 : default:
921 : return true;
922 : }
923 :
924 : return false;
925 : }
926 :
927 : /*
928 : * Reset the attr fork to empty. Since the attr fork could have contained
929 : * ACLs, make the file readable only by root.
930 : */
931 : STATIC void
932 0 : xrep_dinode_zap_afork(
933 : struct xrep_inode *ri,
934 : struct xfs_dinode *dip,
935 : uint16_t mode)
936 : {
937 0 : struct xfs_scrub *sc = ri->sc;
938 :
939 0 : trace_xrep_dinode_zap_afork(sc, dip);
940 :
941 0 : dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
942 0 : xrep_dinode_set_attr_nextents(dip, 0);
943 :
944 : /*
945 : * If the data fork is in btree format, removing the attr fork entirely
946 : * might cause verifier failures if the next level down in the bmbt
947 : * could now fit in the data fork area.
948 : */
949 0 : if (dip->di_format != XFS_DINODE_FMT_BTREE)
950 0 : dip->di_forkoff = 0;
951 0 : dip->di_mode = cpu_to_be16(mode & ~0777);
952 0 : dip->di_uid = 0;
953 0 : dip->di_gid = 0;
954 0 : }
955 :
956 : /* Make sure the fork offset is a sensible value. */
957 : STATIC void
958 0 : xrep_dinode_ensure_forkoff(
959 : struct xrep_inode *ri,
960 : struct xfs_dinode *dip,
961 : uint16_t mode)
962 : {
963 0 : struct xfs_bmdr_block *bmdr;
964 0 : struct xfs_scrub *sc = ri->sc;
965 0 : xfs_extnum_t attr_extents, data_extents;
966 0 : size_t bmdr_minsz = XFS_BMDR_SPACE_CALC(1);
967 0 : unsigned int lit_sz = XFS_LITINO(sc->mp);
968 0 : unsigned int afork_min, dfork_min;
969 :
970 0 : trace_xrep_dinode_ensure_forkoff(sc, dip);
971 :
972 : /*
973 : * Before calling this function, xrep_dinode_core ensured that both
974 : * forks actually fit inside their respective literal areas. If this
975 : * was not the case, the fork was reset to FMT_EXTENTS with zero
976 : * records. If the rmapbt scan found attr or data fork blocks, this
977 : * will be noted in the dinode_stats, and we must leave enough room
978 : * for the bmap repair code to reconstruct the mapping structure.
979 : *
980 : * First, compute the minimum space required for the attr fork.
981 : */
982 0 : switch (dip->di_aformat) {
983 0 : case XFS_DINODE_FMT_LOCAL:
984 : /*
985 : * If we still have a shortform xattr structure at all, that
986 : * means the attr fork area was exactly large enough to fit
987 : * the sf structure.
988 : */
989 0 : afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
990 : break;
991 0 : case XFS_DINODE_FMT_EXTENTS:
992 0 : attr_extents = xfs_dfork_attr_extents(dip);
993 0 : if (attr_extents) {
994 : /*
995 : * We must maintain sufficient space to hold the entire
996 : * extent map array in the data fork. Note that we
997 : * previously zapped the fork if it had no chance of
998 : * fitting in the inode.
999 : */
1000 0 : afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
1001 0 : } else if (ri->attr_extents > 0) {
1002 : /*
1003 : * The attr fork thinks it has zero extents, but we
1004 : * found some xattr extents. We need to leave enough
1005 : * empty space here so that the incore attr fork will
1006 : * get created (and hence trigger the attr fork bmap
1007 : * repairer).
1008 : */
1009 : afork_min = bmdr_minsz;
1010 : } else {
1011 : /* No extents on disk or found in rmapbt. */
1012 0 : afork_min = 0;
1013 : }
1014 : break;
1015 0 : case XFS_DINODE_FMT_BTREE:
1016 : /* Must have space for btree header and key/pointers. */
1017 0 : bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1018 0 : afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
1019 0 : break;
1020 : default:
1021 : /* We should never see any other formats. */
1022 : afork_min = 0;
1023 : break;
1024 : }
1025 :
1026 : /* Compute the minimum space required for the data fork. */
1027 0 : switch (dip->di_format) {
1028 : case XFS_DINODE_FMT_DEV:
1029 : dfork_min = sizeof(__be32);
1030 : break;
1031 0 : case XFS_DINODE_FMT_UUID:
1032 0 : dfork_min = sizeof(uuid_t);
1033 0 : break;
1034 0 : case XFS_DINODE_FMT_LOCAL:
1035 : /*
1036 : * If we still have a shortform data fork at all, that means
1037 : * the data fork area was large enough to fit whatever was in
1038 : * there.
1039 : */
1040 0 : dfork_min = be64_to_cpu(dip->di_size);
1041 0 : break;
1042 0 : case XFS_DINODE_FMT_EXTENTS:
1043 0 : data_extents = xfs_dfork_data_extents(dip);
1044 0 : if (data_extents) {
1045 : /*
1046 : * We must maintain sufficient space to hold the entire
1047 : * extent map array in the data fork. Note that we
1048 : * previously zapped the fork if it had no chance of
1049 : * fitting in the inode.
1050 : */
1051 0 : dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
1052 0 : } else if (ri->data_extents > 0 || ri->rt_extents > 0) {
1053 : /*
1054 : * The data fork thinks it has zero extents, but we
1055 : * found some data extents. We need to leave enough
1056 : * empty space here so that the data fork bmap
1057 : * repair will recover the mappings.
1058 : */
1059 : dfork_min = bmdr_minsz;
1060 : } else {
1061 : /* No extents on disk or found in rmapbt. */
1062 0 : dfork_min = 0;
1063 : }
1064 : break;
1065 0 : case XFS_DINODE_FMT_BTREE:
1066 : /* Must have space for btree header and key/pointers. */
1067 0 : bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
1068 0 : dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr);
1069 0 : break;
1070 0 : default:
1071 0 : dfork_min = 0;
1072 0 : break;
1073 : }
1074 :
1075 : /*
1076 : * Round all values up to the nearest 8 bytes, because that is the
1077 : * precision of di_forkoff.
1078 : */
1079 0 : afork_min = roundup(afork_min, 8);
1080 0 : dfork_min = roundup(dfork_min, 8);
1081 0 : bmdr_minsz = roundup(bmdr_minsz, 8);
1082 :
1083 0 : ASSERT(dfork_min <= lit_sz);
1084 0 : ASSERT(afork_min <= lit_sz);
1085 :
1086 : /*
1087 : * If the data fork was zapped and we don't have enough space for the
1088 : * recovery fork, move the attr fork up.
1089 : */
1090 0 : if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
1091 0 : xfs_dfork_data_extents(dip) == 0 &&
1092 0 : (ri->data_extents > 0 || ri->rt_extents > 0) &&
1093 0 : bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
1094 0 : if (bmdr_minsz + afork_min > lit_sz) {
1095 : /*
1096 : * The attr fork and the stub fork we need to recover
1097 : * the data fork won't both fit. Zap the attr fork.
1098 : */
1099 0 : xrep_dinode_zap_afork(ri, dip, mode);
1100 0 : afork_min = bmdr_minsz;
1101 : } else {
1102 0 : void *before, *after;
1103 :
1104 : /* Otherwise, just slide the attr fork up. */
1105 0 : before = XFS_DFORK_APTR(dip);
1106 0 : dip->di_forkoff = bmdr_minsz >> 3;
1107 0 : after = XFS_DFORK_APTR(dip);
1108 0 : memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
1109 : }
1110 : }
1111 :
1112 : /*
1113 : * If the attr fork was zapped and we don't have enough space for the
1114 : * recovery fork, move the attr fork down.
1115 : */
1116 0 : if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
1117 0 : xfs_dfork_attr_extents(dip) == 0 &&
1118 0 : ri->attr_extents > 0 &&
1119 0 : bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
1120 0 : if (dip->di_format == XFS_DINODE_FMT_BTREE) {
1121 : /*
1122 : * If the data fork is in btree format then we can't
1123 : * adjust forkoff because that runs the risk of
1124 : * violating the extents/btree format transition rules.
1125 : */
1126 0 : } else if (bmdr_minsz + dfork_min > lit_sz) {
1127 : /*
1128 : * If we can't move the attr fork, too bad, we lose the
1129 : * attr fork and leak its blocks.
1130 : */
1131 0 : xrep_dinode_zap_afork(ri, dip, mode);
1132 : } else {
1133 : /*
1134 : * Otherwise, just slide the attr fork down. The attr
1135 : * fork is empty, so we don't have any old contents to
1136 : * move here.
1137 : */
1138 0 : dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
1139 : }
1140 : }
1141 0 : }
1142 :
1143 : /*
1144 : * Zap the data/attr forks if we spot anything that isn't going to pass the
1145 : * ifork verifiers or the ifork formatters, because we need to get the inode
1146 : * into good enough shape that the higher level repair functions can run.
1147 : */
1148 : STATIC void
1149 0 : xrep_dinode_zap_forks(
1150 : struct xrep_inode *ri,
1151 : struct xfs_dinode *dip)
1152 : {
1153 0 : struct xfs_scrub *sc = ri->sc;
1154 0 : xfs_extnum_t data_extents;
1155 0 : xfs_extnum_t attr_extents;
1156 0 : xfs_filblks_t nblocks;
1157 0 : uint16_t mode;
1158 0 : bool zap_datafork = false;
1159 0 : bool zap_attrfork = false;
1160 :
1161 0 : trace_xrep_dinode_zap_forks(sc, dip);
1162 :
1163 0 : mode = be16_to_cpu(dip->di_mode);
1164 :
1165 0 : data_extents = xfs_dfork_data_extents(dip);
1166 0 : attr_extents = xfs_dfork_attr_extents(dip);
1167 0 : nblocks = be64_to_cpu(dip->di_nblocks);
1168 :
1169 : /* Inode counters don't make sense? */
1170 0 : if (data_extents > nblocks)
1171 0 : zap_datafork = true;
1172 0 : if (attr_extents > nblocks)
1173 0 : zap_attrfork = true;
1174 0 : if (data_extents + attr_extents > nblocks)
1175 0 : zap_datafork = zap_attrfork = true;
1176 :
1177 0 : if (!zap_datafork)
1178 0 : zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1179 0 : if (!zap_attrfork)
1180 0 : zap_attrfork = xrep_dinode_check_afork(sc, dip);
1181 :
1182 : /* Zap whatever's bad. */
1183 0 : if (zap_attrfork)
1184 0 : xrep_dinode_zap_afork(ri, dip, mode);
1185 0 : if (zap_datafork)
1186 0 : xrep_dinode_zap_dfork(ri, dip, mode);
1187 0 : xrep_dinode_ensure_forkoff(ri, dip, mode);
1188 0 : dip->di_nblocks = 0;
1189 0 : if (!zap_attrfork)
1190 0 : be64_add_cpu(&dip->di_nblocks, ri->attr_blocks);
1191 0 : if (!zap_datafork) {
1192 0 : be64_add_cpu(&dip->di_nblocks, ri->data_blocks);
1193 0 : be64_add_cpu(&dip->di_nblocks, ri->rt_blocks);
1194 : }
1195 0 : }
1196 :
1197 : STATIC void
1198 0 : xrep_dinode_nlinks(
1199 : struct xfs_dinode *dip)
1200 : {
1201 0 : if (dip->di_version > 1)
1202 0 : dip->di_onlink = 0;
1203 : else
1204 0 : dip->di_nlink = 0;
1205 0 : }
1206 :
1207 : /* Inode didn't pass verifiers, so fix the raw buffer and retry iget. */
1208 : STATIC int
1209 0 : xrep_dinode_core(
1210 : struct xrep_inode *ri)
1211 : {
1212 0 : struct xfs_scrub *sc = ri->sc;
1213 0 : struct xfs_buf *bp;
1214 0 : struct xfs_dinode *dip;
1215 0 : xfs_ino_t ino = sc->sm->sm_ino;
1216 0 : int error;
1217 :
1218 : /* Figure out what this inode had mapped in both forks. */
1219 0 : error = xrep_dinode_count_rmaps(ri);
1220 0 : if (error)
1221 : return error;
1222 :
1223 : /* Read the inode cluster buffer. */
1224 0 : error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
1225 0 : ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
1226 : NULL);
1227 0 : if (error)
1228 : return error;
1229 :
1230 : /* Make sure we can pass the inode buffer verifier. */
1231 0 : xrep_dinode_buf(sc, bp);
1232 0 : bp->b_ops = &xfs_inode_buf_ops;
1233 :
1234 : /* Fix everything the verifier will complain about. */
1235 0 : dip = xfs_buf_offset(bp, ri->imap.im_boffset);
1236 0 : xrep_dinode_header(sc, dip);
1237 0 : xrep_dinode_nlinks(dip);
1238 0 : xrep_dinode_mode(ri, dip);
1239 0 : xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
1240 0 : xrep_dinode_size(sc, dip);
1241 0 : xrep_dinode_extsize_hints(sc, dip);
1242 0 : xrep_dinode_zap_forks(ri, dip);
1243 :
1244 : /* Write out the inode. */
1245 0 : trace_xrep_dinode_fixed(sc, dip);
1246 0 : xfs_dinode_calc_crc(sc->mp, dip);
1247 0 : xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
1248 0 : xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
1249 0 : ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);
1250 :
1251 : /*
1252 : * Now that we've finished rewriting anything in the ondisk metadata
1253 : * that would prevent iget from giving us an incore inode, commit the
1254 : * inode cluster buffer updates and drop the AGI buffer that we've been
1255 : * holding since scrub setup.
1256 : */
1257 0 : error = xrep_trans_commit(sc);
1258 0 : if (error)
1259 : return error;
1260 :
1261 : /* Try again to load the inode. */
1262 0 : error = xchk_iget(sc, ino, &sc->ip);
1263 0 : if (error)
1264 : return error;
1265 :
1266 0 : xchk_ilock(sc, XFS_IOLOCK_EXCL);
1267 0 : error = xchk_trans_alloc(sc, 0);
1268 0 : if (error)
1269 : return error;
1270 :
1271 0 : error = xrep_ino_dqattach(sc);
1272 0 : if (error)
1273 : return error;
1274 :
1275 0 : xchk_ilock(sc, XFS_ILOCK_EXCL);
1276 0 : return 0;
1277 : }
1278 :
1279 : /* Fix everything xfs_dinode_verify cares about. */
1280 : STATIC int
1281 0 : xrep_dinode_problems(
1282 : struct xrep_inode *ri)
1283 : {
1284 0 : struct xfs_scrub *sc = ri->sc;
1285 0 : int error;
1286 :
1287 0 : error = xrep_dinode_core(ri);
1288 0 : if (error)
1289 : return error;
1290 :
1291 : /* We had to fix a totally busted inode, schedule quotacheck. */
1292 0 : if (XFS_IS_UQUOTA_ON(sc->mp))
1293 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1294 0 : if (XFS_IS_GQUOTA_ON(sc->mp))
1295 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1296 0 : if (XFS_IS_PQUOTA_ON(sc->mp))
1297 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1298 :
1299 : return 0;
1300 : }
1301 :
1302 : /*
1303 : * Fix problems that the verifiers don't care about. In general these are
1304 : * errors that don't cause problems elsewhere in the kernel that we can easily
1305 : * detect, so we don't check them all that rigorously.
1306 : */
1307 :
1308 : /* Make sure block and extent counts are ok. */
1309 : STATIC int
1310 0 : xrep_inode_blockcounts(
1311 : struct xfs_scrub *sc)
1312 : {
1313 0 : struct xfs_ifork *ifp;
1314 0 : xfs_filblks_t count;
1315 0 : xfs_filblks_t acount;
1316 0 : xfs_extnum_t nextents;
1317 0 : int error;
1318 :
1319 0 : trace_xrep_inode_blockcounts(sc);
1320 :
1321 : /* Set data fork counters from the data fork mappings. */
1322 0 : error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1323 : &nextents, &count);
1324 0 : if (error)
1325 : return error;
1326 0 : if (xfs_has_reflink(sc->mp)) {
1327 : ; /* data fork blockcount can exceed physical storage */
1328 0 : } else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1329 0 : if (count >= sc->mp->m_sb.sb_rblocks)
1330 : return -EFSCORRUPTED;
1331 : } else {
1332 0 : if (count >= sc->mp->m_sb.sb_dblocks)
1333 : return -EFSCORRUPTED;
1334 : }
1335 0 : error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1336 0 : if (error)
1337 : return error;
1338 0 : sc->ip->i_df.if_nextents = nextents;
1339 :
1340 : /* Set attr fork counters from the attr fork mappings. */
1341 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1342 0 : if (ifp) {
1343 0 : error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1344 : &nextents, &acount);
1345 0 : if (error)
1346 : return error;
1347 0 : if (count >= sc->mp->m_sb.sb_dblocks)
1348 : return -EFSCORRUPTED;
1349 0 : error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1350 : nextents);
1351 0 : if (error)
1352 : return error;
1353 0 : ifp->if_nextents = nextents;
1354 : } else {
1355 0 : acount = 0;
1356 : }
1357 :
1358 0 : sc->ip->i_nblocks = count + acount;
1359 0 : return 0;
1360 : }
1361 :
1362 : /* Check for invalid uid/gid/prid. */
1363 : STATIC void
1364 0 : xrep_inode_ids(
1365 : struct xfs_scrub *sc)
1366 : {
1367 0 : trace_xrep_inode_ids(sc);
1368 :
1369 0 : if (i_uid_read(VFS_I(sc->ip)) == -1U) {
1370 0 : i_uid_write(VFS_I(sc->ip), 0);
1371 0 : VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1372 0 : if (XFS_IS_UQUOTA_ON(sc->mp))
1373 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1374 : }
1375 :
1376 0 : if (i_gid_read(VFS_I(sc->ip)) == -1U) {
1377 0 : i_gid_write(VFS_I(sc->ip), 0);
1378 0 : VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1379 0 : if (XFS_IS_GQUOTA_ON(sc->mp))
1380 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1381 : }
1382 :
1383 0 : if (sc->ip->i_projid == -1U) {
1384 0 : sc->ip->i_projid = 0;
1385 0 : if (XFS_IS_PQUOTA_ON(sc->mp))
1386 0 : xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1387 : }
1388 0 : }
1389 :
1390 : static inline void
1391 : xrep_clamp_nsec(
1392 : struct timespec64 *ts)
1393 : {
1394 0 : ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1395 : }
1396 :
1397 : /* Nanosecond counters can't have more than 1 billion. */
1398 : STATIC void
1399 0 : xrep_inode_timestamps(
1400 : struct xfs_inode *ip)
1401 : {
1402 0 : xrep_clamp_nsec(&VFS_I(ip)->i_atime);
1403 0 : xrep_clamp_nsec(&VFS_I(ip)->i_mtime);
1404 0 : xrep_clamp_nsec(&VFS_I(ip)->i_ctime);
1405 0 : xrep_clamp_nsec(&ip->i_crtime);
1406 0 : }
1407 :
1408 : /* Fix inode flags that don't make sense together. */
1409 : STATIC void
1410 0 : xrep_inode_flags(
1411 : struct xfs_scrub *sc)
1412 : {
1413 0 : uint16_t mode;
1414 :
1415 0 : trace_xrep_inode_flags(sc);
1416 :
1417 0 : mode = VFS_I(sc->ip)->i_mode;
1418 :
1419 : /* Clear junk flags */
1420 0 : if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1421 0 : sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1422 :
1423 : /* NEWRTBM only applies to realtime bitmaps */
1424 0 : if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1425 0 : sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1426 : else
1427 0 : sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1428 :
1429 : /* These only make sense for directories. */
1430 0 : if (!S_ISDIR(mode))
1431 0 : sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1432 : XFS_DIFLAG_EXTSZINHERIT |
1433 : XFS_DIFLAG_PROJINHERIT |
1434 : XFS_DIFLAG_NOSYMLINKS);
1435 :
1436 : /* These only make sense for files. */
1437 0 : if (!S_ISREG(mode))
1438 0 : sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1439 : XFS_DIFLAG_EXTSIZE);
1440 :
1441 : /* These only make sense for non-rt files. */
1442 0 : if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1443 0 : sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1444 :
1445 : /* Immutable and append only? Drop the append. */
1446 0 : if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1447 : (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1448 0 : sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1449 :
1450 : /* Clear junk flags. */
1451 0 : if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1452 0 : sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1453 :
1454 : /* No reflink flag unless we support it and it's a file. */
1455 0 : if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1456 0 : sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1457 :
1458 : /* DAX only applies to files and dirs. */
1459 0 : if (!(S_ISREG(mode) || S_ISDIR(mode)))
1460 0 : sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1461 :
1462 : /* No reflink files on the realtime device. */
1463 0 : if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1464 0 : sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1465 :
1466 : /* No mixing reflink and DAX yet. */
1467 0 : if (sc->ip->i_diflags2 & XFS_DIFLAG2_REFLINK)
1468 0 : sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1469 0 : }
1470 :
1471 : /*
1472 : * Fix size problems with block/node format directories. If we fail to find
1473 : * the extent list, just bail out and let the bmapbtd repair functions clean
1474 : * up that mess.
1475 : */
1476 : STATIC void
1477 0 : xrep_inode_blockdir_size(
1478 : struct xfs_scrub *sc)
1479 : {
1480 0 : struct xfs_iext_cursor icur;
1481 0 : struct xfs_bmbt_irec got;
1482 0 : struct xfs_ifork *ifp;
1483 0 : xfs_fileoff_t off;
1484 0 : int error;
1485 :
1486 0 : trace_xrep_inode_blockdir_size(sc);
1487 :
1488 : /* Find the last block before 32G; this is the dir size. */
1489 0 : error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1490 0 : if (error)
1491 0 : return;
1492 :
1493 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1494 0 : off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1495 0 : if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1496 : /* zero-extents directory? */
1497 : return;
1498 : }
1499 :
1500 0 : off = got.br_startoff + got.br_blockcount;
1501 0 : sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1502 : XFS_FSB_TO_B(sc->mp, off));
1503 : }
1504 :
1505 : /* Fix size problems with short format directories. */
1506 : STATIC void
1507 0 : xrep_inode_sfdir_size(
1508 : struct xfs_scrub *sc)
1509 : {
1510 0 : struct xfs_ifork *ifp;
1511 :
1512 0 : trace_xrep_inode_sfdir_size(sc);
1513 :
1514 0 : ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1515 0 : sc->ip->i_disk_size = ifp->if_bytes;
1516 0 : }
1517 :
1518 : /*
1519 : * Fix any irregularities in an inode's size now that we can iterate extent
1520 : * maps and access other regular inode data.
1521 : */
1522 : STATIC void
1523 0 : xrep_inode_size(
1524 : struct xfs_scrub *sc)
1525 : {
1526 0 : trace_xrep_inode_size(sc);
1527 :
1528 : /*
1529 : * Currently we only support fixing size on extents or btree format
1530 : * directories. Files can be any size and sizes for the other inode
1531 : * special types are fixed by xrep_dinode_size.
1532 : */
1533 0 : if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
1534 : return;
1535 0 : switch (sc->ip->i_df.if_format) {
1536 0 : case XFS_DINODE_FMT_EXTENTS:
1537 : case XFS_DINODE_FMT_BTREE:
1538 0 : xrep_inode_blockdir_size(sc);
1539 0 : break;
1540 0 : case XFS_DINODE_FMT_LOCAL:
1541 0 : xrep_inode_sfdir_size(sc);
1542 0 : break;
1543 : }
1544 : }
1545 :
1546 : /* Fix extent size hint problems. */
1547 : STATIC void
1548 0 : xrep_inode_extsize(
1549 : struct xfs_scrub *sc)
1550 : {
1551 : /* Fix misaligned extent size hints on a directory. */
1552 0 : if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1553 0 : (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1554 0 : sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) {
1555 0 : sc->ip->i_extsize = 0;
1556 0 : sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1557 : }
1558 0 : }
1559 :
1560 : /* Fix any irregularities in an inode that the verifiers don't catch. */
1561 : STATIC int
1562 0 : xrep_inode_problems(
1563 : struct xfs_scrub *sc)
1564 : {
1565 0 : int error;
1566 :
1567 0 : error = xrep_inode_blockcounts(sc);
1568 0 : if (error)
1569 : return error;
1570 0 : xrep_inode_timestamps(sc->ip);
1571 0 : xrep_inode_flags(sc);
1572 0 : xrep_inode_ids(sc);
1573 0 : xrep_inode_size(sc);
1574 0 : xrep_inode_extsize(sc);
1575 :
1576 0 : trace_xrep_inode_fixed(sc);
1577 0 : xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1578 0 : return xrep_roll_trans(sc);
1579 : }
1580 :
1581 : /*
1582 : * Make sure this inode's unlinked list pointers are consistent with its
1583 : * link count.
1584 : */
1585 : STATIC int
1586 9772434 : xrep_inode_unlinked(
1587 : struct xfs_scrub *sc)
1588 : {
1589 9772434 : unsigned int nlink = VFS_I(sc->ip)->i_nlink;
1590 9772434 : int error;
1591 :
1592 : /*
1593 : * If this inode is linked from the directory tree and on the unlinked
1594 : * list, remove it from the unlinked list.
1595 : */
1596 9772434 : if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
1597 0 : struct xfs_perag *pag;
1598 0 : int error;
1599 :
1600 0 : pag = xfs_perag_get(sc->mp,
1601 0 : XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
1602 0 : error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
1603 0 : xfs_perag_put(pag);
1604 0 : if (error)
1605 : return error;
1606 : }
1607 :
1608 : /*
1609 : * If this inode is not linked from the directory tree yet not on the
1610 : * unlinked list, put it on the unlinked list.
1611 : */
1612 9772434 : if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
1613 0 : error = xfs_iunlink(sc->tp, sc->ip);
1614 0 : if (error)
1615 0 : return error;
1616 : }
1617 :
1618 : return 0;
1619 : }
1620 :
1621 : /* Repair an inode's fields. */
1622 : int
1623 9818577 : xrep_inode(
1624 : struct xfs_scrub *sc)
1625 : {
1626 9818577 : int error = 0;
1627 :
1628 : /*
1629 : * No inode? That means we failed the _iget verifiers. Repair all
1630 : * the things that the inode verifiers care about, then retry _iget.
1631 : */
1632 9818577 : if (!sc->ip) {
1633 0 : struct xrep_inode *ri = sc->buf;
1634 :
1635 0 : ASSERT(ri != NULL);
1636 :
1637 0 : error = xrep_dinode_problems(ri);
1638 0 : if (error)
1639 : return error;
1640 :
1641 : /* By this point we had better have a working incore inode. */
1642 0 : if (!sc->ip)
1643 : return -EFSCORRUPTED;
1644 : }
1645 :
1646 9818577 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1647 :
1648 : /* If we found corruption of any kind, try to fix it. */
1649 9818616 : if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1650 : (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1651 0 : error = xrep_inode_problems(sc);
1652 0 : if (error)
1653 : return error;
1654 : }
1655 :
1656 : /* See if we can clear the reflink flag. */
1657 9818616 : if (xfs_is_reflink_inode(sc->ip))
1658 46097 : return xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1659 :
1660 9772519 : return xrep_inode_unlinked(sc);
1661 : }
|