Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_bmap.h"
19 : #include "xfs_bmap_btree.h"
20 : #include "xfs_rmap.h"
21 : #include "xfs_rmap_btree.h"
22 : #include "scrub/scrub.h"
23 : #include "scrub/common.h"
24 : #include "scrub/btree.h"
25 : #include "xfs_ag.h"
26 :
27 : /* Set us up with an inode's bmap. */
/*
 * Setup step for the bmap scrubbers.  In order, this:
 *   - enables the XCHK_FSGATES_DRAIN gate if xchk_need_intent_drain() says so,
 *   - obtains the inode through xchk_iget_for_scrubbing(),
 *   - takes XFS_IOLOCK_EXCL,
 *   - for regular files, unless the scrub type is XFS_SCRUB_TYPE_BMBTA, takes
 *     XFS_MMAPLOCK_EXCL, waits for direct I/O and writes back the page cache,
 *   - calls xchk_trans_alloc() and xchk_ino_dqattach(),
 *   - takes XFS_ILOCK_EXCL.
 *
 * Returns 0 or a negative errno.  Nothing is unlocked on the error paths; the
 * comment at the "out" label says scrub teardown unlocks and releases the
 * inode.
 */
28 : int
29 298318780 : xchk_setup_inode_bmap(
30 : struct xfs_scrub *sc)
31 : {
32 298318780 : int error;
33 :
34 298318780 : if (xchk_need_intent_drain(sc))
35 0 : xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
36 :
37 298318780 : error = xchk_iget_for_scrubbing(sc);
38 298311652 : if (error)
39 1845147 : goto out;
40 :
41 296466505 : xchk_ilock(sc, XFS_IOLOCK_EXCL);
42 :
43 : /*
44 : * We don't want any ephemeral data/cow fork updates sitting around
45 : * while we inspect block mappings, so wait for directio to finish
46 : * and flush dirty data if we have delalloc reservations.
47 : */
48 296485416 : if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
49 77984340 : sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
50 51824326 : struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
51 51824326 : bool is_repair = xchk_could_repair(sc);
52 :
53 51824326 : xchk_ilock(sc, XFS_MMAPLOCK_EXCL);
54 :
55 : /* Break all our leases, we're going to mess with things. */
56 51823941 : if (is_repair) {
57 303793 : error = xfs_break_layouts(VFS_I(sc->ip),
58 : &sc->ilock_flags, BREAK_WRITE);
59 303792 : if (error)
60 0 : goto out;
61 : }
62 :
63 51823940 : inode_dio_wait(VFS_I(sc->ip));
64 :
65 : /*
66 : * Try to flush all incore state to disk before we examine the
67 : * space mappings for the data fork. Leave accumulated errors
68 : * in the mapping for the writer threads to consume.
69 : *
70 : * On ENOSPC or EIO writeback errors, we continue into the
71 : * extent mapping checks because write failures do not
72 : * necessarily imply anything about the correctness of the file
73 : * metadata. The metadata and the file data could be on
74 : * completely separate devices; a media failure might only
75 : * affect a subset of the disk, etc. We can handle delalloc
76 : * extents in the scrubber, so leaving them in memory is fine.
77 : */
78 51823590 : error = filemap_fdatawrite(mapping);
79 51823999 : if (!error)
80 51823619 : error = filemap_fdatawait_keep_errors(mapping);
/*
 * Only -ENOSPC and -EIO are tolerated here.  A tolerated value left in
 * "error" is not returned: it is overwritten by the next assignment
 * (invalidate_inode_pages2() or xchk_trans_alloc() below).
 */
81 51823454 : if (error && (error != -ENOSPC && error != -EIO))
82 0 : goto out;
83 :
84 : /* Drop the page cache if we're repairing block mappings. */
85 51823454 : if (is_repair) {
86 303791 : error = invalidate_inode_pages2(
87 303791 : VFS_I(sc->ip)->i_mapping);
88 303790 : if (error)
89 0 : goto out;
90 : }
91 :
92 : }
93 :
94 : /* Got the inode, lock it and we're ready to go. */
95 296484543 : error = xchk_trans_alloc(sc, 0);
96 296471833 : if (error)
97 0 : goto out;
98 :
99 296471833 : error = xchk_ino_dqattach(sc);
100 296464094 : if (error)
101 0 : goto out;
102 :
103 296464094 : xchk_ilock(sc, XFS_ILOCK_EXCL);
104 298317683 : out:
105 : /* scrub teardown will unlock and release the inode */
106 298317683 : return error;
107 : }
108 :
109 : /*
110 : * Inode fork block mapping (BMBT) scrubber.
111 : * More complex than the others because we have to scrub
112 : * all the extents regardless of whether or not the fork
113 : * is in btree format.
114 : */
115 :
/*
 * Per-fork state shared by the helpers in this file.  xchk_bmap() declares one
 * of these with a "{ NULL }" initializer, fills in sc, whichfork, is_rt and
 * is_shared, and copies each mapping into prev_rec after it has been checked.
 * was_loaded is set by xchk_bmap_btree() and read by xchk_bmapbt_rec().
 */
116 : struct xchk_bmap_info {
117 : struct xfs_scrub *sc;
118 :
119 : /* Incore extent tree cursor */
120 : struct xfs_iext_cursor icur;
121 :
122 : /* Previous fork mapping that we examined */
123 : struct xfs_bmbt_irec prev_rec;
124 :
125 : /* Is this a realtime fork? */
126 : bool is_rt;
127 :
128 : /* May mappings point to shared space? */
129 : bool is_shared;
130 :
131 : /* Was the incore extent tree loaded? */
132 : bool was_loaded;
133 :
134 : /* Which inode fork are we checking? */
135 : int whichfork;
136 : };
137 :
138 : /* Look for a corresponding rmap for this irec. */
/*
 * @info:  scrub state; supplies the fork being checked, the is_shared hint
 *         and the rmap cursor (info->sc->sa.rmap_cur).
 * @irec:  file mapping being cross-referenced.
 * @agbno: AG block to look up (callers derive it from irec->br_startblock).
 * @owner: rmap owner to look up.
 * @rmap:  passed to the rmap lookup helper to receive the record.
 *
 * Returns true only if the lookup result passed xchk_should_check_xref() and
 * a record was found.  If the lookup ran but found nothing, the fork offset
 * is flagged with xchk_fblock_xref_set_corrupt() and false is returned.
 */
139 : static inline bool
140 164606179 : xchk_bmap_get_rmap(
141 : struct xchk_bmap_info *info,
142 : struct xfs_bmbt_irec *irec,
143 : xfs_agblock_t agbno,
144 : uint64_t owner,
145 : struct xfs_rmap_irec *rmap)
146 : {
147 164606179 : xfs_fileoff_t offset;
148 164606179 : unsigned int rflags = 0;
149 164606179 : int has_rmap;
150 164606179 : int error;
151 :
/* Build the lookup flags from the fork and the mapping's written state. */
152 164606179 : if (info->whichfork == XFS_ATTR_FORK)
153 861448 : rflags |= XFS_RMAP_ATTR_FORK;
154 164606179 : if (irec->br_state == XFS_EXT_UNWRITTEN)
155 6710435 : rflags |= XFS_RMAP_UNWRITTEN;
156 :
157 : /*
158 : * CoW staging extents are owned (on disk) by the refcountbt, so
159 : * their rmaps do not have offsets.
160 : */
161 164606179 : if (info->whichfork == XFS_COW_FORK)
162 : offset = 0;
163 : else
164 164551355 : offset = irec->br_startoff;
165 :
166 : /*
167 : * If the caller thinks this could be a shared bmbt extent (IOWs,
168 : * any data fork extent of a reflink inode) then we have to use the
169 : * range rmap lookup to make sure we get the correct owner/offset.
170 : */
171 164606179 : if (info->is_shared) {
172 36172117 : error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
173 : owner, offset, rflags, rmap, &has_rmap);
174 : } else {
175 128434062 : error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
176 : owner, offset, rflags, rmap, &has_rmap);
177 : }
/* has_rmap is only examined once the lookup result has been vetted. */
178 164607319 : if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
179 : return false;
180 :
181 164607455 : if (!has_rmap)
182 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
183 : irec->br_startoff);
184 164607455 : return has_rmap;
185 : }
186 :
187 : /* Make sure that we have rmapbt records for this data/attr fork extent. */
/*
 * The rmap found for @irec must start at @agbno, end where the mapping ends
 * physically, carry the same starting file offset and end offset, be owned by
 * the inode being scrubbed, agree with the mapping on the unwritten and
 * attr-fork flags, and not be marked as a bmbt block.  Every mismatch flags a
 * cross-referencing corruption at irec->br_startoff.
 *
 * Does nothing when there is no rmap cursor or xchk_skip_xref() is true.
 */
188 : STATIC void
189 173213141 : xchk_bmap_xref_rmap(
190 : struct xchk_bmap_info *info,
191 : struct xfs_bmbt_irec *irec,
192 : xfs_agblock_t agbno)
193 : {
194 173213141 : struct xfs_rmap_irec rmap;
195 173213141 : unsigned long long rmap_end;
196 173213141 : uint64_t owner = info->sc->ip->i_ino;
197 :
198 173213141 : if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
199 8661629 : return;
200 :
201 : /* Find the rmap record for this irec. */
202 164551512 : if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
203 : return;
204 :
205 : /*
206 : * The rmap must be an exact match for this incore file mapping record,
207 : * which may have arisen from multiple ondisk records.
208 : */
209 164552770 : if (rmap.rm_startblock != agbno)
210 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
211 : irec->br_startoff);
212 :
/* The sums are formed in unsigned long long via the explicit cast. */
213 164552770 : rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
214 164552770 : if (rmap_end != agbno + irec->br_blockcount)
215 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
216 : irec->br_startoff);
217 :
218 : /* Check the logical offsets. */
219 164552770 : if (rmap.rm_offset != irec->br_startoff)
220 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
221 : irec->br_startoff);
222 :
223 164552770 : rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
224 164552770 : if (rmap_end != irec->br_startoff + irec->br_blockcount)
225 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
226 : irec->br_startoff);
227 :
228 : /* Check the owner */
229 164552770 : if (rmap.rm_owner != owner)
230 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
231 : irec->br_startoff);
232 :
233 : /*
234 : * Check for discrepancies between the unwritten flag in the irec and
235 : * the rmap. Note that the (in-memory) CoW fork distinguishes between
236 : * unwritten and written extents, but we don't track that in the rmap
237 : * records because the blocks are owned (on-disk) by the refcountbt,
238 : * which doesn't track unwritten state.
239 : */
240 164552770 : if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
241 164552770 : !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
242 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
243 : irec->br_startoff);
244 :
245 164552770 : if (!!(info->whichfork == XFS_ATTR_FORK) !=
246 164552770 : !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
247 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
248 : irec->br_startoff);
/* A file mapping must never resolve to an rmap describing a bmbt block. */
249 164552770 : if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
250 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
251 : irec->br_startoff);
252 : }
253 :
254 : /* Make sure that we have rmapbt records for this COW fork extent. */
/*
 * Looks up a record owned by XFS_RMAP_OWN_COW.  Unlike the data/attr variant,
 * the rmap only has to contain the mapping's physical range (it may start
 * earlier and end later), file offsets are not compared, and none of the
 * ATTR_FORK, BMBT_BLOCK or UNWRITTEN flags may be set on the rmap.
 *
 * Does nothing when there is no rmap cursor or xchk_skip_xref() is true.
 */
255 : STATIC void
256 54691 : xchk_bmap_xref_rmap_cow(
257 : struct xchk_bmap_info *info,
258 : struct xfs_bmbt_irec *irec,
259 : xfs_agblock_t agbno)
260 : {
261 54691 : struct xfs_rmap_irec rmap;
262 54691 : unsigned long long rmap_end;
263 54691 : uint64_t owner = XFS_RMAP_OWN_COW;
264 :
265 54691 : if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
266 0 : return;
267 :
268 : /* Find the rmap record for this irec. */
269 54691 : if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
270 : return;
271 :
272 : /*
273 : * CoW staging extents are owned by the refcount btree, so the rmap
274 : * can start before and end after the physical space allocated to this
275 : * mapping. There are no offsets to check.
276 : */
277 54691 : if (rmap.rm_startblock > agbno)
278 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
279 : irec->br_startoff);
280 :
281 54691 : rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
282 54691 : if (rmap_end < agbno + irec->br_blockcount)
283 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
284 : irec->br_startoff);
285 :
286 : /* Check the owner */
287 54691 : if (rmap.rm_owner != owner)
288 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
289 : irec->br_startoff);
290 :
291 : /*
292 : * No flags allowed. Note that the (in-memory) CoW fork distinguishes
293 : * between unwritten and written extents, but we don't track that in
294 : * the rmap records because the blocks are owned (on-disk) by the
295 : * refcountbt, which doesn't track unwritten state.
296 : */
297 54691 : if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
298 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
299 : irec->br_startoff);
300 54691 : if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
301 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
302 : irec->br_startoff);
303 54691 : if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
304 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
305 : irec->br_startoff);
306 : }
307 :
308 : /* Cross-reference a single rtdev extent record. */
/*
 * The only check made is xchk_xref_is_used_rt_space() over the mapping's
 * start block and block count.  @ip is not used.
 */
309 : STATIC void
310 41374183 : xchk_bmap_rt_iextent_xref(
311 : struct xfs_inode *ip,
312 : struct xchk_bmap_info *info,
313 : struct xfs_bmbt_irec *irec)
314 : {
315 41374183 : xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
316 41374183 : irec->br_blockcount);
317 41378285 : }
318 :
319 : /* Cross-reference a single datadev extent record. */
/*
 * Splits irec->br_startblock into an AG number and an AG block, sets up
 * info->sc->sa with xchk_ag_init_existing(), runs the cross-reference checks
 * selected by info->whichfork, and finally calls xchk_ag_free() on
 * info->sc->sa (also on the init-failure path).
 *
 * @ip is not used; the inode is always taken from info->sc->ip.  The switch
 * has no default case, so any other whichfork value only gets the two
 * fork-independent checks.
 */
320 : STATIC void
321 173268847 : xchk_bmap_iextent_xref(
322 : struct xfs_inode *ip,
323 : struct xchk_bmap_info *info,
324 : struct xfs_bmbt_irec *irec)
325 : {
326 173268847 : struct xfs_owner_info oinfo;
327 173268847 : struct xfs_mount *mp = info->sc->mp;
328 173268847 : xfs_agnumber_t agno;
329 173268847 : xfs_agblock_t agbno;
330 173268847 : xfs_extlen_t len;
331 173268847 : int error;
332 :
333 173268847 : agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
334 173268847 : agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
335 173268847 : len = irec->br_blockcount;
336 :
337 173268847 : error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
338 173269078 : if (!xchk_fblock_process_error(info->sc, info->whichfork,
339 : irec->br_startoff, &error))
340 0 : goto out_free;
341 :
/* Checks applied to every fork type. */
342 173269358 : xchk_xref_is_used_space(info->sc, agbno, len);
343 173268895 : xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
344 173267536 : switch (info->whichfork) {
345 171912732 : case XFS_DATA_FORK:
346 171912732 : xchk_bmap_xref_rmap(info, irec, agbno);
/* The sole-owner and not-shared checks are skipped for reflink inodes. */
347 171914215 : if (!xfs_is_reflink_inode(info->sc->ip)) {
348 135741465 : xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
349 : info->whichfork, irec->br_startoff);
350 135741465 : xchk_xref_is_only_owned_by(info->sc, agbno,
351 135741465 : irec->br_blockcount, &oinfo);
352 135741177 : xchk_xref_is_not_shared(info->sc, agbno,
353 135741177 : irec->br_blockcount);
354 : }
355 171914075 : xchk_xref_is_not_cow_staging(info->sc, agbno,
356 171914075 : irec->br_blockcount);
357 171914075 : break;
358 1300113 : case XFS_ATTR_FORK:
359 1300113 : xchk_bmap_xref_rmap(info, irec, agbno);
360 1300114 : xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
361 : info->whichfork, irec->br_startoff);
362 1300114 : xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
363 : &oinfo);
364 1300111 : xchk_xref_is_not_shared(info->sc, agbno,
365 1300111 : irec->br_blockcount);
366 1300118 : xchk_xref_is_not_cow_staging(info->sc, agbno,
367 1300118 : irec->br_blockcount);
368 1300118 : break;
369 54691 : case XFS_COW_FORK:
370 54691 : xchk_bmap_xref_rmap_cow(info, irec, agbno);
371 54691 : xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
372 : &XFS_RMAP_OINFO_COW);
373 54691 : xchk_xref_is_cow_staging(info->sc, agbno,
374 54691 : irec->br_blockcount);
375 54691 : xchk_xref_is_not_shared(info->sc, agbno,
376 54691 : irec->br_blockcount);
377 54691 : break;
378 : }
379 :
380 173269189 : out_free:
381 173269189 : xchk_ag_free(info->sc, &info->sc->sa);
382 173269554 : }
383 :
384 : /*
385 : * Directories and attr forks should never have blocks that can't be addressed
386 : * by a xfs_dablk_t.
387 : */
/*
 * Applies to any fork of a directory inode and to the attr fork of any inode;
 * everything else returns immediately.  Both the first and the last file
 * offset of the mapping are passed to xfs_verify_dablk(); a failure on the
 * last offset is reported at that offset rather than at br_startoff.
 */
388 : STATIC void
389 214640194 : xchk_bmap_dirattr_extent(
390 : struct xfs_inode *ip,
391 : struct xchk_bmap_info *info,
392 : struct xfs_bmbt_irec *irec)
393 : {
394 214640194 : struct xfs_mount *mp = ip->i_mount;
395 214640194 : xfs_fileoff_t off;
396 :
397 214640194 : if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
398 : return;
399 :
400 2027930 : if (!xfs_verify_dablk(mp, irec->br_startoff))
401 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
402 : irec->br_startoff);
403 :
/* Last file offset covered by this mapping (inclusive). */
404 2027926 : off = irec->br_startoff + irec->br_blockcount - 1;
405 2027926 : if (!xfs_verify_dablk(mp, off))
406 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
407 : }
408 :
409 : /* Scrub a single extent record. */
/*
 * Called by xchk_bmap() for mappings whose start block is not a "null start
 * block" (those go to xchk_bmap_iextent_delalloc() instead).
 *
 * Self-contained checks, each flagging corruption at irec->br_startoff:
 *   - the mapping must not start before the end of info->prev_rec,
 *   - xfs_verify_fileext() on the file offset range,
 *   - xchk_bmap_dirattr_extent(),
 *   - xfs_verify_rtext() or xfs_verify_fsbext() depending on info->is_rt,
 *   - no unwritten mappings in the attr fork.
 *
 * Cross-referencing is skipped once XFS_SCRUB_OFLAG_CORRUPT is set.
 */
410 : STATIC void
411 214639573 : xchk_bmap_iextent(
412 : struct xfs_inode *ip,
413 : struct xchk_bmap_info *info,
414 : struct xfs_bmbt_irec *irec)
415 : {
416 214639573 : struct xfs_mount *mp = info->sc->mp;
417 :
418 : /*
419 : * Check for out-of-order extents. This record could have come
420 : * from the incore list, for which there is no ordering check.
421 : */
422 214639573 : if (irec->br_startoff < info->prev_rec.br_startoff +
423 214639573 : info->prev_rec.br_blockcount)
424 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
425 : irec->br_startoff);
426 :
427 214639573 : if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
428 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
429 : irec->br_startoff);
430 :
431 214641748 : xchk_bmap_dirattr_extent(ip, info, irec);
432 :
433 : /* Make sure the extent points to a valid place. */
434 256017438 : if (info->is_rt &&
435 41375075 : !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
436 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
437 : irec->br_startoff);
438 387911201 : if (!info->is_rt &&
439 173268683 : !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
440 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
441 : irec->br_startoff);
442 :
443 : /* We don't allow unwritten extents on attr forks. */
444 214642518 : if (irec->br_state == XFS_EXT_UNWRITTEN &&
445 16360980 : info->whichfork == XFS_ATTR_FORK)
446 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
447 : irec->br_startoff);
448 :
/* Do not cross-reference a mapping that has already been found bad. */
449 214642518 : if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
450 : return;
451 :
452 214642518 : if (info->is_rt)
453 41373513 : xchk_bmap_rt_iextent_xref(ip, info, irec);
454 : else
455 173269005 : xchk_bmap_iextent_xref(ip, info, irec);
456 : }
457 :
458 : /* Scrub a bmbt record. */
/*
 * Record callback that xchk_bmap_btree() hands to xchk_btree(); bs->private
 * is the struct xchk_bmap_info.  Always returns 0: problems are reported by
 * flagging the fork corrupt, never through the return value.
 */
459 : STATIC int
460 166143831 : xchk_bmapbt_rec(
461 : struct xchk_btree *bs,
462 : const union xfs_btree_rec *rec)
463 : {
464 166143831 : struct xfs_bmbt_irec irec;
465 166143831 : struct xfs_bmbt_irec iext_irec;
466 166143831 : struct xfs_iext_cursor icur;
467 166143831 : struct xchk_bmap_info *info = bs->private;
468 166143831 : struct xfs_inode *ip = bs->cur->bc_ino.ip;
469 166143831 : struct xfs_buf *bp = NULL;
470 166143831 : struct xfs_btree_block *block;
471 166143831 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork);
472 166163014 : uint64_t owner;
473 166163014 : int i;
474 :
475 : /*
476 : * Check the owners of the btree blocks up to the level below
477 : * the root since the verifiers don't do that.
478 : */
/*
 * Only done when xfs_has_crc() is true and the level-0 cursor position is 1,
 * presumably so each block is examined once rather than once per record
 * (NOTE(review): confirm the meaning of bc_levels[0].ptr).
 */
479 166163014 : if (xfs_has_crc(bs->cur->bc_mp) &&
480 166163800 : bs->cur->bc_levels[0].ptr == 1) {
481 4773463 : for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
482 2666847 : block = xfs_btree_get_block(bs->cur, i, &bp);
483 2666850 : owner = be64_to_cpu(block->bb_u.l.bb_owner);
484 2666850 : if (owner != ip->i_ino)
485 0 : xchk_fblock_set_corrupt(bs->sc,
486 : info->whichfork, 0);
487 : }
488 : }
489 :
490 : /*
491 : * Check that the incore extent tree contains an extent that matches
492 : * this one exactly. We validate those cached bmaps later, so we don't
493 : * need to check them here. If the incore extent tree was just loaded
494 : * from disk by the scrubber, we assume that its contents match what's
495 : * on disk (we still hold the ILOCK) and skip the equivalence check.
496 : */
497 166163017 : if (!info->was_loaded)
498 : return 0;
499 :
/* Decode the on-disk record and stop if it fails validation. */
500 166151901 : xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
501 166162452 : if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
502 0 : xchk_fblock_set_corrupt(bs->sc, info->whichfork,
503 : irec.br_startoff);
504 0 : return 0;
505 : }
506 :
/* Every field of the incore mapping must equal the on-disk record. */
507 166160653 : if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
508 166153309 : &iext_irec) ||
509 166153309 : irec.br_startoff != iext_irec.br_startoff ||
510 166154630 : irec.br_startblock != iext_irec.br_startblock ||
511 166155412 : irec.br_blockcount != iext_irec.br_blockcount ||
512 166155845 : irec.br_state != iext_irec.br_state)
513 140 : xchk_fblock_set_corrupt(bs->sc, info->whichfork,
514 : irec.br_startoff);
515 : return 0;
516 : }
517 :
518 : /* Scan the btree records. */
/*
 * Records in info->was_loaded whether the incore extent tree was already
 * present (i.e. xfs_need_iread_extents() was false) before calling
 * xfs_iread_extents(), then walks the bmap btree with xchk_btree() using
 * xchk_bmapbt_rec() as the per-record callback and @info as its private data.
 *
 * Returns the value left in "error" by xchk_fblock_process_error() if reading
 * the extents failed, otherwise the result of xchk_btree().
 */
519 : STATIC int
520 1524589 : xchk_bmap_btree(
521 : struct xfs_scrub *sc,
522 : int whichfork,
523 : struct xchk_bmap_info *info)
524 : {
525 1524589 : struct xfs_owner_info oinfo;
526 1524589 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
527 1524604 : struct xfs_mount *mp = sc->mp;
528 1524604 : struct xfs_inode *ip = sc->ip;
529 1524604 : struct xfs_btree_cur *cur;
530 1524604 : int error;
531 :
532 : /* Load the incore bmap cache if it's not loaded. */
533 1524604 : info->was_loaded = !xfs_need_iread_extents(ifp);
534 :
535 1524603 : error = xfs_iread_extents(sc->tp, ip, whichfork);
536 1524602 : if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
537 0 : goto out;
538 :
539 : /* Check the btree structure. */
540 1524600 : cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
541 1524603 : xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
542 1524603 : error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
/* The cursor is deleted whether or not the walk succeeded. */
543 1524602 : xfs_btree_del_cursor(cur, error);
544 1524612 : out:
545 1524612 : return error;
546 : }
547 :
/*
 * Private data passed to xchk_bmap_check_rmap() through xfs_rmap_query_all().
 * sc and whichfork are filled in by xchk_bmap_check_ag_rmaps(); icur is only
 * written and read inside the callback.
 */
548 : struct xchk_bmap_check_rmap_info {
549 : struct xfs_scrub *sc;
550 : int whichfork;
551 : struct xfs_iext_cursor icur;
552 : };
553 :
554 : /* Can we find bmaps that fit this rmap? */
/*
 * xfs_rmap_query_all() callback; @priv is a struct xchk_bmap_check_rmap_info.
 *
 * Records for another owner, for the other fork, or describing bmbt blocks
 * are skipped with a return of 0.  For the remaining records the incore
 * extent list is walked to confirm that consecutive mappings cover the whole
 * rmap record.  In that case the return value is -ECANCELED if
 * XFS_SCRUB_OFLAG_CORRUPT is set by the time the "out" label is reached, and
 * 0 otherwise.
 */
555 : STATIC int
556 29690611127 : xchk_bmap_check_rmap(
557 : struct xfs_btree_cur *cur,
558 : const struct xfs_rmap_irec *rec,
559 : void *priv)
560 : {
561 29690611127 : struct xfs_bmbt_irec irec;
562 29690611127 : struct xfs_rmap_irec check_rec;
563 29690611127 : struct xchk_bmap_check_rmap_info *sbcri = priv;
564 29690611127 : struct xfs_ifork *ifp;
565 29690611127 : struct xfs_scrub *sc = sbcri->sc;
566 29690611127 : bool have_map;
567 :
568 : /* Is this even the right fork? */
569 29690611127 : if (rec->rm_owner != sc->ip->i_ino)
570 : return 0;
/* XOR: skip when "checking the attr fork" and "rmap is attr fork" disagree. */
571 2740 : if ((sbcri->whichfork == XFS_ATTR_FORK) ^
572 2740 : !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
573 : return 0;
574 0 : if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
575 : return 0;
576 :
577 : /* Now look up the bmbt record. */
578 0 : ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
579 0 : if (!ifp) {
580 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
581 0 : rec->rm_offset);
582 0 : goto out;
583 : }
584 0 : have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
585 : &sbcri->icur, &irec);
586 0 : if (!have_map)
587 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
588 0 : rec->rm_offset);
589 : /*
590 : * bmap extent record lengths are constrained to 2^21 blocks in length
591 : * because of space constraints in the on-disk metadata structure.
592 : * However, rmap extent record lengths are constrained only by AG
593 : * length, so we have to loop through the bmbt to make sure that the
594 : * entire rmap is covered by bmbt records.
595 : */
/* check_rec is a scratch copy that shrinks as mappings are matched. */
596 0 : check_rec = *rec;
597 0 : while (have_map) {
598 0 : if (irec.br_startoff != check_rec.rm_offset)
599 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
600 : check_rec.rm_offset);
601 0 : if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
602 : cur->bc_ag.pag->pag_agno,
603 : check_rec.rm_startblock))
604 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
605 : check_rec.rm_offset);
606 0 : if (irec.br_blockcount > check_rec.rm_blockcount)
607 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
608 : check_rec.rm_offset);
/* Stop before the subtraction below if this mapping did not fit. */
609 0 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
610 : break;
611 0 : check_rec.rm_startblock += irec.br_blockcount;
612 0 : check_rec.rm_offset += irec.br_blockcount;
613 0 : check_rec.rm_blockcount -= irec.br_blockcount;
614 0 : if (check_rec.rm_blockcount == 0)
615 : break;
616 0 : have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
617 0 : if (!have_map)
618 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
619 : check_rec.rm_offset);
620 : }
621 :
622 0 : out:
623 0 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
624 0 : return -ECANCELED;
625 : return 0;
626 : }
627 :
628 : /* Make sure each rmap has a corresponding bmbt entry. */
/*
 * Single-AG step of the rmap scan: reads the AGF for @pag, creates an rmap
 * btree cursor on it, and runs xchk_bmap_check_rmap() over every record via
 * xfs_rmap_query_all().  -ECANCELED from the walk (the callback's "corruption
 * found" signal) is converted to 0.
 *
 * Returns 0 or a negative errno.  The cursor and the AGF buffer are released
 * before returning, except when reading the AGF itself failed.
 */
629 : STATIC int
630 646259 : xchk_bmap_check_ag_rmaps(
631 : struct xfs_scrub *sc,
632 : int whichfork,
633 : struct xfs_perag *pag)
634 : {
635 646259 : struct xchk_bmap_check_rmap_info sbcri;
636 646259 : struct xfs_btree_cur *cur;
637 646259 : struct xfs_buf *agf;
638 646259 : int error;
639 :
640 646259 : error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
641 646258 : if (error)
642 : return error;
643 :
644 646257 : cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
645 :
/*
 * sbcri.icur is deliberately left unset; the callback hands it to
 * xfs_iext_lookup_extent() before any other use.
 */
646 646256 : sbcri.sc = sc;
647 646256 : sbcri.whichfork = whichfork;
648 646256 : error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
649 646259 : if (error == -ECANCELED)
650 0 : error = 0;
651 :
652 646259 : xfs_btree_del_cursor(cur, error);
653 646258 : xfs_trans_brelse(sc->tp, agf);
654 646258 : return error;
655 : }
656 :
657 : /*
658 : * Decide if we want to walk every rmap btree in the fs to make sure that each
659 : * rmap for this file fork has corresponding bmbt entries.
660 : */
/*
 * Returns true only when all of the following hold:
 *   - the filesystem has an rmap btree,
 *   - the fork is not the CoW fork,
 *   - XFS_SCRUB_OFLAG_CORRUPT is not already set,
 *   - the fork is not realtime,
 *   - the fork is in EXTENTS format with zero extents, and
 *   - it is either not the data fork, or the file size is nonzero.
 */
661 : static bool
662 44014276 : xchk_bmap_want_check_rmaps(
663 : struct xchk_bmap_info *info)
664 : {
665 44014276 : struct xfs_scrub *sc = info->sc;
666 44014276 : struct xfs_ifork *ifp;
667 :
668 44014276 : if (!xfs_has_rmapbt(sc->mp))
669 : return false;
670 24741367 : if (info->whichfork == XFS_COW_FORK)
671 : return false;
672 21441592 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
673 : return false;
674 :
675 : /* Don't support realtime rmap checks yet. */
676 21441592 : if (info->is_rt)
677 : return false;
678 :
679 : /*
680 : * The inode repair code zaps broken inode forks by resetting them back
681 : * to EXTENTS format and zero extent records. If we encounter a fork
682 : * in this state along with evidence that the fork isn't supposed to be
683 : * empty, we need to scan the reverse mappings to decide if we're going
684 : * to rebuild the fork. Data forks with nonzero file size are scanned.
685 : * xattr forks are never empty of content, so they are always scanned.
686 : */
687 21441632 : ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
688 21441490 : if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
/* An empty data fork on a zero-length file is legitimate; skip the scan. */
689 9614629 : if (info->whichfork == XFS_DATA_FORK &&
690 9605714 : i_size_read(VFS_I(sc->ip)) == 0)
691 : return false;
692 :
693 158782 : return true;
694 : }
695 :
696 : return false;
697 : }
698 :
699 : /* Make sure each rmap has a corresponding bmbt entry. */
/*
 * Whole-filesystem driver: calls xchk_bmap_check_ag_rmaps() for every AG
 * visited by for_each_perag().  Stops at the first error, or as soon as
 * XFS_SCRUB_OFLAG_CORRUPT is set (in which case "error" is 0).
 *
 * Returns 0 or a negative errno.
 */
700 : STATIC int
701 158781 : xchk_bmap_check_rmaps(
702 : struct xfs_scrub *sc,
703 : int whichfork)
704 : {
705 158781 : struct xfs_perag *pag;
706 158781 : xfs_agnumber_t agno;
707 158781 : int error;
708 :
709 805038 : for_each_perag(sc->mp, agno, pag) {
710 646259 : error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
711 646257 : if (error ||
712 646257 : (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
/*
 * Leaving the loop early: release @pag by hand.  NOTE(review): presumably
 * the iterator only drops its reference when advancing -- confirm against
 * the for_each_perag() definition.
 */
713 0 : xfs_perag_rele(pag);
714 0 : return error;
715 : }
716 : }
717 :
718 : return 0;
719 : }
720 :
721 : /* Scrub a delalloc reservation from the incore extent map tree. */
/*
 * Called by xchk_bmap() for mappings whose start block satisfies
 * isnullstartblock().  Only the ordering, file offset range, and length are
 * checked; no physical-block or cross-reference checks are made.  @ip is not
 * used.
 */
722 : STATIC void
723 184 : xchk_bmap_iextent_delalloc(
724 : struct xfs_inode *ip,
725 : struct xchk_bmap_info *info,
726 : struct xfs_bmbt_irec *irec)
727 : {
728 184 : struct xfs_mount *mp = info->sc->mp;
729 :
730 : /*
731 : * Check for out-of-order extents. This record could have come
732 : * from the incore list, for which there is no ordering check.
733 : */
734 184 : if (irec->br_startoff < info->prev_rec.br_startoff +
735 184 : info->prev_rec.br_blockcount)
736 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
737 : irec->br_startoff);
738 :
739 184 : if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
740 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
741 : irec->br_startoff);
742 :
743 : /* The reservation must not be longer than XFS_MAX_BMBT_EXTLEN. */
744 184 : if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
745 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
746 : irec->br_startoff);
747 184 : }
748 :
749 : /* Decide if this individual fork mapping is ok. */
/*
 * Returns false if the mapping's start block is HOLESTARTBLOCK or its length
 * exceeds XFS_MAX_BMBT_EXTLEN, true otherwise.  @info is not used.
 */
750 : static bool
751 : xchk_bmap_iext_mapping(
752 : struct xchk_bmap_info *info,
753 : const struct xfs_bmbt_irec *irec)
754 : {
755 : /* There should never be a "hole" extent in either extent list. */
756 214643387 : if (irec->br_startblock == HOLESTARTBLOCK)
757 : return false;
758 214643387 : if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
759 : return false;
760 : return true;
761 : }
762 :
763 : /* Are these two mappings contiguous with each other? */
/*
 * Returns true only if both mappings pass xfs_bmap_is_real_extent(), @b2
 * starts exactly where @b1 ends in both file offset and start block, and the
 * two have the same br_state.  Order matters: @b1 is the earlier mapping.
 */
764 : static inline bool
765 188682873 : xchk_are_bmaps_contiguous(
766 : const struct xfs_bmbt_irec *b1,
767 : const struct xfs_bmbt_irec *b2)
768 : {
769 : /* Don't try to combine unallocated mappings. */
770 377365653 : if (!xfs_bmap_is_real_extent(b1))
771 : return false;
772 377365895 : if (!xfs_bmap_is_real_extent(b2))
773 : return false;
774 :
775 : /* Does b2 come right after b1 in the logical and physical range? */
776 188682689 : if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
777 : return false;
778 48071604 : if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
779 : return false;
/* Written and unwritten mappings are never merged. */
780 5437049 : if (b1->br_state != b2->br_state)
781 5435917 : return false;
782 : return true;
783 : }
784 :
785 : /*
786 : * Walk the incore extent records, accumulating consecutive contiguous records
787 : * into a single incore mapping. Returns true if @irec has been set to a
788 : * mapping or false if there are no more mappings. Caller must ensure that
789 : * @info.icur is zeroed before the first call.
790 : */
/*
 * false is also returned, after flagging the fork corrupt, when a record
 * fails xchk_bmap_iext_mapping(); in that case @irec may hold a partially
 * merged mapping.  info->icur is left on the last record that was merged.
 */
791 : static bool
792 258646544 : xchk_bmap_iext_iter(
793 : struct xchk_bmap_info *info,
794 : struct xfs_bmbt_irec *irec)
795 : {
796 258646544 : struct xfs_bmbt_irec got;
797 258646544 : struct xfs_ifork *ifp;
/* nr counts the incore records folded into @irec by this call. */
798 258646544 : unsigned int nr = 0;
799 :
800 258646544 : ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
801 :
802 : /* Advance to the next iextent record and check the mapping. */
803 258649365 : xfs_iext_next(ifp, &info->icur);
804 258650492 : if (!xfs_iext_get_extent(ifp, &info->icur, irec))
805 : return false;
806 :
807 214642255 : if (!xchk_bmap_iext_mapping(info, irec)) {
808 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
809 : irec->br_startoff);
810 0 : return false;
811 : }
812 : nr++;
813 :
814 : /*
815 : * Iterate subsequent iextent records and merge them with the one
816 : * that we just read, if possible.
817 : */
818 214643387 : while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
819 188683217 : if (!xchk_are_bmaps_contiguous(irec, &got))
820 : break;
821 :
822 1132 : if (!xchk_bmap_iext_mapping(info, &got)) {
823 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
824 : got.br_startoff);
825 0 : return false;
826 : }
827 1132 : nr++;
828 :
/* Absorb the peeked record, then step the cursor onto it. */
829 1132 : irec->br_blockcount += got.br_blockcount;
830 1132 : xfs_iext_next(ifp, &info->icur);
831 : }
832 :
833 : /*
834 : * If the merged mapping could be expressed with fewer bmbt records
835 : * than we actually found, notify the user that this fork could be
836 : * optimized. CoW forks only exist in memory so we ignore them.
837 : */
838 214643310 : if (nr > 1 && info->whichfork != XFS_COW_FORK &&
839 7 : howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
840 0 : xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
841 :
842 : return true;
843 : }
844 :
845 : /*
846 : * Scrub an inode fork's block mappings.
847 : *
848 : * First we scan every record in every btree block, if applicable.
849 : * Then we unconditionally scan the incore extent cache.
850 : */
/*
 * @sc:        scrub context; sc->ip is the inode being checked.
 * @whichfork: XFS_DATA_FORK, XFS_ATTR_FORK or XFS_COW_FORK.
 *
 * Returns -ENOENT if the inode has no such fork, a negative errno if a helper
 * fails or the scan is told to terminate, and 0 otherwise.  Corruption is
 * reported through sc->sm->sm_flags rather than through the return value.
 */
851 : STATIC int
852 296585306 : xchk_bmap(
853 : struct xfs_scrub *sc,
854 : int whichfork)
855 : {
856 296585306 : struct xfs_bmbt_irec irec;
/* Zero-initialized, so info.icur and info.prev_rec start out cleared. */
857 296585306 : struct xchk_bmap_info info = { NULL };
858 296585306 : struct xfs_mount *mp = sc->mp;
859 296585306 : struct xfs_inode *ip = sc->ip;
860 296585306 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
861 296592791 : xfs_fileoff_t endoff;
862 296592791 : int error = 0;
863 :
864 : /* Non-existent forks can be ignored. */
865 296592791 : if (!ifp)
866 : return -ENOENT;
867 :
868 203332987 : info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
869 203336838 : info.whichfork = whichfork;
870 203336838 : info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
871 203336838 : info.sc = sc;
872 :
873 203336838 : switch (whichfork) {
874 : case XFS_COW_FORK:
875 : /* No CoW forks on non-reflink filesystems. */
876 3299810 : if (!xfs_has_reflink(mp)) {
877 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
878 0 : return 0;
879 : }
880 : break;
881 : case XFS_ATTR_FORK:
882 100013392 : if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
883 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
884 : break;
885 100023636 : default:
886 100023636 : ASSERT(whichfork == XFS_DATA_FORK);
887 : break;
888 : }
889 :
890 : /* Check the fork values */
891 203336838 : switch (ifp->if_format) {
892 159320998 : case XFS_DINODE_FMT_UUID:
893 : case XFS_DINODE_FMT_DEV:
894 : case XFS_DINODE_FMT_LOCAL:
895 : /* No mappings to check. */
896 159320998 : if (whichfork == XFS_COW_FORK)
897 0 : xchk_fblock_set_corrupt(sc, whichfork, 0);
898 : return 0;
899 : case XFS_DINODE_FMT_EXTENTS:
900 : break;
901 1524605 : case XFS_DINODE_FMT_BTREE:
/* A CoW fork in btree format is flagged as corrupt. */
902 1524605 : if (whichfork == XFS_COW_FORK) {
903 0 : xchk_fblock_set_corrupt(sc, whichfork, 0);
904 0 : return 0;
905 : }
906 :
907 1524605 : error = xchk_bmap_btree(sc, whichfork, &info);
908 1524604 : if (error)
909 : return error;
910 : break;
911 0 : default:
912 0 : xchk_fblock_set_corrupt(sc, whichfork, 0);
913 0 : return 0;
914 : }
915 :
916 44015838 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
917 : return 0;
918 :
919 : /* Find the offset of the last extent in the mapping. */
920 44014809 : error = xfs_bmap_last_offset(ip, &endoff, whichfork);
921 44014822 : if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
922 0 : return error;
923 :
924 : /*
925 : * Scrub extent records. We use a special iterator function here that
926 : * combines adjacent mappings if they are logically and physically
927 : * contiguous. For large allocations that require multiple bmbt
928 : * records, this reduces the number of cross-referencing calls, which
929 : * reduces runtime. Cross referencing with the rmap is simpler because
930 : * the rmap must match the combined mapping exactly.
931 : */
932 258662995 : while (xchk_bmap_iext_iter(&info, &irec)) {
/*
 * xchk_should_terminate() is handed &error so that it can record why the scan
 * has to stop; return that value rather than a hard-coded 0 so the reason is
 * not thrown away.  NOTE(review): when only the corrupt flag is set, error is
 * expected to still be 0 here, because nothing else in this loop assigns it.
 */
933 214641499 : if (xchk_should_terminate(sc, &error) ||
934 214643906 : (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
935 : return error;
936 :
/* No mapping may start at or beyond the end offset computed above. */
937 214643906 : if (irec.br_startoff >= endoff) {
938 0 : xchk_fblock_set_corrupt(sc, whichfork,
939 : irec.br_startoff);
940 0 : return 0;
941 : }
942 :
943 214643906 : if (isnullstartblock(irec.br_startblock))
944 184 : xchk_bmap_iextent_delalloc(ip, &info, &irec);
945 : else
946 214643722 : xchk_bmap_iextent(ip, &info, &irec);
/* Remember this mapping for the next iteration's ordering check. */
947 214647934 : memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
948 : }
949 :
950 44013894 : if (xchk_bmap_want_check_rmaps(&info)) {
951 158782 : error = xchk_bmap_check_rmaps(sc, whichfork);
952 158782 : if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
953 0 : return error;
954 : }
955 :
956 : return 0;
957 : }
958 :
959 : /* Scrub an inode's data fork. */
/* Thin wrapper: runs xchk_bmap() on XFS_DATA_FORK and returns its result. */
960 : int
961 100023840 : xchk_bmap_data(
962 : struct xfs_scrub *sc)
963 : {
964 100023840 : return xchk_bmap(sc, XFS_DATA_FORK);
965 : }
966 :
967 : /* Scrub an inode's attr fork. */
/* Thin wrapper: runs xchk_bmap() on XFS_ATTR_FORK and returns its result. */
968 : int
969 100024162 : xchk_bmap_attr(
970 : struct xfs_scrub *sc)
971 : {
972 100024162 : return xchk_bmap(sc, XFS_ATTR_FORK);
973 : }
974 :
975 : /* Scrub an inode's CoW fork. */
/* Thin wrapper: runs xchk_bmap() on XFS_COW_FORK and returns its result. */
976 : int
977 96548575 : xchk_bmap_cow(
978 : struct xfs_scrub *sc)
979 : {
980 96548575 : return xchk_bmap(sc, XFS_COW_FORK);
981 : }
|