Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_bit.h"
14 : #include "xfs_log_format.h"
15 : #include "xfs_trans.h"
16 : #include "xfs_inode.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_bmap.h"
19 : #include "xfs_bmap_btree.h"
20 : #include "xfs_rmap.h"
21 : #include "xfs_rmap_btree.h"
22 : #include "scrub/scrub.h"
23 : #include "scrub/common.h"
24 : #include "scrub/btree.h"
25 : #include "xfs_ag.h"
26 :
27 : /*
   : * Set us up with an inode's bmap.
   : *
   : * Enables the intent-drain fsgate when xchk_need_intent_drain() asks for
   : * it, grabs the inode with xchk_iget_for_scrubbing() and takes
   : * XFS_IOLOCK_EXCL. For regular files (except when the scrub type is
   : * XFS_SCRUB_TYPE_BMBTA) it also takes XFS_MMAPLOCK_EXCL, waits for direct
   : * I/O and writes back dirty page cache. Finally it allocates the scrub
   : * transaction, attaches dquots and takes XFS_ILOCK_EXCL.
   : *
   : * Returns 0 or a negative errno. Nothing is unwound here on failure; as
   : * noted at the out label, scrub teardown unlocks and releases the inode.
   : */
28 : int
29 508087344 : xchk_setup_inode_bmap(
30 : struct xfs_scrub *sc)
31 : {
32 508087344 : int error;
33 :
34 508087344 : if (xchk_need_intent_drain(sc))
35 0 : xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
36 :
37 508087344 : error = xchk_iget_for_scrubbing(sc);
38 508699338 : if (error)
39 2686107 : goto out;
40 :
41 506013231 : xchk_ilock(sc, XFS_IOLOCK_EXCL);
42 :
43 : /*
44 : * We don't want any ephemeral data/cow fork updates sitting around
45 : * while we inspect block mappings, so wait for directio to finish
46 : * and flush dirty data if we have delalloc reservations.
   : * Scrubs of type XFS_SCRUB_TYPE_BMBTA skip this whole step.
47 : */
48 505436310 : if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
49 231359739 : sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
50 151522112 : struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
51 151522112 : bool is_repair = xchk_could_repair(sc);
52 :
53 151522112 : xchk_ilock(sc, XFS_MMAPLOCK_EXCL);
54 :
55 : /* Break all our leases, we're going to mess with things. */
56 151550786 : if (is_repair) {
57 5514353 : error = xfs_break_layouts(VFS_I(sc->ip),
58 : &sc->ilock_flags, BREAK_WRITE);
59 5514503 : if (error)
60 0 : goto out;
61 : }
62 :
63 151550936 : inode_dio_wait(VFS_I(sc->ip));
64 :
65 : /*
66 : * Try to flush all incore state to disk before we examine the
67 : * space mappings for the data fork. Leave accumulated errors
68 : * in the mapping for the writer threads to consume.
69 : *
70 : * On ENOSPC or EIO writeback errors, we continue into the
71 : * extent mapping checks because write failures do not
72 : * necessarily imply anything about the correctness of the file
73 : * metadata. The metadata and the file data could be on
74 : * completely separate devices; a media failure might only
75 : * affect a subset of the disk, etc. We can handle delalloc
76 : * extents in the scrubber, so leaving them in memory is fine.
   : *
   : * A tolerated -ENOSPC/-EIO left in @error is overwritten by the
   : * next assignment to @error further down.
77 : */
78 151302652 : error = filemap_fdatawrite(mapping);
79 151533101 : if (!error)
80 151518714 : error = filemap_fdatawait_keep_errors(mapping);
81 151485156 : if (error && (error != -ENOSPC && error != -EIO))
82 0 : goto out;
83 :
84 : /* Drop the page cache if we're repairing block mappings. */
85 151485156 : if (is_repair) {
86 5513792 : error = invalidate_inode_pages2(
87 5513792 : VFS_I(sc->ip)->i_mapping);
88 5513606 : if (error)
89 0 : goto out;
90 : }
91 :
92 : }
93 :
94 : /* Got the inode, lock it and we're ready to go. */
95 505399168 : error = xchk_trans_alloc(sc, 0);
96 505456371 : if (error)
97 0 : goto out;
98 :
99 505456371 : error = xchk_ino_dqattach(sc);
100 505502347 : if (error)
101 0 : goto out;
102 :
103 505502347 : xchk_ilock(sc, XFS_ILOCK_EXCL);
104 507975427 : out:
105 : /* scrub teardown will unlock and release the inode */
106 507975427 : return error;
107 : }
108 :
109 : /*
110 : * Inode fork block mapping (BMBT) scrubber.
111 : * More complex than the others because we have to scrub
112 : * all the extents regardless of whether or not the fork
113 : * is in btree format.
   : *
   : * struct xchk_bmap_info carries the state of one fork check. It is
   : * zero-initialized and filled in by xchk_bmap(), then handed to the
   : * per-record helpers below.
114 : */
115 :
116 : struct xchk_bmap_info {
117 : struct xfs_scrub *sc; /* scrub context of this check */
118 :
119 : /* Incore extent tree cursor, advanced by xchk_bmap_iext_iter() */
120 : struct xfs_iext_cursor icur;
121 :
122 : /*
   : * Previous fork mapping that we examined; xchk_bmap() copies each
   : * mapping here after checking it so the next one can be tested for
   : * ordering.
   : */
123 : struct xfs_bmbt_irec prev_rec;
124 :
125 : /* Is this a realtime fork? Set from xfs_ifork_is_realtime(). */
126 : bool is_rt;
127 :
128 : /*
   : * May mappings point to shared space? Only set for the data fork of
   : * a reflink inode.
   : */
129 : bool is_shared;
130 :
131 : /*
   : * Was the incore extent tree loaded? Sampled in xchk_bmap_btree()
   : * before the scrubber itself calls xfs_iread_extents().
   : */
132 : bool was_loaded;
133 :
134 : /* Which inode fork are we checking? */
135 : int whichfork;
136 : };
137 :
138 : /*
   : * Look for a corresponding rmap for this irec.
   : *
   : * Builds the rmap flags and offset from the fork being checked and the
   : * mapping state, then looks up the record at or before @agbno for @owner
   : * using the cursor in info->sc->sa.rmap_cur. Returns true with @rmap
   : * filled in if a record was found. Returns false if
   : * xchk_should_check_xref() rejects the lookup result, or if no record was
   : * found, in which case the fork block is also marked as an xref
   : * corruption.
   : */
139 : static inline bool
140 242288124 : xchk_bmap_get_rmap(
141 : struct xchk_bmap_info *info,
142 : struct xfs_bmbt_irec *irec,
143 : xfs_agblock_t agbno,
144 : uint64_t owner,
145 : struct xfs_rmap_irec *rmap)
146 : {
147 242288124 : xfs_fileoff_t offset;
148 242288124 : unsigned int rflags = 0;
149 242288124 : int has_rmap;
150 242288124 : int error;
151 :
152 242288124 : if (info->whichfork == XFS_ATTR_FORK)
153 984341 : rflags |= XFS_RMAP_ATTR_FORK;
154 242288124 : if (irec->br_state == XFS_EXT_UNWRITTEN)
155 14556831 : rflags |= XFS_RMAP_UNWRITTEN;
156 :
157 : /*
158 : * CoW staging extents are owned (on disk) by the refcountbt, so
159 : * their rmaps do not have offsets.
160 : */
161 242288124 : if (info->whichfork == XFS_COW_FORK)
162 : offset = 0;
163 : else
164 240726330 : offset = irec->br_startoff;
165 :
166 : /*
167 : * If the caller thinks this could be a shared bmbt extent (IOWs,
168 : * any data fork extent of a reflink inode) then we have to use the
169 : * range rmap lookup to make sure we get the correct owner/offset.
170 : */
171 242288124 : if (info->is_shared) {
172 106938361 : error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
173 : owner, offset, rflags, rmap, &has_rmap);
174 : } else {
175 135349763 : error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
176 : owner, offset, rflags, rmap, &has_rmap);
177 : }
178 242272209 : if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
179 : return false;
180 :
181 242252230 : if (!has_rmap)
182 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
183 : irec->br_startoff);
184 242252230 : return has_rmap; /* int lookup result converted to bool */
185 : }
186 :
187 : /*
   : * Make sure that we have rmapbt records for this data/attr fork extent.
   : *
   : * Does nothing when there is no rmap cursor or xchk_skip_xref() says to
   : * skip cross-referencing. Otherwise the rmap found for @irec must agree
   : * with it on start block, length, file offset, owner (this inode),
   : * unwritten state and attr-fork flag, and must not be flagged as a bmbt
   : * block. Each mismatch marks the fork block as an xref corruption.
   : */
188 : STATIC void
189 253997572 : xchk_bmap_xref_rmap(
190 : struct xchk_bmap_info *info,
191 : struct xfs_bmbt_irec *irec,
192 : xfs_agblock_t agbno)
193 : {
194 253997572 : struct xfs_rmap_irec rmap;
195 253997572 : unsigned long long rmap_end;
196 253997572 : uint64_t owner = info->sc->ip->i_ino;
197 :
198 253997572 : if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
199 13262610 : return;
200 :
201 : /* Find the rmap record for this irec. */
202 240734962 : if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
203 : return;
204 :
205 : /*
206 : * The rmap must be an exact match for this incore file mapping record,
207 : * which may have arisen from multiple ondisk records.
208 : */
209 240702459 : if (rmap.rm_startblock != agbno)
210 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
211 : irec->br_startoff);
212 :
   /* End positions are computed as unsigned long long before comparing. */
213 240702459 : rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
214 240702459 : if (rmap_end != agbno + irec->br_blockcount)
215 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
216 : irec->br_startoff);
217 :
218 : /* Check the logical offsets. */
219 240702459 : if (rmap.rm_offset != irec->br_startoff)
220 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
221 : irec->br_startoff);
222 :
223 240702459 : rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
224 240702459 : if (rmap_end != irec->br_startoff + irec->br_blockcount)
225 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
226 : irec->br_startoff);
227 :
228 : /* Check the owner */
229 240702459 : if (rmap.rm_owner != owner)
230 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
231 : irec->br_startoff);
232 :
233 : /*
234 : * Check for discrepancies between the unwritten flag in the irec and
235 : * the rmap. Note that the (in-memory) CoW fork distinguishes between
236 : * unwritten and written extents, but we don't track that in the rmap
237 : * records because the blocks are owned (on-disk) by the refcountbt,
238 : * which doesn't track unwritten state.
239 : */
240 240702459 : if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
241 240702459 : !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
242 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
243 : irec->br_startoff);
244 :
245 240702459 : if (!!(info->whichfork == XFS_ATTR_FORK) !=
246 240702459 : !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
247 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
248 : irec->br_startoff);
249 240702459 : if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
250 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
251 : irec->br_startoff);
252 : }
253 :
254 : /*
   : * Make sure that we have rmapbt records for this COW fork extent.
   : *
   : * Same early exits as xchk_bmap_xref_rmap(). The lookup uses
   : * XFS_RMAP_OWN_COW as the owner, and the rmap only has to cover the
   : * mapping's physical range rather than match it exactly. The attr-fork,
   : * bmbt-block and unwritten flags must all be clear on the rmap.
   : */
255 : STATIC void
256 1564663 : xchk_bmap_xref_rmap_cow(
257 : struct xchk_bmap_info *info,
258 : struct xfs_bmbt_irec *irec,
259 : xfs_agblock_t agbno)
260 : {
261 1564663 : struct xfs_rmap_irec rmap;
262 1564663 : unsigned long long rmap_end;
263 1564663 : uint64_t owner = XFS_RMAP_OWN_COW;
264 :
265 1564663 : if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
266 0 : return;
267 :
268 : /* Find the rmap record for this irec. */
269 1564663 : if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
270 : return;
271 :
272 : /*
273 : * CoW staging extents are owned by the refcount btree, so the rmap
274 : * can start before and end after the physical space allocated to this
275 : * mapping. There are no offsets to check.
276 : */
277 1564491 : if (rmap.rm_startblock > agbno)
278 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
279 : irec->br_startoff);
280 :
281 1564491 : rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
282 1564491 : if (rmap_end < agbno + irec->br_blockcount)
283 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
284 : irec->br_startoff);
285 :
286 : /* Check the owner */
287 1564491 : if (rmap.rm_owner != owner)
288 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
289 : irec->br_startoff);
290 :
291 : /*
292 : * No flags allowed. Note that the (in-memory) CoW fork distinguishes
293 : * between unwritten and written extents, but we don't track that in
294 : * the rmap records because the blocks are owned (on-disk) by the
295 : * refcountbt, which doesn't track unwritten state.
296 : */
297 1564491 : if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
298 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
299 : irec->br_startoff);
300 1564491 : if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
301 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
302 : irec->br_startoff);
303 1564491 : if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
304 0 : xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
305 : irec->br_startoff);
306 : }
307 :
308 : /*
   : * Cross-reference a single rtdev extent record.
   : *
   : * The only check made is xchk_xref_is_used_rt_space() over the mapping's
   : * start block and length. @ip is not used by this function.
   : */
309 : STATIC void
310 75298989 : xchk_bmap_rt_iextent_xref(
311 : struct xfs_inode *ip,
312 : struct xchk_bmap_info *info,
313 : struct xfs_bmbt_irec *irec)
314 : {
315 75298989 : xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
316 75298989 : irec->br_blockcount);
317 75494856 : }
318 :
319 : /*
   : * Cross-reference a single datadev extent record.
   : *
   : * Splits the mapping's start block into AG number and AG block, sets up
   : * the per-AG scrub state in info->sc->sa for that AG, runs the
   : * cross-reference checks that apply to the fork being scrubbed, and then
   : * frees the per-AG state again. @ip is not used by this function; the
   : * inode is taken from info->sc->ip.
   : */
320 : STATIC void
321 255454577 : xchk_bmap_iextent_xref(
322 : struct xfs_inode *ip,
323 : struct xchk_bmap_info *info,
324 : struct xfs_bmbt_irec *irec)
325 : {
326 255454577 : struct xfs_owner_info oinfo;
327 255454577 : struct xfs_mount *mp = info->sc->mp;
328 255454577 : xfs_agnumber_t agno;
329 255454577 : xfs_agblock_t agbno;
330 255454577 : xfs_extlen_t len;
331 255454577 : int error;
332 :
333 255454577 : agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
334 255454577 : agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
335 255441251 : len = irec->br_blockcount;
336 :
337 255441251 : error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
338 255579890 : if (!xchk_fblock_process_error(info->sc, info->whichfork,
339 : irec->br_startoff, &error))
340 0 : goto out_free;
341 :
342 255509534 : xchk_xref_is_used_space(info->sc, agbno, len);
343 255503781 : xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
344 255586884 : switch (info->whichfork) {
345 252524574 : case XFS_DATA_FORK:
346 252524574 : xchk_bmap_xref_rmap(info, irec, agbno);
   /* Sole-owner and not-shared checks apply only to non-reflink inodes. */
347 252463081 : if (!xfs_is_reflink_inode(info->sc->ip)) {
348 145533480 : xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
349 : info->whichfork, irec->br_startoff);
350 145533480 : xchk_xref_is_only_owned_by(info->sc, agbno,
351 145533480 : irec->br_blockcount, &oinfo);
352 145579980 : xchk_xref_is_not_shared(info->sc, agbno,
353 145579980 : irec->br_blockcount);
354 : }
355 252505803 : xchk_xref_is_not_cow_staging(info->sc, agbno,
356 252505803 : irec->br_blockcount);
357 252505803 : break;
358 1497520 : case XFS_ATTR_FORK:
359 1497520 : xchk_bmap_xref_rmap(info, irec, agbno);
360 1491628 : xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
361 : info->whichfork, irec->br_startoff);
362 1491628 : xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
363 : &oinfo);
364 1495546 : xchk_xref_is_not_shared(info->sc, agbno,
365 1495546 : irec->br_blockcount);
366 1495562 : xchk_xref_is_not_cow_staging(info->sc, agbno,
367 1495562 : irec->br_blockcount);
368 1495562 : break;
369 1564790 : case XFS_COW_FORK:
370 1564790 : xchk_bmap_xref_rmap_cow(info, irec, agbno);
371 1564551 : xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
372 : &XFS_RMAP_OINFO_COW);
373 1564791 : xchk_xref_is_cow_staging(info->sc, agbno,
374 1564791 : irec->br_blockcount);
375 1564830 : xchk_xref_is_not_shared(info->sc, agbno,
376 1564830 : irec->br_blockcount);
377 1564830 : break;
378 : }
379 :
380 255569865 : out_free:
381 255569865 : xchk_ag_free(info->sc, &info->sc->sa);
382 255605224 : }
383 :
384 : /*
385 : * Directories and attr forks should never have blocks that can't be addressed
386 : * by a xfs_dablk_t.
   : *
   : * Returns without checking anything unless the inode is a directory or
   : * the attr fork is being scrubbed. Otherwise both the first and the last
   : * file offset of the mapping are passed to xfs_verify_dablk(), and a
   : * failure marks that offset as corrupt.
387 : */
388 : STATIC void
389 330774234 : xchk_bmap_dirattr_extent(
390 : struct xfs_inode *ip,
391 : struct xchk_bmap_info *info,
392 : struct xfs_bmbt_irec *irec)
393 : {
394 330774234 : struct xfs_mount *mp = ip->i_mount;
395 330774234 : xfs_fileoff_t off;
396 :
397 330774234 : if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
398 : return;
399 :
400 2620377 : if (!xfs_verify_dablk(mp, irec->br_startoff))
401 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
402 : irec->br_startoff);
403 :
404 2622849 : off = irec->br_startoff + irec->br_blockcount - 1; /* last offset mapped */
405 2622849 : if (!xfs_verify_dablk(mp, off))
406 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
407 : }
408 :
409 : /*
   : * Scrub a single extent record.
   : *
   : * Checks ordering against info->prev_rec, the file offset range, the
   : * directory/attr addressing limit, the physical range (realtime or data
   : * device, depending on info->is_rt) and rejects unwritten extents in the
   : * attr fork. Cross-referencing is attempted only if none of that has set
   : * XFS_SCRUB_OFLAG_CORRUPT.
   : */
410 : STATIC void
411 330789789 : xchk_bmap_iextent(
412 : struct xfs_inode *ip,
413 : struct xchk_bmap_info *info,
414 : struct xfs_bmbt_irec *irec)
415 : {
416 330789789 : struct xfs_mount *mp = info->sc->mp;
417 :
418 : /*
419 : * Check for out-of-order extents. This record could have come
420 : * from the incore list, for which there is no ordering check.
421 : */
422 330789789 : if (irec->br_startoff < info->prev_rec.br_startoff +
423 330789789 : info->prev_rec.br_blockcount)
424 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
425 : irec->br_startoff);
426 :
427 330789789 : if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
428 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
429 : irec->br_startoff);
430 :
431 330765686 : xchk_bmap_dirattr_extent(ip, info, irec);
432 :
433 : /* Make sure the extent points to a valid place. */
434 406147790 : if (info->is_rt &&
435 75349692 : !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
436 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
437 : irec->br_startoff);
438 586273056 : if (!info->is_rt &&
439 255457451 : !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
440 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
441 : irec->br_startoff);
442 :
443 : /* We don't allow unwritten extents on attr forks. */
444 330815605 : if (irec->br_state == XFS_EXT_UNWRITTEN &&
445 33956779 : info->whichfork == XFS_ATTR_FORK)
446 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
447 : irec->br_startoff);
448 :
   /* Skip cross-referencing once corruption has been flagged. */
449 330815605 : if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
450 : return;
451 :
452 330815605 : if (info->is_rt)
453 75330315 : xchk_bmap_rt_iextent_xref(ip, info, irec);
454 : else
455 255485290 : xchk_bmap_iextent_xref(ip, info, irec);
456 : }
457 :
458 : /*
   : * Scrub a bmbt record.
   : *
   : * Record callback handed to xchk_btree() by xchk_bmap_btree(); bs->private
   : * is the struct xchk_bmap_info for this fork. Every path returns 0;
   : * problems are reported through xchk_fblock_set_corrupt() instead.
   : */
459 : STATIC int
460 252536133 : xchk_bmapbt_rec(
461 : struct xchk_btree *bs,
462 : const union xfs_btree_rec *rec)
463 : {
464 252536133 : struct xfs_bmbt_irec irec;
465 252536133 : struct xfs_bmbt_irec iext_irec;
466 252536133 : struct xfs_iext_cursor icur;
467 252536133 : struct xchk_bmap_info *info = bs->private;
468 252536133 : struct xfs_inode *ip = bs->cur->bc_ino.ip;
469 252536133 : struct xfs_buf *bp = NULL;
470 252536133 : struct xfs_btree_block *block;
471 252536133 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork);
472 252532596 : uint64_t owner;
473 252532596 : int i;
474 :
475 : /*
476 : * Check the owners of the btree blocks up to the level below
477 : * the root since the verifiers don't do that.
   : * This runs only on CRC-enabled filesystems and only when the
   : * level-0 cursor pointer is 1.
478 : */
479 252532596 : if (xfs_has_crc(bs->cur->bc_mp) &&
480 252535014 : bs->cur->bc_levels[0].ptr == 1) {
481 7506472 : for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
482 4294854 : block = xfs_btree_get_block(bs->cur, i, &bp);
483 4294831 : owner = be64_to_cpu(block->bb_u.l.bb_owner);
484 4294831 : if (owner != ip->i_ino)
485 0 : xchk_fblock_set_corrupt(bs->sc,
486 : info->whichfork, 0);
487 : }
488 : }
489 :
490 : /*
491 : * Check that the incore extent tree contains an extent that matches
492 : * this one exactly. We validate those cached bmaps later, so we don't
493 : * need to check them here. If the incore extent tree was just loaded
494 : * from disk by the scrubber, we assume that its contents match what's
495 : * on disk (we still hold the ILOCK) and skip the equivalence check.
496 : */
497 252532573 : if (!info->was_loaded)
498 : return 0;
499 :
500 252536091 : xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
501 252537250 : if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
502 0 : xchk_fblock_set_corrupt(bs->sc, info->whichfork,
503 : irec.br_startoff);
504 0 : return 0;
505 : }
506 :
   /* The incore mapping at this offset must equal the ondisk record. */
507 252556873 : if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
508 252608558 : &iext_irec) ||
509 252608558 : irec.br_startoff != iext_irec.br_startoff ||
510 252611879 : irec.br_startblock != iext_irec.br_startblock ||
511 252613173 : irec.br_blockcount != iext_irec.br_blockcount ||
512 252614288 : irec.br_state != iext_irec.br_state)
513 0 : xchk_fblock_set_corrupt(bs->sc, info->whichfork,
514 : irec.br_startoff);
515 : return 0;
516 : }
517 :
518 : /*
   : * Scan the btree records.
   : *
   : * Records in info->was_loaded whether the incore extent tree was already
   : * present, reads the extents in, and then walks the fork's bmap btree
   : * with xchk_btree(), using xchk_bmapbt_rec() as the record callback.
   : * Returns @error as left by the extent read or the btree walk.
   : */
519 : STATIC int
520 2331687 : xchk_bmap_btree(
521 : struct xfs_scrub *sc,
522 : int whichfork,
523 : struct xchk_bmap_info *info)
524 : {
525 2331687 : struct xfs_owner_info oinfo;
526 2331687 : struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
527 2331247 : struct xfs_mount *mp = sc->mp;
528 2331247 : struct xfs_inode *ip = sc->ip;
529 2331247 : struct xfs_btree_cur *cur;
530 2331247 : int error;
531 :
532 : /* Load the incore bmap cache if it's not loaded. */
533 2331247 : info->was_loaded = !xfs_need_iread_extents(ifp);
534 :
535 2331266 : error = xfs_iread_extents(sc->tp, ip, whichfork);
536 2331780 : if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
537 0 : goto out;
538 :
539 : /* Check the btree structure. */
540 2331725 : cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
541 2332560 : xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
542 2332560 : error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
543 2332794 : xfs_btree_del_cursor(cur, error);
544 2332769 : out:
545 2332769 : return error;
546 : }
547 :
548 : struct xchk_bmap_check_rmap_info { /* private data for xchk_bmap_check_rmap() */
549 : struct xfs_scrub *sc; /* scrub context */
550 : int whichfork; /* fork whose rmaps are being matched */
551 : struct xfs_iext_cursor icur; /* incore extent cursor used by the callback */
552 : };
553 :
554 : /*
   : * Can we find bmaps that fit this rmap?
   : *
   : * Callback passed to xfs_rmap_query_all(); @priv is a struct
   : * xchk_bmap_check_rmap_info. Records that belong to another owner or to
   : * the other fork, or that describe bmbt blocks, are skipped with a return
   : * of 0. For the rest, the incore extent list must cover the whole rmap.
   : * Returns -ECANCELED once XFS_SCRUB_OFLAG_CORRUPT is set, otherwise 0.
   : */
555 : STATIC int
556 31630663834 : xchk_bmap_check_rmap(
557 : struct xfs_btree_cur *cur,
558 : const struct xfs_rmap_irec *rec,
559 : void *priv)
560 : {
561 31630663834 : struct xfs_bmbt_irec irec;
562 31630663834 : struct xfs_rmap_irec check_rec;
563 31630663834 : struct xchk_bmap_check_rmap_info *sbcri = priv;
564 31630663834 : struct xfs_ifork *ifp;
565 31630663834 : struct xfs_scrub *sc = sbcri->sc;
566 31630663834 : bool have_map;
567 :
568 : /* Is this even the right fork? */
569 31630663834 : if (rec->rm_owner != sc->ip->i_ino)
570 : return 0;
571 16866 : if ((sbcri->whichfork == XFS_ATTR_FORK) ^
572 16866 : !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
573 : return 0;
574 0 : if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
575 : return 0;
576 :
577 : /* Now look up the bmbt record. */
578 0 : ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
579 0 : if (!ifp) {
580 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
581 0 : rec->rm_offset);
582 0 : goto out;
583 : }
584 0 : have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
585 : &sbcri->icur, &irec);
586 0 : if (!have_map)
587 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
588 0 : rec->rm_offset);
589 : /*
590 : * bmap extent record lengths are constrained to 2^21 blocks in length
591 : * because of space constraints in the on-disk metadata structure.
592 : * However, rmap extent record lengths are constrained only by AG
593 : * length, so we have to loop through the bmbt to make sure that the
594 : * entire rmap is covered by bmbt records.
   : *
   : * check_rec is a working copy of the rmap that shrinks from the
   : * front as each bmbt record is matched against it.
595 : */
596 0 : check_rec = *rec;
597 0 : while (have_map) {
598 0 : if (irec.br_startoff != check_rec.rm_offset)
599 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
600 : check_rec.rm_offset);
601 0 : if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
602 : cur->bc_ag.pag->pag_agno,
603 : check_rec.rm_startblock))
604 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
605 : check_rec.rm_offset);
606 0 : if (irec.br_blockcount > check_rec.rm_blockcount)
607 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
608 : check_rec.rm_offset);
609 0 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
610 : break;
611 0 : check_rec.rm_startblock += irec.br_blockcount;
612 0 : check_rec.rm_offset += irec.br_blockcount;
613 0 : check_rec.rm_blockcount -= irec.br_blockcount;
614 0 : if (check_rec.rm_blockcount == 0)
615 : break;
616 0 : have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
617 0 : if (!have_map)
618 0 : xchk_fblock_set_corrupt(sc, sbcri->whichfork,
619 : check_rec.rm_offset);
620 : }
621 :
622 0 : out:
623 0 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
624 0 : return -ECANCELED;
625 : return 0;
626 : }
627 :
628 : /*
   : * Make sure each rmap has a corresponding bmbt entry.
   : *
   : * Handles one allocation group: reads the AGF, walks every rmap record
   : * of @pag with xchk_bmap_check_rmap(), then deletes the cursor and
   : * releases the AGF buffer. An -ECANCELED result from the walk is turned
   : * into 0, since the callback uses it only to stop after flagging
   : * corruption. Returns 0 or the error from the AGF read or the walk.
   : */
629 : STATIC int
630 923406 : xchk_bmap_check_ag_rmaps(
631 : struct xfs_scrub *sc,
632 : int whichfork,
633 : struct xfs_perag *pag)
634 : {
635 923406 : struct xchk_bmap_check_rmap_info sbcri;
636 923406 : struct xfs_btree_cur *cur;
637 923406 : struct xfs_buf *agf;
638 923406 : int error;
639 :
640 923406 : error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
641 923384 : if (error)
642 : return error;
643 :
644 923385 : cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
645 :
646 923407 : sbcri.sc = sc;
647 923407 : sbcri.whichfork = whichfork;
648 923407 : error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
649 923403 : if (error == -ECANCELED)
650 0 : error = 0;
651 :
652 923403 : xfs_btree_del_cursor(cur, error);
653 923409 : xfs_trans_brelse(sc->tp, agf);
654 923409 : return error;
655 : }
656 :
657 : /*
658 : * Decide if we want to walk every rmap btree in the fs to make sure that each
659 : * rmap for this file fork has corresponding bmbt entries.
   : *
   : * Returns false when the filesystem has no rmapbt, for the CoW fork, when
   : * corruption has already been flagged, and for realtime forks. Otherwise
   : * returns true only for a fork in EXTENTS format with zero extents that
   : * is either not the data fork or belongs to a file of nonzero size.
660 : */
661 : static bool
662 100821809 : xchk_bmap_want_check_rmaps(
663 : struct xchk_bmap_info *info)
664 : {
665 100821809 : struct xfs_scrub *sc = info->sc;
666 100821809 : struct xfs_ifork *ifp;
667 :
668 100821809 : if (!xfs_has_rmapbt(sc->mp))
669 : return false;
670 53191467 : if (info->whichfork == XFS_COW_FORK)
671 : return false;
672 48554198 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
673 : return false;
674 :
675 : /* Don't support realtime rmap checks yet. */
676 48554198 : if (info->is_rt)
677 : return false;
678 :
679 : /*
680 : * The inode repair code zaps broken inode forks by resetting them back
681 : * to EXTENTS format and zero extent records. If we encounter a fork
682 : * in this state along with evidence that the fork isn't supposed to be
683 : * empty, we need to scan the reverse mappings to decide if we're going
684 : * to rebuild the fork. Data forks with nonzero file size are scanned.
685 : * xattr forks are never empty of content, so they are always scanned.
686 : */
687 48524002 : ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
688 48524956 : if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
689 26276604 : if (info->whichfork == XFS_DATA_FORK &&
690 26257404 : i_size_read(VFS_I(sc->ip)) == 0)
691 : return false;
692 :
693 189747 : return true;
694 : }
695 :
696 : return false;
697 : }
698 :
699 : /*
   : * Make sure each rmap has a corresponding bmbt entry.
   : *
   : * Runs xchk_bmap_check_ag_rmaps() for every perag of the filesystem. The
   : * walk stops at the first error or once XFS_SCRUB_OFLAG_CORRUPT is set;
   : * in that case the current perag is released with xfs_perag_rele() before
   : * returning, and the value returned is @error (0 if only the corrupt
   : * flag stopped the walk).
   : */
700 : STATIC int
701 189749 : xchk_bmap_check_rmaps(
702 : struct xfs_scrub *sc,
703 : int whichfork)
704 : {
705 189749 : struct xfs_perag *pag;
706 189749 : xfs_agnumber_t agno;
707 189749 : int error;
708 :
709 1113156 : for_each_perag(sc->mp, agno, pag) {
710 923409 : error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
711 923407 : if (error ||
712 923407 : (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
713 0 : xfs_perag_rele(pag);
714 0 : return error;
715 : }
716 : }
717 :
718 : return 0;
719 : }
720 :
721 : /*
   : * Scrub a delalloc reservation from the incore extent map tree.
   : *
   : * xchk_bmap() routes a mapping here when isnullstartblock() is true for
   : * its start block. Only ordering, the file offset range and the maximum
   : * record length are checked; no cross-referencing is done. @ip is not
   : * used by this function.
   : */
722 : STATIC void
723 917 : xchk_bmap_iextent_delalloc(
724 : struct xfs_inode *ip,
725 : struct xchk_bmap_info *info,
726 : struct xfs_bmbt_irec *irec)
727 : {
728 917 : struct xfs_mount *mp = info->sc->mp;
729 :
730 : /*
731 : * Check for out-of-order extents. This record could have come
732 : * from the incore list, for which there is no ordering check.
733 : */
734 917 : if (irec->br_startoff < info->prev_rec.br_startoff +
735 917 : info->prev_rec.br_blockcount)
736 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
737 : irec->br_startoff);
738 :
739 917 : if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
740 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
741 : irec->br_startoff);
742 :
743 : /* Make sure the extent points to a valid place. */
744 917 : if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
745 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
746 : irec->br_startoff);
747 917 : }
748 :
749 : /*
   : * Decide if this individual fork mapping is ok.
   : *
   : * Returns false for a mapping whose start block is HOLESTARTBLOCK or
   : * whose length exceeds XFS_MAX_BMBT_EXTLEN, true otherwise. @info is not
   : * consulted.
   : */
750 : static bool
751 : xchk_bmap_iext_mapping(
752 : struct xchk_bmap_info *info,
753 : const struct xfs_bmbt_irec *irec)
754 : {
755 : /* There should never be a "hole" extent in either extent list. */
756 330906138 : if (irec->br_startblock == HOLESTARTBLOCK)
757 : return false;
758 330906138 : if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
759 : return false;
760 : return true;
761 : }
762 :
763 : /*
   : * Are these two mappings contiguous with each other?
   : *
   : * True only if both are real extents, @b2 starts exactly where @b1 ends
   : * in both file offset and start block, and both have the same state.
   : */
764 : static inline bool
765 284192728 : xchk_are_bmaps_contiguous(
766 : const struct xfs_bmbt_irec *b1,
767 : const struct xfs_bmbt_irec *b2)
768 : {
769 : /* Don't try to combine unallocated mappings. */
770 568390073 : if (!xfs_bmap_is_real_extent(b1))
771 : return false;
772 568392716 : if (!xfs_bmap_is_real_extent(b2))
773 : return false;
774 :
775 : /* Does b2 come right after b1 in the logical and physical range? */
776 284191793 : if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
777 : return false;
778 101646031 : if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
779 : return false;
780 14082127 : if (b1->br_state != b2->br_state)
781 14077341 : return false;
782 : return true;
783 : }
784 :
785 : /*
786 : * Walk the incore extent records, accumulating consecutive contiguous records
787 : * into a single incore mapping. Returns true if @irec has been set to a
788 : * mapping or false if there are no more mappings. Caller must ensure that
789 : * @info.icur is zeroed before the first call.
   : *
   : * False is also returned, after marking the offset corrupt, when a record
   : * fails xchk_bmap_iext_mapping(). On a true return info->icur is left on
   : * the last record that was merged into @irec.
790 : */
791 : static bool
792 431676091 : xchk_bmap_iext_iter(
793 : struct xchk_bmap_info *info,
794 : struct xfs_bmbt_irec *irec)
795 : {
796 431676091 : struct xfs_bmbt_irec got;
797 431676091 : struct xfs_ifork *ifp;
798 431676091 : unsigned int nr = 0; /* incore records merged into @irec */
799 :
800 431676091 : ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
801 :
802 : /* Advance to the next iextent record and check the mapping. */
803 431512653 : xfs_iext_next(ifp, &info->icur);
804 431473662 : if (!xfs_iext_get_extent(ifp, &info->icur, irec))
805 : return false;
806 :
807 330902181 : if (!xchk_bmap_iext_mapping(info, irec)) {
808 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
809 : irec->br_startoff);
810 0 : return false;
811 : }
812 : nr++;
813 :
814 : /*
815 : * Iterate subsequent iextent records and merge them with the one
816 : * that we just read, if possible.
   : * The cursor is advanced only after a record has been merged.
817 : */
818 330906138 : while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
819 284188311 : if (!xchk_are_bmaps_contiguous(irec, &got))
820 : break;
821 :
822 3957 : if (!xchk_bmap_iext_mapping(info, &got)) {
823 0 : xchk_fblock_set_corrupt(info->sc, info->whichfork,
824 : got.br_startoff);
825 0 : return false;
826 : }
827 3957 : nr++;
828 :
829 3957 : irec->br_blockcount += got.br_blockcount;
830 3957 : xfs_iext_next(ifp, &info->icur);
831 : }
832 :
833 : /*
834 : * If the merged mapping could be expressed with fewer bmbt records
835 : * than we actually found, notify the user that this fork could be
836 : * optimized. CoW forks only exist in memory so we ignore them.
837 : */
838 330880066 : if (nr > 1 && info->whichfork != XFS_COW_FORK &&
839 106 : howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
840 0 : xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);
841 :
842 : return true;
843 : }
844 :
845 : /*
846 : * Scrub an inode fork's block mappings.
847 : *
848 : * First we scan every record in every btree block, if applicable.
849 : * Then we unconditionally scan the incore extent cache.
   : *
   : * Returns -ENOENT if the inode has no such fork, a negative errno if a
   : * helper fails or the walk is asked to terminate, and 0 otherwise.
   : * Corruption is reported through the scrub flags rather than the return
   : * value, so the early exits taken after flagging corruption return 0.
850 : */
851 : STATIC int
852 505310376 : xchk_bmap(
853 : struct xfs_scrub *sc,
854 : int whichfork)
855 : {
856 505310376 : struct xfs_bmbt_irec irec;
857 505310376 : struct xchk_bmap_info info = { NULL };
858 505310376 : struct xfs_mount *mp = sc->mp;
859 505310376 : struct xfs_inode *ip = sc->ip;
860 505310376 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
861 504018790 : xfs_fileoff_t endoff;
862 504018790 : int error = 0;
863 :
864 : /* Non-existent forks can be ignored. */
865 504018790 : if (!ifp)
866 : return -ENOENT;
867 :
868 344006208 : info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
869 344334940 : info.whichfork = whichfork;
870 344334940 : info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
871 344334940 : info.sc = sc;
872 :
873 344334940 : switch (whichfork) {
874 : case XFS_COW_FORK:
875 : /* No CoW forks on non-reflink filesystems. */
876 4638856 : if (!xfs_has_reflink(mp)) {
877 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
878 0 : return 0;
879 : }
880 : break;
881 : case XFS_ATTR_FORK:
882 167570231 : if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
883 0 : xchk_ino_set_corrupt(sc, sc->ip->i_ino);
884 : break;
885 172125853 : default:
886 172125853 : ASSERT(whichfork == XFS_DATA_FORK);
887 : break;
888 : }
889 :
890 : /* Check the fork values */
891 344334940 : switch (ifp->if_format) {
892 244074267 : case XFS_DINODE_FMT_UUID:
893 : case XFS_DINODE_FMT_DEV:
894 : case XFS_DINODE_FMT_LOCAL:
895 : /* No mappings to check. */
896 244074267 : if (whichfork == XFS_COW_FORK)
897 0 : xchk_fblock_set_corrupt(sc, whichfork, 0);
898 : return 0;
899 : case XFS_DINODE_FMT_EXTENTS:
900 : break;
901 2331652 : case XFS_DINODE_FMT_BTREE:
902 2331652 : if (whichfork == XFS_COW_FORK) {
903 0 : xchk_fblock_set_corrupt(sc, whichfork, 0);
904 0 : return 0;
905 : }
906 :
907 2331652 : error = xchk_bmap_btree(sc, whichfork, &info);
908 2332730 : if (error)
909 : return error;
910 : break;
911 0 : default:
912 0 : xchk_fblock_set_corrupt(sc, whichfork, 0);
913 0 : return 0;
914 : }
915 :
916 100261747 : if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
917 : return 0;
918 :
919 : /* Find the offset of the last extent in the mapping. */
920 100500419 : error = xfs_bmap_last_offset(ip, &endoff, whichfork);
921 100725710 : if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
922 0 : return error;
923 :
924 : /*
925 : * Scrub extent records. We use a special iterator function here that
926 : * combines adjacent mappings if they are logically and physically
927 : * contiguous. For large allocations that require multiple bmbt
928 : * records, this reduces the number of cross-referencing calls, which
929 : * reduces runtime. Cross referencing with the rmap is simpler because
930 : * the rmap must match the combined mapping exactly.
   : *
   : * When the walk stops early we return @error, not a literal 0:
   : * xchk_should_terminate() is handed &error, presumably so that it
   : * can record why the scrub must stop, and that status must reach
   : * the caller. NOTE(review): this relies on @error still being 0
   : * when only the corrupt flag ended the walk - confirm against
   : * xchk_fblock_process_error() and xchk_should_terminate().
931 : */
932 431596927 : while (xchk_bmap_iext_iter(&info, &irec)) {
933 330852056 : if (xchk_should_terminate(sc, &error) ||
934 330749346 : (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
935 : return error;
936 :
937 330749346 : if (irec.br_startoff >= endoff) {
938 0 : xchk_fblock_set_corrupt(sc, whichfork,
939 : irec.br_startoff);
940 0 : return 0;
941 : }
942 :
943 330749346 : if (isnullstartblock(irec.br_startblock))
944 917 : xchk_bmap_iextent_delalloc(ip, &info, &irec);
945 : else
946 330748429 : xchk_bmap_iextent(ip, &info, &irec);
947 331121453 : memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
948 : }
949 :
950 100775189 : if (xchk_bmap_want_check_rmaps(&info)) {
951 189747 : error = xchk_bmap_check_rmaps(sc, whichfork);
952 189758 : if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
953 0 : return error;
954 : }
955 :
956 : return 0;
957 : }
958 :
959 : /* Scrub an inode's data fork: runs xchk_bmap() on XFS_DATA_FORK and returns its result. */
960 : int
961 172880408 : xchk_bmap_data(
962 : struct xfs_scrub *sc)
963 : {
964 172880408 : return xchk_bmap(sc, XFS_DATA_FORK);
965 : }
966 :
967 : /* Scrub an inode's attr fork: runs xchk_bmap() on XFS_ATTR_FORK and returns its result. */
968 : int
969 172847999 : xchk_bmap_attr(
970 : struct xfs_scrub *sc)
971 : {
972 172847999 : return xchk_bmap(sc, XFS_ATTR_FORK);
973 : }
974 :
975 : /* Scrub an inode's CoW fork: runs xchk_bmap() on XFS_COW_FORK and returns its result. */
976 : int
977 160480063 : xchk_bmap_cow(
978 : struct xfs_scrub *sc)
979 : {
980 160480063 : return xchk_bmap(sc, XFS_COW_FORK);
981 : }
|