Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_btree_staging.h"
15 : #include "xfs_btree_mem.h"
16 : #include "xfs_bit.h"
17 : #include "xfs_log_format.h"
18 : #include "xfs_trans.h"
19 : #include "xfs_sb.h"
20 : #include "xfs_alloc.h"
21 : #include "xfs_alloc_btree.h"
22 : #include "xfs_ialloc.h"
23 : #include "xfs_ialloc_btree.h"
24 : #include "xfs_rmap.h"
25 : #include "xfs_rmap_btree.h"
26 : #include "xfs_inode.h"
27 : #include "xfs_icache.h"
28 : #include "xfs_bmap.h"
29 : #include "xfs_bmap_btree.h"
30 : #include "xfs_refcount.h"
31 : #include "xfs_refcount_btree.h"
32 : #include "xfs_ag.h"
33 : #include "xfs_rtrmap_btree.h"
34 : #include "xfs_rtgroup.h"
35 : #include "xfs_rtrefcount_btree.h"
36 : #include "scrub/xfs_scrub.h"
37 : #include "scrub/scrub.h"
38 : #include "scrub/common.h"
39 : #include "scrub/btree.h"
40 : #include "scrub/trace.h"
41 : #include "scrub/repair.h"
42 : #include "scrub/bitmap.h"
43 : #include "scrub/xfile.h"
44 : #include "scrub/xfarray.h"
45 : #include "scrub/iscan.h"
46 : #include "scrub/newbt.h"
47 : #include "scrub/reap.h"
48 : #include "scrub/xfbtree.h"
49 :
50 : /*
51 : * Reverse Mapping Btree Repair
52 : * ============================
53 : *
54 : * This is the most involved of all the AG space btree rebuilds. Everywhere
55 : * else in XFS we lock inodes and then AG data structures, but generating the
56 : * list of rmap records requires that we be able to scan both block mapping
57 : * btrees of every inode in the filesystem to see if it owns any extents in
58 : * this AG. We can't tolerate any inode updates while we do this, so we
59 : * freeze the filesystem to lock everyone else out, and grant ourselves
60 : * special privileges to run transactions with regular background reclamation
61 : * turned off.
62 : *
63 : * We also have to be very careful not to allow inode reclaim to start a
64 : * transaction because all transactions (other than our own) will block.
65 : * Deferred inode inactivation helps us out there.
66 : *
67 : * I) Reverse mappings for all non-space metadata and file data are collected
68 : * according to the following algorithm:
69 : *
70 : * 1. For each fork of each inode:
71 : * 1.1. Create a bitmap BMBIT to track bmbt blocks if necessary.
72 : * 1.2. If the incore extent map isn't loaded, walk the bmbt to accumulate
73 : * bmaps into rmap records (see 1.1.4). Set bits in BMBIT for each btree
74 : * block.
75 : * 1.3. If the incore extent map is loaded but the fork is in btree format,
76 : * just visit the bmbt blocks to set the corresponding BMBIT areas.
77 : * 1.4. From the incore extent map, accumulate each bmap that falls into our
78 : * target AG. Remember, multiple bmap records can map to a single rmap
79 : * record, so we cannot simply emit rmap records 1:1.
80 : * 1.5. Emit rmap records for each extent in BMBIT and free it.
81 : * 2. Create bitmaps INOBIT and ICHUNKBIT.
82 : * 3. For each record in the inobt, set the corresponding areas in ICHUNKBIT,
83 : * and set bits in INOBIT for each btree block. If the inobt has no records
84 : * at all, we must be careful to record its root in INOBIT.
85 : * 4. For each block in the finobt, set the corresponding INOBIT area.
86 : * 5. Emit rmap records for each extent in INOBIT and ICHUNKBIT and free them.
87 : * 6. Create bitmaps REFCBIT and COWBIT.
88 : * 7. For each CoW staging extent in the refcountbt, set the corresponding
89 : * areas in COWBIT.
90 : * 8. For each block in the refcountbt, set the corresponding REFCBIT area.
91 : * 9. Emit rmap records for each extent in REFCBIT and COWBIT and free them.
92 : * A. Emit rmap for the AG headers.
93 : * B. Emit rmap for the log, if there is one.
94 : *
95 : * II) The rmapbt shape and space metadata rmaps are computed as follows:
96 : *
97 : * 1. Count the rmaps collected in the previous step. (= NR)
98 : * 2. Estimate the number of rmapbt blocks needed to store NR records. (= RMB)
99 : * 3. Reserve RMB blocks through the newbt using the allocator in normap mode.
100 : * 4. Create bitmap AGBIT.
101 : * 5. For each reservation in the newbt, set the corresponding areas in AGBIT.
102 : * 6. For each block in the AGFL, bnobt, and cntbt, set the bits in AGBIT.
103 : * 7. Count the extents in AGBIT. (= AGNR)
104 : * 8. Estimate the number of rmapbt blocks needed for NR + AGNR rmaps. (= RMB')
105 : * 9. If RMB' >= RMB, reserve RMB' - RMB more newbt blocks, set RMB = RMB',
106 : * and clear AGBIT. Go to step 5.
107 : * A. Emit rmaps for each extent in AGBIT.
108 : *
109 : * III) The rmapbt is constructed and set in place as follows:
110 : *
111 : * 1. Sort the rmap records.
112 : * 2. Bulk load the rmaps.
113 : *
114 : * IV) Reap the old btree blocks.
115 : *
116 : * 1. Create a bitmap OLDRMBIT.
117 : * 2. For each gap in the new rmapbt, set the corresponding areas of OLDRMBIT.
118 : * 3. For each extent in the bnobt, clear the corresponding parts of OLDRMBIT.
119 : * 4. Reap the extents corresponding to the set areas in OLDRMBIT. These are
120 : * the parts of the AG that the rmap didn't find during its scan of the
121 : * primary metadata and aren't known to be in the free space, which implies
122 : * that they were the old rmapbt blocks.
123 : * 5. Commit.
124 : *
125 : * We use the 'xrep_rmap' prefix for all the rmap functions.
126 : */
127 :
128 : /* Context for collecting rmaps */
129 : struct xrep_rmap {
130 : /* new rmapbt information */
131 : struct xrep_newbt new_btree;
132 :
133 : /* lock for the xfbtree and xfile */
134 : struct mutex lock;
135 :
136 : /* rmap records generated from primary metadata */
137 : struct xfbtree *rmap_btree;
138 :
    : /* Scrub context that owns this repair state. */
139 : struct xfs_scrub *sc;
140 :
141 : /* in-memory btree cursor for the xfs_btree_bload iteration */
142 : struct xfs_btree_cur *mcur;
143 :
144 : /* Hooks into rmap update code. */
145 : struct xfs_rmap_hook hooks;
146 :
147 : /* inode scan cursor */
148 : struct xchk_iscan iscan;
149 :
150 : /* Number of non-freespace records found. */
151 : unsigned long long nr_records;
152 :
153 : /* bnobt/cntbt contribution to btreeblks */
154 : xfs_agblock_t freesp_btblocks;
155 :
156 : /* old agf_rmap_blocks counter */
157 : unsigned int old_rmapbt_fsbcount;
158 : };
159 :
160 : /* Set us up to repair reverse mapping btrees. */
    : /*
    : * Allocate the xrep_rmap context and hang it off sc->buf.  Returns 0 or a
    : * negative errno.  NOTE(review): sc->buf is presumably released by the
    : * common scrub teardown code -- confirm before changing ownership here.
    : */
161 : int
162 9312 : xrep_setup_ag_rmapbt(
163 : struct xfs_scrub *sc)
164 : {
165 9312 : struct xrep_rmap *rr;
166 9312 : char *descr;
167 9312 : int error;
168 :
    : /* Turn on the XCHK_FSGATES_RMAP functionality gate for this scrub. */
169 9312 : xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
170 :
    : /* Label the staging buffer target after what it will hold. */
171 9314 : descr = xchk_xfile_ag_descr(sc, "reverse mapping records");
172 9249 : error = xrep_setup_buftarg(sc, descr);
173 9319 : kfree(descr);
174 9319 : if (error)
175 : return error;
176 :
177 9319 : rr = kzalloc(sizeof(struct xrep_rmap), XCHK_GFP_FLAGS);
178 9319 : if (!rr)
179 : return -ENOMEM;
180 :
181 9319 : rr->sc = sc;
182 9319 : sc->buf = rr;
183 9319 : return 0;
184 : }
185 :
186 : /* Make sure there's nothing funny about this mapping. */
    : /*
    : * Returns 0 if @rec passes the per-AG record check and does not overlap
    : * anything in the free space btree; -EFSCORRUPTED if it fails either test;
    : * otherwise a negative errno from the bnobt lookup.
    : */
187 : STATIC int
188 19163077 : xrep_rmap_check_mapping(
189 : struct xfs_scrub *sc,
190 : const struct xfs_rmap_irec *rec)
191 : {
192 19163077 : enum xbtree_recpacking outcome;
193 19163077 : int error;
194 :
    : /* A non-NULL return here means the record itself is nonsense. */
195 19163077 : if (xfs_rmap_check_perag_irec(sc->sa.pag, rec) != NULL)
196 : return -EFSCORRUPTED;
197 :
198 : /* Make sure this isn't free space. */
199 19163078 : error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
200 19163078 : rec->rm_blockcount, &outcome);
201 19163081 : if (error)
202 : return error;
203 19163081 : if (outcome != XBTREE_RECPACKING_EMPTY)
204 0 : return -EFSCORRUPTED;
205 :
206 : return 0;
207 : }
208 :
209 : /* Store a reverse-mapping record. */
    : /*
    : * Insert one rmap record into the in-memory rmap btree.  Each record is
    : * mapped and committed to the xfbtree individually under rr->lock; any
    : * failure aborts the inode scan so the repair gives up rather than build a
    : * new tree from partial data.
    : */
210 : static inline int
211 19457132 : xrep_rmap_stash(
212 : struct xrep_rmap *rr,
213 : xfs_agblock_t startblock,
214 : xfs_extlen_t blockcount,
215 : uint64_t owner,
216 : uint64_t offset,
217 : unsigned int flags)
218 : {
219 19457132 : struct xfs_rmap_irec rmap = {
220 : .rm_startblock = startblock,
221 : .rm_blockcount = blockcount,
222 : .rm_owner = owner,
223 : .rm_offset = offset,
224 : .rm_flags = flags,
225 : };
226 19457132 : struct xfs_scrub *sc = rr->sc;
227 19457132 : struct xfs_btree_cur *mcur;
228 19457132 : struct xfs_buf *mhead_bp;
229 19457132 : int error = 0;
230 :
231 19457132 : if (xchk_should_terminate(sc, &error))
232 3 : return error;
233 :
    : /* Someone else already invalidated the scan; bail out now. */
234 19457141 : if (xchk_iscan_aborted(&rr->iscan))
235 : return -EFSCORRUPTED;
236 :
237 19457140 : trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap);
238 :
    : /* rr->lock serializes updates to the xfbtree and its backing xfile. */
239 19457140 : mutex_lock(&rr->lock);
240 19457165 : error = xfbtree_head_read_buf(rr->rmap_btree, sc->tp, &mhead_bp);
241 19457180 : if (error)
242 0 : goto out_abort;
243 :
244 19457180 : mcur = xfs_rmapbt_mem_cursor(sc->sa.pag, sc->tp, mhead_bp,
245 : rr->rmap_btree);
246 19457163 : error = xfs_rmap_map_raw(mcur, &rmap);
247 19457113 : xfs_btree_del_cursor(mcur, error);
248 19457162 : if (error)
249 0 : goto out_cancel;
250 :
    : /* Commit the record to the in-memory btree before dropping the lock. */
251 19457162 : error = xfbtree_trans_commit(rr->rmap_btree, sc->tp);
252 19457185 : if (error)
253 0 : goto out_abort;
254 :
255 19457185 : mutex_unlock(&rr->lock);
256 19457185 : return 0;
257 :
258 : out_cancel:
259 0 : xfbtree_trans_cancel(rr->rmap_btree, sc->tp);
260 0 : out_abort:
261 0 : xchk_iscan_abort(&rr->iscan);
262 0 : mutex_unlock(&rr->lock);
263 0 : return error;
264 : }
265 :
    : /* Context for turning each extent of a bitmap walk into one stashed rmap. */
266 : struct xrep_rmap_stash_run {
    : /* Repair context to stash records into. */
267 : struct xrep_rmap *rr;
    : /* Owner recorded for every extent of the walk. */
268 : uint64_t owner;
    : /* XFS_RMAP_* flags recorded for every extent of the walk. */
269 : unsigned int rmap_flags;
270 : };
271 :
272 : static int
273 3353395 : xrep_rmap_stash_run(
274 : uint64_t start,
275 : uint64_t len,
276 : void *priv)
277 : {
278 3353395 : struct xrep_rmap_stash_run *rsr = priv;
279 3353395 : struct xrep_rmap *rr = rsr->rr;
280 :
281 3353395 : return xrep_rmap_stash(rr, start, len, rsr->owner, 0, rsr->rmap_flags);
282 : }
283 :
284 : /*
285 : * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure
286 : * that the ranges are in units of FS blocks.
287 : */
288 : STATIC int
289 9063469 : xrep_rmap_stash_bitmap(
290 : struct xrep_rmap *rr,
291 : struct xagb_bitmap *bitmap,
292 : const struct xfs_owner_info *oinfo)
293 : {
294 9063469 : struct xrep_rmap_stash_run rsr = {
295 : .rr = rr,
296 9063469 : .owner = oinfo->oi_owner,
297 : .rmap_flags = 0,
298 : };
299 :
300 9063469 : if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
301 0 : rsr.rmap_flags |= XFS_RMAP_ATTR_FORK;
302 9063469 : if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
303 9017228 : rsr.rmap_flags |= XFS_RMAP_BMBT_BLOCK;
304 :
305 9063469 : return xagb_bitmap_walk(bitmap, xrep_rmap_stash_run, &rsr);
306 : }
307 :
308 : /* Section (I): Finding all file and bmbt extents. */
309 :
310 : /* Context for accumulating rmaps for an inode fork. */
311 : struct xrep_rmap_ifork {
312 : /*
313 : * Accumulate rmap data here to turn multiple adjacent bmaps into a
314 : * single rmap.
315 : */
316 : struct xfs_rmap_irec accum;
317 :
318 : /* Bitmap of bmbt blocks in this AG. */
319 : struct xagb_bitmap bmbt_blocks;
320 :
    : /* Repair context that receives the stashed rmaps. */
321 : struct xrep_rmap *rr;
322 :
323 : /* Which inode fork? */
324 : int whichfork;
325 : };
326 :
327 : /* Stash an rmap that we accumulated while walking an inode fork. */
328 : STATIC int
329 35357424 : xrep_rmap_stash_accumulated(
330 : struct xrep_rmap_ifork *rf)
331 : {
332 35357424 : if (rf->accum.rm_blockcount == 0)
333 : return 0;
334 :
335 16092193 : return xrep_rmap_stash(rf->rr, rf->accum.rm_startblock,
336 : rf->accum.rm_blockcount, rf->accum.rm_owner,
337 : rf->accum.rm_offset, rf->accum.rm_flags);
338 : }
339 :
340 : /* Accumulate a bmbt record. */
    : /*
    : * Fold one file mapping into the accumulated rmap if it is logically and
    : * physically contiguous with it and carries the same flags; otherwise stash
    : * the accumulated rmap and start a new one.  @cur is NULL when this is
    : * invoked from the incore extent walk.
    : */
341 : STATIC int
342 64311808 : xrep_rmap_visit_bmbt(
343 : struct xfs_btree_cur *cur,
344 : struct xfs_bmbt_irec *rec,
345 : void *priv)
346 : {
347 64311808 : struct xrep_rmap_ifork *rf = priv;
348 64311808 : struct xfs_mount *mp = rf->rr->sc->mp;
349 64311808 : struct xfs_rmap_irec *accum = &rf->accum;
350 64311808 : xfs_agblock_t agbno;
351 64311808 : unsigned int rmap_flags = 0;
352 64311808 : int error;
353 :
    : /* Ignore mappings that do not land in the AG being repaired. */
354 64311808 : if (XFS_FSB_TO_AGNO(mp, rec->br_startblock) !=
355 64311808 : rf->rr->sc->sa.pag->pag_agno)
356 : return 0;
357 :
358 16092201 : agbno = XFS_FSB_TO_AGBNO(mp, rec->br_startblock);
359 16092201 : if (rf->whichfork == XFS_ATTR_FORK)
360 128340 : rmap_flags |= XFS_RMAP_ATTR_FORK;
361 16092201 : if (rec->br_state == XFS_EXT_UNWRITTEN)
362 2125410 : rmap_flags |= XFS_RMAP_UNWRITTEN;
363 :
364 : /* If this bmap is adjacent to the previous one, just add it. */
365 16092201 : if (accum->rm_blockcount > 0 &&
366 13394703 : rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
367 2133565 : agbno == accum->rm_startblock + accum->rm_blockcount &&
368 788612 : rmap_flags == accum->rm_flags) {
369 0 : accum->rm_blockcount += rec->br_blockcount;
370 0 : return 0;
371 : }
372 :
373 : /* Otherwise stash the old rmap and start accumulating a new one. */
374 16092201 : error = xrep_rmap_stash_accumulated(rf);
375 16092189 : if (error)
376 : return error;
377 :
378 16092186 : accum->rm_startblock = agbno;
379 16092186 : accum->rm_blockcount = rec->br_blockcount;
380 16092186 : accum->rm_offset = rec->br_startoff;
381 16092186 : accum->rm_flags = rmap_flags;
382 16092186 : return 0;
383 : }
384 :
385 : /* Add a btree block to the bitmap. */
386 : STATIC int
387 20583656 : xrep_rmap_visit_iroot_btree_block(
388 : struct xfs_btree_cur *cur,
389 : int level,
390 : void *priv)
391 : {
392 20583656 : struct xrep_rmap_ifork *rf = priv;
393 20583656 : struct xfs_buf *bp;
394 20583656 : xfs_fsblock_t fsbno;
395 20583656 : xfs_agblock_t agbno;
396 :
    : /*
    : * No buffer presumably means the block is the root stored inside the
    : * inode fork (no separate AG block to record) -- confirm.
    : */
397 20583656 : xfs_btree_get_block(cur, level, &bp);
398 20583661 : if (!bp)
399 : return 0;
400 :
    : /* Only blocks landing in the AG being repaired go into the bitmap. */
401 11566433 : fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
402 11566433 : if (XFS_FSB_TO_AGNO(cur->bc_mp, fsbno) != rf->rr->sc->sa.pag->pag_agno)
403 : return 0;
404 :
405 2891393 : agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
406 2891393 : return xagb_bitmap_set(&rf->bmbt_blocks, agbno, 1);
407 : }
408 :
409 : /*
410 : * Iterate a metadata btree rooted in an inode to collect rmap records for
411 : * anything in this fork that matches the AG.
412 : */
    : /* Note: also flushes rf->accum via xrep_rmap_stash_accumulated on the way out. */
413 : STATIC int
414 9017228 : xrep_rmap_scan_iroot_btree(
415 : struct xrep_rmap_ifork *rf,
416 : struct xfs_btree_cur *cur)
417 : {
418 9017228 : struct xfs_owner_info oinfo;
419 9017228 : struct xrep_rmap *rr = rf->rr;
420 9017228 : int error;
421 :
    : /* rf->bmbt_blocks only lives for the duration of this call. */
422 9017228 : xagb_bitmap_init(&rf->bmbt_blocks);
423 :
424 : /* Record all the blocks in the btree itself. */
425 9017228 : error = xfs_btree_visit_blocks(cur, xrep_rmap_visit_iroot_btree_block,
426 : XFS_BTREE_VISIT_ALL, rf);
427 9017228 : if (error)
428 0 : goto out;
429 :
430 : /* Emit rmaps for the btree blocks. */
431 9017228 : xfs_rmap_ino_bmbt_owner(&oinfo, rf->accum.rm_owner, rf->whichfork);
432 9017228 : error = xrep_rmap_stash_bitmap(rr, &rf->bmbt_blocks, &oinfo);
433 9017226 : if (error)
434 0 : goto out;
435 :
436 : /* Stash any remaining accumulated rmaps. */
437 9017226 : error = xrep_rmap_stash_accumulated(rf);
438 9017225 : out:
439 9017225 : xagb_bitmap_destroy(&rf->bmbt_blocks);
440 9017225 : return error;
441 : }
442 :
443 : static inline bool
444 : is_rt_data_fork(
445 : struct xfs_inode *ip,
446 : int whichfork)
447 : {
448 : return XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK;
449 : }
450 :
451 : /*
452 : * Iterate the block mapping btree to collect rmap records for anything in this
453 : * fork that matches the AG. Sets @mappings_done to true if we've scanned the
454 : * block mappings in this fork.
    : * Returns 0 or a negative errno; the bmbt cursor is torn down on all paths.
455 : */
456 : STATIC int
457 8967991 : xrep_rmap_scan_bmbt(
458 : struct xrep_rmap_ifork *rf,
459 : struct xfs_inode *ip,
460 : bool *mappings_done)
461 : {
462 8967991 : struct xrep_rmap *rr = rf->rr;
463 8967991 : struct xfs_btree_cur *cur;
464 8967991 : struct xfs_ifork *ifp;
465 8967991 : int error;
466 :
467 8967991 : *mappings_done = false;
468 8967991 : ifp = xfs_ifork_ptr(ip, rf->whichfork);
469 8967991 : cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, rf->whichfork);
470 :
471 9641228 : if (!xfs_ifork_is_realtime(ip, rf->whichfork) &&
472 : xfs_need_iread_extents(ifp)) {
473 : /*
474 : * If the incore extent cache isn't loaded, scan the bmbt for
475 : * mapping records. This avoids loading the incore extent
476 : * tree, which will increase memory pressure at a time when
477 : * we're trying to run as quickly as we possibly can. Ignore
478 : * realtime extents.
479 : */
480 51248 : error = xfs_bmap_query_all(cur, xrep_rmap_visit_bmbt, rf);
481 51248 : if (error)
482 0 : goto out_cur;
483 :
484 51248 : *mappings_done = true;
485 : }
486 :
487 : /* Scan for the bmbt blocks, which always live on the data device. */
488 8967990 : error = xrep_rmap_scan_iroot_btree(rf, cur);
489 8967988 : out_cur:
490 8967988 : xfs_btree_del_cursor(cur, error);
491 8967991 : return error;
492 : }
493 :
494 : /*
495 : * Iterate the in-core extent cache to collect rmap records for anything in
496 : * this fork that matches the AG.
497 : */
498 : STATIC int
499 10248109 : xrep_rmap_scan_iext(
500 : struct xrep_rmap_ifork *rf,
501 : struct xfs_ifork *ifp)
502 : {
503 10248109 : struct xfs_bmbt_irec rec;
504 10248109 : struct xfs_iext_cursor icur;
505 10248109 : int error;
506 :
507 73629276 : for_each_xfs_iext(ifp, &icur, &rec) {
    : /* Delalloc reservations own no physical space; skip them. */
508 63381143 : if (isnullstartblock(rec.br_startblock))
509 9884 : continue;
    : /* Reuse the bmbt visitor; NULL cursor means "from incore cache". */
510 63371259 : error = xrep_rmap_visit_bmbt(NULL, &rec, rf);
511 63371286 : if (error)
512 3 : return error;
513 : }
514 :
    : /* Flush whatever mapping is still being accumulated. */
515 10248105 : return xrep_rmap_stash_accumulated(rf);
516 : }
517 :
    : /*
    : * Collect rmaps for the data-device blocks of a realtime rmap btree inode.
    : * Only the rtgroup that owns @ip as its rtg_rmapip is scanned.
    : */
518 : static int
519 24620 : xrep_rmap_scan_rtrmapbt(
520 : struct xrep_rmap_ifork *rf,
521 : struct xfs_inode *ip)
522 : {
523 24620 : struct xfs_scrub *sc = rf->rr->sc;
524 24620 : struct xfs_btree_cur *cur;
525 24620 : struct xfs_rtgroup *rtg;
526 24620 : xfs_rgnumber_t rgno;
527 24620 : int error;
528 :
    : /* Only the data fork may be in rmap btree format. */
529 24620 : if (rf->whichfork != XFS_DATA_FORK)
530 : return -EFSCORRUPTED;
531 :
532 73860 : for_each_rtgroup(sc->mp, rgno, rtg) {
533 73860 : if (ip == rtg->rtg_rmapip) {
534 24620 : cur = xfs_rtrmapbt_init_cursor(sc->mp, sc->tp, rtg, ip);
535 24620 : error = xrep_rmap_scan_iroot_btree(rf, cur);
536 24620 : xfs_btree_del_cursor(cur, error);
537 24620 : xfs_rtgroup_rele(rtg);
538 24620 : return error;
539 : }
540 : }
541 :
542 : /*
543 : * We shouldn't find an rmap format inode that isn't associated with
544 : * an rtgroup!
545 : */
546 0 : ASSERT(0);
547 0 : return -EFSCORRUPTED;
548 : }
549 :
    : /*
    : * Collect rmaps for the data-device blocks of a realtime refcount btree
    : * inode.  Only the rtgroup that owns @ip as its rtg_refcountip is scanned.
    : */
550 : static int
551 24620 : xrep_rmap_scan_rtrefcountbt(
552 : struct xrep_rmap_ifork *rf,
553 : struct xfs_inode *ip)
554 : {
555 24620 : struct xfs_scrub *sc = rf->rr->sc;
556 24620 : struct xfs_btree_cur *cur;
557 24620 : struct xfs_rtgroup *rtg;
558 24620 : xfs_rgnumber_t rgno;
559 24620 : int error;
560 :
    : /* Only the data fork may be in refcount btree format. */
561 24620 : if (rf->whichfork != XFS_DATA_FORK)
562 : return -EFSCORRUPTED;
563 :
564 73859 : for_each_rtgroup(sc->mp, rgno, rtg) {
565 73858 : if (ip == rtg->rtg_refcountip) {
566 24619 : cur = xfs_rtrefcountbt_init_cursor(sc->mp, sc->tp, rtg,
567 : ip);
568 24619 : error = xrep_rmap_scan_iroot_btree(rf, cur);
569 24620 : xfs_btree_del_cursor(cur, error);
570 24620 : xfs_rtgroup_rele(rtg);
571 24620 : return error;
572 : }
573 : }
574 :
575 : /*
576 : * We shouldn't find a refcount format inode that isn't associated with
577 : * an rtgroup!
578 : */
579 0 : ASSERT(0);
580 0 : return -EFSCORRUPTED;
581 : }
582 :
583 : /* Find all the extents from a given AG in an inode fork. */
    : /* Dispatches on the fork format; local-format forks map no AG blocks. */
584 : STATIC int
585 188374328 : xrep_rmap_scan_ifork(
586 : struct xrep_rmap *rr,
587 : struct xfs_inode *ip,
588 : int whichfork)
589 : {
590 188374328 : struct xrep_rmap_ifork rf = {
591 188374328 : .accum = { .rm_owner = ip->i_ino, },
592 : .rr = rr,
593 : .whichfork = whichfork,
594 : };
595 188374328 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
596 188374629 : int error = 0;
597 :
    : /* No such fork, nothing to do. */
598 188374629 : if (!ifp)
599 : return 0;
600 :
601 188361341 : if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
602 8967991 : bool mappings_done;
603 :
604 : /*
605 : * Scan the bmap btree for data device mappings. This includes
606 : * the btree blocks themselves, even if this is a realtime
607 : * file.
608 : */
609 8967991 : error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
610 8967991 : if (error || mappings_done)
611 51248 : return error;
612 179393350 : } else if (ifp->if_format == XFS_DINODE_FMT_RMAP) {
613 24620 : return xrep_rmap_scan_rtrmapbt(&rf, ip);
614 179368730 : } else if (ifp->if_format == XFS_DINODE_FMT_REFCOUNT) {
615 24620 : return xrep_rmap_scan_rtrefcountbt(&rf, ip);
616 179344110 : } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
617 : return 0;
618 : }
619 :
620 : /* Scan incore extent cache if this isn't a realtime file. */
621 40531527 : if (xfs_ifork_is_realtime(ip, whichfork))
622 : return 0;
623 :
624 10248110 : return xrep_rmap_scan_iext(&rf, ifp);
625 : }
626 :
627 : /*
628 : * Take ILOCK on a file that we want to scan.
629 : *
630 : * Select ILOCK_EXCL if the file has an unloaded data bmbt or has an unloaded
631 : * attr bmbt. Otherwise, take ILOCK_SHARED.
    : *
    : * Returns the lock mode actually taken so that the caller can unlock.
632 : */
633 : static inline unsigned int
634 94187071 : xrep_rmap_scan_ilock(
635 : struct xfs_inode *ip)
636 : {
637 94187071 : uint lock_mode = XFS_ILOCK_SHARED;
638 :
    : /* Loading the data fork extent tree requires the exclusive lock. */
639 94187071 : if (xfs_need_iread_extents(&ip->i_df)) {
640 520037 : lock_mode = XFS_ILOCK_EXCL;
641 520037 : goto lock;
642 : }
643 :
644 187320775 : if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
645 0 : lock_mode = XFS_ILOCK_EXCL;
646 :
647 93667099 : lock:
648 94187136 : xfs_ilock(ip, lock_mode);
649 94187314 : return lock_mode;
650 : }
651 :
652 : /* Record reverse mappings for a file. */
    : /* Scans the data and attr forks under ILOCK; returns 0 or a negative errno. */
653 : STATIC int
654 94187101 : xrep_rmap_scan_inode(
655 : struct xrep_rmap *rr,
656 : struct xfs_inode *ip)
657 : {
658 94187101 : unsigned int lock_mode = xrep_rmap_scan_ilock(ip);
659 94187375 : int error;
660 :
661 : /* Check the data fork. */
662 94187375 : error = xrep_rmap_scan_ifork(rr, ip, XFS_DATA_FORK);
663 94187318 : if (error)
664 3 : goto out_unlock;
665 :
666 : /* Check the attr fork. */
667 94187315 : error = xrep_rmap_scan_ifork(rr, ip, XFS_ATTR_FORK);
668 94187430 : if (error)
669 0 : goto out_unlock;
670 :
671 : /* COW fork extents are "owned" by the refcount btree. */
672 :
    : /* Only mark the inode visited if both fork scans succeeded. */
673 94187430 : xchk_iscan_mark_visited(&rr->iscan, ip);
674 94187146 : out_unlock:
675 94187146 : xfs_iunlock(ip, lock_mode);
676 94187664 : return error;
677 : }
678 :
679 : /* Section (I): Find all AG metadata extents except for free space metadata. */
680 :
    : /* Context for gathering inode-related rmaps; see algorithm section (I). */
681 : struct xrep_rmap_inodes {
    : /* Repair context that receives the stashed rmaps. */
682 : struct xrep_rmap *rr;
683 : struct xagb_bitmap inobt_blocks; /* INOBIT */
684 : struct xagb_bitmap ichunk_blocks; /* ICHUNKBIT */
685 : };
686 :
687 : /* Record inode btree rmaps. */
    : /*
    : * inobt query callback: record the btree path blocks in INOBIT and the blocks
    : * backing this record's inode chunk (minus any sparse holes) in ICHUNKBIT.
    : */
688 : STATIC int
689 488061 : xrep_rmap_walk_inobt(
690 : struct xfs_btree_cur *cur,
691 : const union xfs_btree_rec *rec,
692 : void *priv)
693 : {
694 488061 : struct xfs_inobt_rec_incore irec;
695 488061 : struct xrep_rmap_inodes *ri = priv;
696 488061 : struct xfs_mount *mp = cur->bc_mp;
697 488061 : xfs_agblock_t agbno;
698 488061 : xfs_agino_t agino;
699 488061 : xfs_agino_t iperhole;
700 488061 : unsigned int i;
701 488061 : int error;
702 :
703 : /* Record the inobt blocks. */
704 488061 : error = xagb_bitmap_set_btcur_path(&ri->inobt_blocks, cur);
705 488061 : if (error)
706 : return error;
707 :
708 488061 : xfs_inobt_btrec_to_irec(mp, rec, &irec);
709 488061 : if (xfs_inobt_check_irec(cur, &irec) != NULL)
710 : return -EFSCORRUPTED;
711 :
712 488061 : agino = irec.ir_startino;
713 :
714 : /* Record a non-sparse inode chunk. */
715 488061 : if (!xfs_inobt_issparse(irec.ir_holemask)) {
716 252477 : agbno = XFS_AGINO_TO_AGBNO(mp, agino);
717 :
718 252477 : return xagb_bitmap_set(&ri->ichunk_blocks, agbno,
719 252477 : XFS_INODES_PER_CHUNK / mp->m_sb.sb_inopblock);
720 : }
721 :
722 : /* Iterate each chunk. */
    : /* Step at least one fs block, and at least one holemask bit, at a time. */
723 235584 : iperhole = max_t(xfs_agino_t, mp->m_sb.sb_inopblock,
724 : XFS_INODES_PER_HOLEMASK_BIT);
725 235584 : for (i = 0, agino = irec.ir_startino;
726 2114028 : i < XFS_INOBT_HOLEMASK_BITS;
727 1878444 : i += iperhole / XFS_INODES_PER_HOLEMASK_BIT, agino += iperhole) {
728 : /* Skip holes. */
729 1878444 : if (irec.ir_holemask & (1 << i))
730 939222 : continue;
731 :
732 : /* Record the inode chunk otherwise. */
733 939222 : agbno = XFS_AGINO_TO_AGBNO(mp, agino);
734 939222 : error = xagb_bitmap_set(&ri->ichunk_blocks, agbno,
735 939222 : iperhole / mp->m_sb.sb_inopblock);
736 939222 : if (error)
737 0 : return error;
738 : }
739 :
740 : return 0;
741 : }
742 :
743 : /* Collect rmaps for the blocks containing inode btrees and the inode chunks. */
    : /* Implements steps 2-5 of algorithm section (I); bitmaps are local to this call. */
744 : STATIC int
745 9261 : xrep_rmap_find_inode_rmaps(
746 : struct xrep_rmap *rr)
747 : {
748 9261 : struct xrep_rmap_inodes ri = {
749 : .rr = rr,
750 : };
751 9261 : struct xfs_scrub *sc = rr->sc;
752 9261 : int error;
753 :
754 9261 : xagb_bitmap_init(&ri.inobt_blocks);
755 9260 : xagb_bitmap_init(&ri.ichunk_blocks);
756 :
757 : /*
758 : * Iterate every record in the inobt so we can capture all the inode
759 : * chunks and the blocks in the inobt itself.
760 : */
761 9252 : error = xfs_btree_query_all(sc->sa.ino_cur, xrep_rmap_walk_inobt, &ri);
762 9261 : if (error)
763 0 : goto out_bitmap;
764 :
765 : /*
766 : * Note that if there are zero records in the inobt then query_all does
767 : * nothing and we have to account the empty inobt root manually.
768 : */
769 9261 : if (xagb_bitmap_empty(&ri.ichunk_blocks)) {
770 4217 : struct xfs_agi *agi = sc->sa.agi_bp->b_addr;
771 :
772 4217 : error = xagb_bitmap_set(&ri.inobt_blocks,
773 4217 : be32_to_cpu(agi->agi_root), 1);
774 4217 : if (error)
775 0 : goto out_bitmap;
776 : }
777 :
778 : /* Scan the finobt too. */
779 9261 : if (xfs_has_finobt(sc->mp)) {
780 9261 : error = xagb_bitmap_set_btblocks(&ri.inobt_blocks,
781 : sc->sa.fino_cur);
782 9260 : if (error)
783 0 : goto out_bitmap;
784 : }
785 :
786 : /* Generate rmaps for everything. */
787 9260 : error = xrep_rmap_stash_bitmap(rr, &ri.inobt_blocks,
788 : &XFS_RMAP_OINFO_INOBT);
789 9258 : if (error)
790 0 : goto out_bitmap;
791 9258 : error = xrep_rmap_stash_bitmap(rr, &ri.ichunk_blocks,
792 : &XFS_RMAP_OINFO_INODES);
793 :
794 9260 : out_bitmap:
795 9260 : xagb_bitmap_destroy(&ri.inobt_blocks);
796 9261 : xagb_bitmap_destroy(&ri.ichunk_blocks);
797 9261 : return error;
798 : }
799 :
800 : /* Record a CoW staging extent. */
801 : STATIC int
802 2680 : xrep_rmap_walk_cowblocks(
803 : struct xfs_btree_cur *cur,
804 : const struct xfs_refcount_irec *irec,
805 : void *priv)
806 : {
807 2680 : struct xagb_bitmap *bitmap = priv;
808 :
809 2680 : if (!xfs_refcount_check_domain(irec) ||
810 2680 : irec->rc_domain != XFS_REFC_DOMAIN_COW)
811 : return -EFSCORRUPTED;
812 :
813 2680 : return xagb_bitmap_set(bitmap, irec->rc_startblock, irec->rc_blockcount);
814 : }
815 :
816 : /*
817 : * Collect rmaps for the blocks containing the refcount btree, and all CoW
818 : * staging extents.
    : * Implements steps 6-9 of algorithm section (I).
819 : */
820 : STATIC int
821 9258 : xrep_rmap_find_refcount_rmaps(
822 : struct xrep_rmap *rr)
823 : {
824 9258 : struct xagb_bitmap refcountbt_blocks; /* REFCBIT */
825 9258 : struct xagb_bitmap cow_blocks; /* COWBIT */
    : /* low/high keys together span the entire CoW staging domain. */
826 9258 : struct xfs_refcount_irec low = {
827 : .rc_startblock = 0,
828 : .rc_domain = XFS_REFC_DOMAIN_COW,
829 : };
830 9258 : struct xfs_refcount_irec high = {
831 : .rc_startblock = -1U,
832 : .rc_domain = XFS_REFC_DOMAIN_COW,
833 : };
834 9258 : struct xfs_scrub *sc = rr->sc;
835 9258 : int error;
836 :
    : /* No reflink means no refcountbt and no CoW staging extents. */
837 9258 : if (!xfs_has_reflink(sc->mp))
838 : return 0;
839 :
840 9257 : xagb_bitmap_init(&refcountbt_blocks);
841 9258 : xagb_bitmap_init(&cow_blocks);
842 :
843 : /* refcountbt */
844 9259 : error = xagb_bitmap_set_btblocks(&refcountbt_blocks, sc->sa.refc_cur);
845 9261 : if (error)
846 0 : goto out_bitmap;
847 :
848 : /* Collect rmaps for CoW staging extents. */
849 9261 : error = xfs_refcount_query_range(sc->sa.refc_cur, &low, &high,
850 : xrep_rmap_walk_cowblocks, &cow_blocks);
851 9261 : if (error)
852 0 : goto out_bitmap;
853 :
854 : /* Generate rmaps for everything. */
855 9261 : error = xrep_rmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
856 9261 : if (error)
857 0 : goto out_bitmap;
858 9261 : error = xrep_rmap_stash_bitmap(rr, &refcountbt_blocks,
859 : &XFS_RMAP_OINFO_REFC);
860 :
861 9261 : out_bitmap:
862 9261 : xagb_bitmap_destroy(&cow_blocks);
863 9261 : xagb_bitmap_destroy(&refcountbt_blocks);
864 9261 : return error;
865 : }
866 :
867 : /* Generate rmaps for the AG headers (AGI/AGF/AGFL) */
868 : STATIC int
869 9261 : xrep_rmap_find_agheader_rmaps(
870 : struct xrep_rmap *rr)
871 : {
872 9261 : struct xfs_scrub *sc = rr->sc;
873 :
874 : /* Create a record for the AG sb->agfl. */
875 18522 : return xrep_rmap_stash(rr, XFS_SB_BLOCK(sc->mp),
876 9261 : XFS_AGFL_BLOCK(sc->mp) - XFS_SB_BLOCK(sc->mp) + 1,
877 : XFS_RMAP_OWN_FS, 0, 0);
878 : }
879 :
880 : /* Generate rmaps for the log, if it's in this AG. */
881 : STATIC int
882 9261 : xrep_rmap_find_log_rmaps(
883 : struct xrep_rmap *rr)
884 : {
885 9261 : struct xfs_scrub *sc = rr->sc;
886 :
887 18522 : if (!xfs_ag_contains_log(sc->mp, sc->sa.pag->pag_agno))
888 : return 0;
889 :
890 2308 : return xrep_rmap_stash(rr,
891 2308 : XFS_FSB_TO_AGBNO(sc->mp, sc->mp->m_sb.sb_logstart),
892 : sc->mp->m_sb.sb_logblocks, XFS_RMAP_OWN_LOG, 0, 0);
893 : }
894 :
895 : /* Check and count all the records that we gathered. */
896 : STATIC int
897 19163078 : xrep_rmap_check_record(
898 : struct xfs_btree_cur *cur,
899 : const struct xfs_rmap_irec *rec,
900 : void *priv)
901 : {
902 19163078 : struct xrep_rmap *rr = priv;
903 19163078 : int error;
904 :
905 19163078 : error = xrep_rmap_check_mapping(rr->sc, rec);
906 19163081 : if (error)
907 : return error;
908 :
909 19163081 : rr->nr_records++;
910 19163081 : return 0;
911 : }
912 :
913 : /*
914 : * Generate all the reverse-mappings for this AG, a list of the old rmapbt
915 : * blocks, and the new btreeblks count. Figure out if we have enough free
916 : * space to reconstruct the inode btrees. The caller must clean up the lists
917 : * if anything goes wrong. This implements section (I) above.
918 : */
STATIC int
xrep_rmap_find_rmaps(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xchk_ag		*sa = &sc->sa;
	struct xfs_inode	*ip;
	struct xfs_buf		*mhead_bp;
	struct xfs_btree_cur	*mcur;
	int			error;

	/* Find all the per-AG metadata. */
	xrep_ag_btcur_init(sc, &sc->sa);

	error = xrep_rmap_find_inode_rmaps(rr);
	if (error)
		goto end_agscan;

	error = xrep_rmap_find_refcount_rmaps(rr);
	if (error)
		goto end_agscan;

	error = xrep_rmap_find_agheader_rmaps(rr);
	if (error)
		goto end_agscan;

	error = xrep_rmap_find_log_rmaps(rr);
end_agscan:
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration.  Specifically:
	 *
	 * Unlock the AG header buffers and cancel the transaction to release
	 * the log grant space while we scan the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to avoid
	 * repeatedly cycling empty transactions.  This can be done even though
	 * we take the IOLOCK to quiesce the file because empty transactions
	 * do not take sb_internal.
	 */
	sa->agf_bp = NULL;
	sa->agi_bp = NULL;
	xchk_trans_cancel(sc);
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	/* Iterate all AGs for inodes rmaps. */
	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
		error = xrep_rmap_scan_inode(rr, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		/* Bail out politely if a shutdown or signal is pending. */
		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&rr->iscan);
	if (error)
		return error;

	/*
	 * Switch out for a real transaction and lock the AG headers in
	 * preparation for building a new tree.
	 */
	xchk_trans_cancel(sc);
	error = xchk_setup_fs(sc);
	if (error)
		return error;
	error = xchk_perag_drain_and_lock(sc);
	if (error)
		return error;

	/*
	 * If a hook failed to update the in-memory btree, we lack the data to
	 * continue the repair.
	 */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	/*
	 * Now that we have everything locked again, we need to count the
	 * number of rmap records stashed in the btree.  This should reflect
	 * all actively-owned space in the filesystem.  At the same time, check
	 * all our records before we start building a new btree, which requires
	 * a bnobt cursor.
	 */
	error = xfbtree_head_read_buf(rr->rmap_btree, NULL, &mhead_bp);
	if (error)
		return error;

	mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, mhead_bp,
			rr->rmap_btree);
	sc->sa.bno_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag, XFS_BTNUM_BNO);

	rr->nr_records = 0;
	error = xfs_rmap_query_all(mcur, xrep_rmap_check_record, rr);

	/* Tear down the cursors and release the head buffer in either case. */
	xfs_btree_del_cursor(sc->sa.bno_cur, error);
	sc->sa.bno_cur = NULL;
	xfs_btree_del_cursor(mcur, error);
	xfs_buf_relse(mhead_bp);

	return error;
}
1032 :
1033 : /* Section (II): Reserving space for new rmapbt and setting free space bitmap */
1034 :
/* Context for stashing AGFL blocks in the OWN_AG bitmap. */
struct xrep_rmap_agfl {
	struct xagb_bitmap	*bitmap;	/* bitmap of OWN_AG blocks */
	xfs_agnumber_t		agno;		/* AG being repaired */
};
1039 :
1040 : /* Add an AGFL block to the rmap list. */
1041 : STATIC int
1042 60801 : xrep_rmap_walk_agfl(
1043 : struct xfs_mount *mp,
1044 : xfs_agblock_t agbno,
1045 : void *priv)
1046 : {
1047 60801 : struct xrep_rmap_agfl *ra = priv;
1048 :
1049 60801 : return xagb_bitmap_set(ra->bitmap, agbno, 1);
1050 : }
1051 :
1052 : /*
1053 : * Run one round of reserving space for the new rmapbt and recomputing the
1054 : * number of blocks needed to store the previously observed rmapbt records and
1055 : * the ones we'll create for the free space metadata. When we don't need more
1056 : * blocks, return a bitmap of OWN_AG extents in @freesp_blocks and set @done to
1057 : * true.
1058 : */
STATIC int
xrep_rmap_try_reserve(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur,
	struct xagb_bitmap	*freesp_blocks,
	uint64_t		*blocks_reserved,
	bool			*done)
{
	struct xrep_rmap_agfl	ra = {
		.bitmap		= freesp_blocks,
		.agno		= rr->sc->sa.pag->pag_agno,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xrep_newbt_resv	*resv, *n;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_buf		*agfl_bp;
	uint64_t		nr_blocks;	/* RMB */
	uint64_t		freesp_records;
	int			error;

	/*
	 * We're going to recompute new_btree.bload.nr_blocks at the end of
	 * this function to reflect however many btree blocks we need to store
	 * all the rmap records (including the ones that reflect the changes we
	 * made to support the new rmapbt blocks), so we save the old value
	 * here so we can decide if we've reserved enough blocks.
	 */
	nr_blocks = rr->new_btree.bload.nr_blocks;

	/*
	 * Make sure we've reserved enough space for the new btree.  This can
	 * change the shape of the free space btrees, which can cause secondary
	 * interactions with the rmap records because all three space btrees
	 * have the same rmap owner.  We'll account for all that below.
	 */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			nr_blocks - *blocks_reserved);
	if (error)
		return error;

	*blocks_reserved = rr->new_btree.bload.nr_blocks;

	/* Clear everything in the bitmap. */
	xagb_bitmap_destroy(freesp_blocks);

	/* Set all the bnobt blocks in the bitmap. */
	sc->sa.bno_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag, XFS_BTNUM_BNO);
	error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.bno_cur);
	xfs_btree_del_cursor(sc->sa.bno_cur, error);
	sc->sa.bno_cur = NULL;
	if (error)
		return error;

	/* Set all the cntbt blocks in the bitmap. */
	sc->sa.cnt_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
			sc->sa.pag, XFS_BTNUM_CNT);
	error = xagb_bitmap_set_btblocks(freesp_blocks, sc->sa.cnt_cur);
	xfs_btree_del_cursor(sc->sa.cnt_cur, error);
	sc->sa.cnt_cur = NULL;
	if (error)
		return error;

	/* Record our new btreeblks value. */
	/* Subtract 2 because the two free space btree roots don't count. */
	rr->freesp_btblocks = xagb_bitmap_hweight(freesp_blocks) - 2;

	/* Set all the new rmapbt blocks in the bitmap. */
	for_each_xrep_newbt_reservation(&rr->new_btree, resv, n) {
		error = xagb_bitmap_set(freesp_blocks, resv->agbno, resv->len);
		if (error)
			return error;
	}

	/* Set all the AGFL blocks in the bitmap. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		return error;

	error = xfs_agfl_walk(sc->mp, agf, agfl_bp, xrep_rmap_walk_agfl, &ra);
	if (error)
		return error;

	/* Count the extents in the bitmap. */
	freesp_records = xagb_bitmap_count_set_regions(freesp_blocks);

	/* Compute how many blocks we'll need for all the rmaps. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records + freesp_records);
	if (error)
		return error;

	/* We're done when we don't need more blocks. */
	*done = nr_blocks >= rr->new_btree.bload.nr_blocks;
	return 0;
}
1154 :
1155 : /*
1156 : * Iteratively reserve space for rmap btree while recording OWN_AG rmaps for
1157 : * the free space metadata. This implements section (II) above.
1158 : */
STATIC int
xrep_rmap_reserve_space(
	struct xrep_rmap	*rr,
	struct xfs_btree_cur	*rmap_cur)
{
	struct xagb_bitmap	freesp_blocks;	/* AGBIT */
	uint64_t		blocks_reserved = 0;
	bool			done = false;
	int			error;

	/* Compute how many blocks we'll need for the rmaps collected so far. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(rr->sc, &error))
		return error;

	xagb_bitmap_init(&freesp_blocks);

	/*
	 * Iteratively reserve space for the new rmapbt and recompute the
	 * number of blocks needed to store the previously observed rmapbt
	 * records and the ones we'll create for the free space metadata.
	 * Finish when we don't need more blocks.
	 */
	do {
		error = xrep_rmap_try_reserve(rr, rmap_cur, &freesp_blocks,
				&blocks_reserved, &done);
		if (error)
			goto out_bitmap;
	} while (!done);

	/* Emit rmaps for everything in the free space bitmap. */
	xrep_ag_btcur_init(rr->sc, &rr->sc->sa);
	error = xrep_rmap_stash_bitmap(rr, &freesp_blocks, &XFS_RMAP_OINFO_AG);
	xchk_ag_btcur_free(&rr->sc->sa);

out_bitmap:
	xagb_bitmap_destroy(&freesp_blocks);
	return error;
}
1203 :
1204 : /* Section (III): Building the new rmap btree. */
1205 :
1206 : /* Update the AGF counters. */
/* Update the AGF counters. */
STATIC int
xrep_rmap_reset_counters(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	xfs_agblock_t		rmap_btblocks;

	/*
	 * The AGF header contains extra information related to the reverse
	 * mapping btree, so we must update those fields here.
	 */
	/* btreeblks does not count the rmapbt root block, hence the -1. */
	rmap_btblocks = rr->new_btree.afake.af_blocks - 1;
	agf->agf_btreeblks = cpu_to_be32(rr->freesp_btblocks + rmap_btblocks);
	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS);

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the rmapbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_alt_levels[XFS_BTNUM_RMAPi] =
			pag->pagf_levels[XFS_BTNUM_RMAPi];

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}
1241 :
1242 : /* Retrieve rmapbt data for bulk load. */
/*
 * Retrieve rmapbt data for bulk load.  Called by the btree bulk loader to
 * copy the next nr_wanted records from the in-memory shadow btree (rr->mcur)
 * into the new on-disk leaf block.  Returns the number of records loaded, or
 * a negative errno.
 */
STATIC int
xrep_rmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xrep_rmap	*rr = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		int		stat = 0;

		/* Advance to the next shadow record; running dry here means
		 * the record count and the btree contents disagree. */
		error = xfs_btree_increment(rr->mcur, 0, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
		if (error)
			return error;
		if (!stat)
			return -EFSCORRUPTED;

		/* Format bc_rec.r directly into the staged leaf block. */
		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}
1277 :
1278 : /* Feed one of the new btree blocks to the bulk loader. */
1279 : STATIC int
1280 159263 : xrep_rmap_claim_block(
1281 : struct xfs_btree_cur *cur,
1282 : union xfs_btree_ptr *ptr,
1283 : void *priv)
1284 : {
1285 159263 : struct xrep_rmap *rr = priv;
1286 159263 : int error;
1287 :
1288 159263 : error = xrep_newbt_relog_autoreap(&rr->new_btree);
1289 159258 : if (error)
1290 : return error;
1291 :
1292 159258 : return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
1293 : }
1294 :
1295 : /* Custom allocation function for new rmap btrees. */
1296 : STATIC int
1297 10842 : xrep_rmap_alloc_vextent(
1298 : struct xfs_scrub *sc,
1299 : struct xfs_alloc_arg *args,
1300 : xfs_fsblock_t alloc_hint)
1301 : {
1302 10842 : int error;
1303 :
1304 : /*
1305 : * We don't want an rmap update on the allocation, since we iteratively
1306 : * compute the OWN_AG records /after/ allocating blocks for the records
1307 : * that we already know we need to store. Therefore, fix the freelist
1308 : * with the NORMAP flag set so that we don't also try to create an rmap
1309 : * for new AGFL blocks.
1310 : */
1311 10842 : error = xrep_fix_freelist(sc, XFS_ALLOC_FLAG_NORMAP);
1312 10847 : if (error)
1313 : return error;
1314 :
1315 : /*
1316 : * If xrep_fix_freelist fixed the freelist by moving blocks from the
1317 : * free space btrees or by removing blocks from the AGFL and queueing
1318 : * an EFI to free the block, the transaction will be dirty. This
1319 : * second case is of interest to us.
1320 : *
1321 : * Later on, we will need to compare gaps in the new recordset against
1322 : * the block usage of all OWN_AG owners in order to free the old
1323 : * btree's blocks, which means that we can't have EFIs for former AGFL
1324 : * blocks attached to the repair transaction when we commit the new
1325 : * btree.
1326 : *
1327 : * xrep_newbt_alloc_blocks guarantees this for us by calling
1328 : * xrep_defer_finish to commit anything that fix_freelist may have
1329 : * added to the transaction.
1330 : */
1331 10847 : return xfs_alloc_vextent_near_bno(args, alloc_hint);
1332 : }
1333 :
1334 :
1335 : /* Count the records in this btree. */
1336 : STATIC int
1337 9219 : xrep_rmap_count_records(
1338 : struct xfs_btree_cur *cur,
1339 : unsigned long long *nr)
1340 : {
1341 9219 : int running = 1;
1342 9219 : int error;
1343 :
1344 9219 : *nr = 0;
1345 :
1346 9219 : error = xfs_btree_goto_left_edge(cur);
1347 9206 : if (error)
1348 : return error;
1349 :
1350 19224768 : while (running && !(error = xfs_btree_increment(cur, 0, &running))) {
1351 19215562 : if (running)
1352 19206351 : (*nr)++;
1353 : }
1354 :
1355 : return error;
1356 : }
1357 : /*
1358 : * Use the collected rmap information to stage a new rmap btree. If this is
1359 : * successful we'll return with the new btree root information logged to the
1360 : * repair transaction but not yet committed. This implements section (III)
1361 : * above.
1362 : */
STATIC int
xrep_rmap_build_new_tree(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_btree_cur	*rmap_cur;
	struct xfs_buf		*mhead_bp;
	xfs_fsblock_t		fsbno;
	int			error;

	/*
	 * Preserve the old rmapbt block count so that we can adjust the
	 * per-AG rmapbt reservation after we commit the new btree root and
	 * want to dispose of the old btree blocks.
	 */
	rr->old_rmapbt_fsbcount = be32_to_cpu(agf->agf_rmap_blocks);

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.  The new blocks are accounted to the
	 * rmapbt per-AG reservation, which we will adjust further after
	 * committing the new btree.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, XFS_RMAP_BLOCK(sc->mp));
	xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_SKIP_UPDATE,
			fsbno, XFS_AG_RESV_RMAPBT);
	rr->new_btree.bload.get_records = xrep_rmap_get_records;
	rr->new_btree.bload.claim_block = xrep_rmap_claim_block;
	rr->new_btree.alloc_vextent = xrep_rmap_alloc_vextent;
	rmap_cur = xfs_rmapbt_stage_cursor(sc->mp, &rr->new_btree.afake, pag);

	/*
	 * Initialize @rr->new_btree, reserve space for the new rmapbt,
	 * and compute OWN_AG rmaps.
	 */
	error = xrep_rmap_reserve_space(rr, rmap_cur);
	if (error)
		goto err_cur;

	/*
	 * Count the rmapbt records again, because the space reservation
	 * for the rmapbt itself probably added more records to the btree.
	 */
	error = xfbtree_head_read_buf(rr->rmap_btree, NULL, &mhead_bp);
	if (error)
		goto err_cur;

	rr->mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, mhead_bp,
			rr->rmap_btree);

	error = xrep_rmap_count_records(rr->mcur, &rr->nr_records);
	if (error)
		goto err_mcur;

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the incore btree height so
	 * that we don't trip the verifiers when writing the new btree blocks
	 * to disk.
	 */
	pag->pagf_alt_levels[XFS_BTNUM_RMAPi] =
			rr->new_btree.bload.btree_height;

	/*
	 * Move the cursor to the left edge of the tree so that the first
	 * increment in ->get_records positions us at the first record.
	 */
	error = xfs_btree_goto_left_edge(rr->mcur);
	if (error)
		goto err_level;

	/* Add all observed rmap records. */
	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_level;

	/*
	 * Install the new btree in the AG header.  After this point the old
	 * btree is no longer accessible and the new tree is live.
	 */
	xfs_rmapbt_commit_staged_btree(rmap_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(rmap_cur, 0);
	xfs_btree_del_cursor(rr->mcur, 0);
	rr->mcur = NULL;
	xfs_buf_relse(mhead_bp);

	/*
	 * Now that we've written the new btree to disk, we don't need to keep
	 * updating the in-memory btree.  Abort the scan to stop live updates.
	 */
	xchk_iscan_abort(&rr->iscan);

	/*
	 * The newly committed rmap recordset includes mappings for the blocks
	 * that we reserved to build the new btree.  If there is excess space
	 * reservation to be freed, the corresponding rmap records must also be
	 * removed.
	 */
	rr->new_btree.oinfo = XFS_RMAP_OINFO_AG;

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_rmap_reset_counters(rr);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_level:
	pag->pagf_alt_levels[XFS_BTNUM_RMAPi] = 0;
err_mcur:
	xfs_btree_del_cursor(rr->mcur, error);
	xfs_buf_relse(mhead_bp);
err_cur:
	xfs_btree_del_cursor(rmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}
1490 :
1491 : /* Section (IV): Reaping the old btree. */
1492 :
/* Context for finding old-rmapbt blocks by looking at gaps in the rmaps. */
struct xrep_rmap_find_gaps {
	struct xagb_bitmap	rmap_gaps;	/* agblocks not owned by any rmap */
	xfs_agblock_t		next_agbno;	/* next agbno after the last rmap seen */
};
1497 :
1498 : /* Subtract each free extent in the bnobt from the rmap gaps. */
1499 : STATIC int
1500 931599 : xrep_rmap_find_freesp(
1501 : struct xfs_btree_cur *cur,
1502 : const struct xfs_alloc_rec_incore *rec,
1503 : void *priv)
1504 : {
1505 931599 : struct xrep_rmap_find_gaps *rfg = priv;
1506 :
1507 1863196 : return xagb_bitmap_clear(&rfg->rmap_gaps, rec->ar_startblock,
1508 931599 : rec->ar_blockcount);
1509 : }
1510 :
1511 : /* Record the free space we find, as part of cleaning out the btree. */
1512 : STATIC int
1513 19206392 : xrep_rmap_find_gaps(
1514 : struct xfs_btree_cur *cur,
1515 : const struct xfs_rmap_irec *rec,
1516 : void *priv)
1517 : {
1518 19206392 : struct xrep_rmap_find_gaps *rfg = priv;
1519 19206392 : int error;
1520 :
1521 19206392 : if (rec->rm_startblock > rfg->next_agbno) {
1522 930155 : error = xagb_bitmap_set(&rfg->rmap_gaps, rfg->next_agbno,
1523 : rec->rm_startblock - rfg->next_agbno);
1524 930154 : if (error)
1525 : return error;
1526 : }
1527 :
1528 19206391 : rfg->next_agbno = max_t(xfs_agblock_t, rfg->next_agbno,
1529 : rec->rm_startblock + rec->rm_blockcount);
1530 19206391 : return 0;
1531 : }
1532 :
1533 : /*
1534 : * Reap the old rmapbt blocks. Now that the rmapbt is fully rebuilt, we make
1535 : * a list of gaps in the rmap records and a list of the extents mentioned in
1536 : * the bnobt. Any block that's in the new rmapbt gap list but not mentioned
1537 : * in the bnobt is a block from the old rmapbt and can be removed.
1538 : */
1539 : STATIC int
1540 9219 : xrep_rmap_remove_old_tree(
1541 : struct xrep_rmap *rr)
1542 : {
1543 9219 : struct xrep_rmap_find_gaps rfg = {
1544 : .next_agbno = 0,
1545 : };
1546 9219 : struct xfs_scrub *sc = rr->sc;
1547 9219 : struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
1548 9219 : struct xfs_perag *pag = sc->sa.pag;
1549 9219 : struct xfs_btree_cur *mcur;
1550 9219 : struct xfs_buf *mhead_bp;
1551 9219 : xfs_agblock_t agend;
1552 9219 : int error;
1553 :
1554 9219 : xagb_bitmap_init(&rfg.rmap_gaps);
1555 :
1556 : /* Compute free space from the new rmapbt. */
1557 9219 : error = xfbtree_head_read_buf(rr->rmap_btree, NULL, &mhead_bp);
1558 9219 : mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, NULL, mhead_bp,
1559 : rr->rmap_btree);
1560 :
1561 9219 : error = xfs_rmap_query_all(mcur, xrep_rmap_find_gaps, &rfg);
1562 9219 : xfs_btree_del_cursor(mcur, error);
1563 9219 : xfs_buf_relse(mhead_bp);
1564 9219 : if (error)
1565 0 : goto out_bitmap;
1566 :
1567 : /* Insert a record for space between the last rmap and EOAG. */
1568 9219 : agend = be32_to_cpu(agf->agf_length);
1569 9219 : if (rfg.next_agbno < agend) {
1570 9219 : error = xagb_bitmap_set(&rfg.rmap_gaps, rfg.next_agbno,
1571 : agend - rfg.next_agbno);
1572 9219 : if (error)
1573 0 : goto out_bitmap;
1574 : }
1575 :
1576 : /* Compute free space from the existing bnobt. */
1577 9219 : sc->sa.bno_cur = xfs_allocbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
1578 : sc->sa.pag, XFS_BTNUM_BNO);
1579 9219 : error = xfs_alloc_query_all(sc->sa.bno_cur, xrep_rmap_find_freesp,
1580 : &rfg);
1581 9219 : xfs_btree_del_cursor(sc->sa.bno_cur, error);
1582 9219 : sc->sa.bno_cur = NULL;
1583 9219 : if (error)
1584 0 : goto out_bitmap;
1585 :
1586 : /*
1587 : * Free the "free" blocks that the new rmapbt knows about but the bnobt
1588 : * doesn't--these are the old rmapbt blocks. Credit the old rmapbt
1589 : * block usage count back to the per-AG rmapbt reservation (and not
1590 : * fdblocks, since the rmap btree lives in free space) to keep the
1591 : * reservation and free space accounting correct.
1592 : */
1593 9219 : error = xrep_reap_agblocks(sc, &rfg.rmap_gaps,
1594 : &XFS_RMAP_OINFO_ANY_OWNER, XFS_AG_RESV_RMAPBT);
1595 9219 : if (error)
1596 0 : goto out_bitmap;
1597 :
1598 : /*
1599 : * Now that we've zapped all the old rmapbt blocks we can turn off
1600 : * the alternate height mechanism and reset the per-AG space
1601 : * reservation.
1602 : */
1603 9219 : pag->pagf_alt_levels[XFS_BTNUM_RMAPi] = 0;
1604 9219 : sc->flags |= XREP_RESET_PERAG_RESV;
1605 9219 : out_bitmap:
1606 9219 : xagb_bitmap_destroy(&rfg.rmap_gaps);
1607 9219 : return error;
1608 : }
1609 :
1610 : static inline bool
1611 271211 : xrep_rmapbt_want_live_update(
1612 : struct xchk_iscan *iscan,
1613 : const struct xfs_owner_info *oi)
1614 : {
1615 271211 : if (xchk_iscan_aborted(iscan))
1616 : return false;
1617 :
1618 : /*
1619 : * Before unlocking the AG header to perform the inode scan, we
1620 : * recorded reverse mappings for all AG metadata except for the OWN_AG
1621 : * metadata. IOWs, the in-memory btree knows about the AG headers, the
1622 : * two inode btrees, the CoW staging extents, and the refcount btrees.
1623 : * For these types of metadata, we need to record the live updates in
1624 : * the in-memory rmap btree.
1625 : *
1626 : * However, we do not scan the free space btrees or the AGFL until we
1627 : * have re-locked the AGF and are ready to reserve space for the new
1628 : * new rmap btree, so we do not want live updates for OWN_AG metadata.
1629 : */
1630 258483 : if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
1631 2234 : return oi->oi_owner != XFS_RMAP_OWN_AG;
1632 :
1633 : /* Ignore updates to files that the scanner hasn't visited yet. */
1634 256249 : return xchk_iscan_want_live_update(iscan, oi->oi_owner);
1635 : }
1636 :
1637 : /*
1638 : * Apply a rmapbt update from the regular filesystem into our shadow btree.
1639 : * We're running from the thread that owns the AGF buffer and is generating
1640 : * the update, so we must be careful about which parts of the struct xrep_rmap
1641 : * that we change.
1642 : */
1643 : static int
1644 271211 : xrep_rmapbt_live_update(
1645 : struct notifier_block *nb,
1646 : unsigned long action,
1647 : void *data)
1648 : {
1649 271211 : struct xfs_rmap_update_params *p = data;
1650 271211 : struct xrep_rmap *rr;
1651 271211 : struct xfs_mount *mp;
1652 271211 : struct xfs_btree_cur *mcur;
1653 271211 : struct xfs_buf *mhead_bp;
1654 271211 : struct xfs_trans *tp;
1655 271211 : void *txcookie;
1656 271211 : int error;
1657 :
1658 271211 : rr = container_of(nb, struct xrep_rmap, hooks.update_hook.nb);
1659 271211 : mp = rr->sc->mp;
1660 :
1661 271211 : if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
1662 143482 : goto out_unlock;
1663 :
1664 127729 : trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, action, p);
1665 :
1666 127729 : error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
1667 127729 : if (error)
1668 0 : goto out_abort;
1669 :
1670 127729 : mutex_lock(&rr->lock);
1671 127729 : error = xfbtree_head_read_buf(rr->rmap_btree, tp, &mhead_bp);
1672 127729 : if (error)
1673 0 : goto out_cancel;
1674 :
1675 127729 : mcur = xfs_rmapbt_mem_cursor(rr->sc->sa.pag, tp, mhead_bp,
1676 : rr->rmap_btree);
1677 127729 : error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
1678 : p->blockcount, &p->oinfo, p->unwritten);
1679 127729 : xfs_btree_del_cursor(mcur, error);
1680 127729 : if (error)
1681 0 : goto out_cancel;
1682 :
1683 127729 : error = xfbtree_trans_commit(rr->rmap_btree, tp);
1684 127729 : if (error)
1685 0 : goto out_cancel;
1686 :
1687 127729 : xrep_trans_cancel_hook_dummy(&txcookie, tp);
1688 127729 : mutex_unlock(&rr->lock);
1689 127729 : return NOTIFY_DONE;
1690 :
1691 0 : out_cancel:
1692 0 : xfbtree_trans_cancel(rr->rmap_btree, tp);
1693 0 : xrep_trans_cancel_hook_dummy(&txcookie, tp);
1694 0 : out_abort:
1695 0 : mutex_unlock(&rr->lock);
1696 0 : xchk_iscan_abort(&rr->iscan);
1697 : out_unlock:
1698 : return NOTIFY_DONE;
1699 : }
1700 :
1701 : /* Set up the filesystem scan components. */
/* Set up the filesystem scan components. */
STATIC int
xrep_rmap_setup_scan(
	struct xrep_rmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	mutex_init(&rr->lock);

	/* Set up in-memory rmap btree */
	error = xfs_rmapbt_mem_create(sc->mp, sc->sa.pag->pag_agno,
			sc->xfile_buftarg, &rr->rmap_btree);
	if (error)
		goto out_mutex;

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &rr->iscan);

	/*
	 * Hook into live rmap operations so that we can update our in-memory
	 * btree to reflect live changes on the filesystem.  Since we drop the
	 * AGF buffer to scan all the inodes, we need this piece to avoid
	 * installing a stale btree.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
	xfs_hook_setup(&rr->hooks.update_hook, xrep_rmapbt_live_update);
	error = xfs_rmap_hook_add(sc->sa.pag, &rr->hooks);
	if (error)
		goto out_iscan;
	return 0;

out_iscan:
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(rr->rmap_btree);
out_mutex:
	mutex_destroy(&rr->lock);
	return error;
}
1740 :
1741 : /* Tear down scan components. */
1742 : STATIC void
1743 9261 : xrep_rmap_teardown(
1744 : struct xrep_rmap *rr)
1745 : {
1746 9261 : struct xfs_scrub *sc = rr->sc;
1747 :
1748 9261 : xchk_iscan_abort(&rr->iscan);
1749 9261 : xfs_rmap_hook_del(sc->sa.pag, &rr->hooks);
1750 9259 : xchk_iscan_teardown(&rr->iscan);
1751 9257 : xfbtree_destroy(rr->rmap_btree);
1752 9261 : mutex_destroy(&rr->lock);
1753 9261 : }
1754 :
1755 : /* Repair the rmap btree for some AG. */
/* Repair the rmap btree for some AG. */
int
xrep_rmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rmap	*rr = sc->buf;
	int			error;

	error = xrep_rmap_setup_scan(rr);
	if (error)
		return error;

	/*
	 * Collect rmaps for everything in this AG that isn't space metadata.
	 * These rmaps won't change even as we try to allocate blocks.
	 */
	error = xrep_rmap_find_rmaps(rr);
	if (error)
		goto out_records;

	/* Rebuild the rmap information. */
	error = xrep_rmap_build_new_tree(rr);
	if (error)
		goto out_records;

	/* Kill the old tree. */
	error = xrep_rmap_remove_old_tree(rr);

out_records:
	xrep_rmap_teardown(rr);
	return error;
}
|