Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_btree_staging.h"
15 : #include "xfs_btree_mem.h"
16 : #include "xfs_bit.h"
17 : #include "xfs_log_format.h"
18 : #include "xfs_trans.h"
19 : #include "xfs_sb.h"
20 : #include "xfs_alloc.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_rtrmap_btree.h"
24 : #include "xfs_inode.h"
25 : #include "xfs_icache.h"
26 : #include "xfs_bmap.h"
27 : #include "xfs_bmap_btree.h"
28 : #include "xfs_quota.h"
29 : #include "xfs_rtalloc.h"
30 : #include "xfs_ag.h"
31 : #include "xfs_rtgroup.h"
32 : #include "xfs_refcount.h"
33 : #include "scrub/xfs_scrub.h"
34 : #include "scrub/scrub.h"
35 : #include "scrub/common.h"
36 : #include "scrub/btree.h"
37 : #include "scrub/trace.h"
38 : #include "scrub/repair.h"
39 : #include "scrub/bitmap.h"
40 : #include "scrub/xfile.h"
41 : #include "scrub/xfarray.h"
42 : #include "scrub/iscan.h"
43 : #include "scrub/newbt.h"
44 : #include "scrub/reap.h"
45 : #include "scrub/xfbtree.h"
46 :
47 : /*
48 : * Realtime Reverse Mapping Btree Repair
49 : * =====================================
50 : *
51 : * This isn't quite as difficult as repairing the rmap btree on the data
52 : * device, since we only store the data fork extents of realtime files on the
53 : * realtime device. As with the data device rmap repair, we use a live
54 : * inode scan with update hooks instead of freezing the filesystem, and we
55 : * only have to scan realtime inodes.
56 : *
57 : * Collecting entries for the new realtime rmap btree is easy -- all we have
58 : * to do is generate rtrmap entries from the data fork mappings of all realtime
59 : * files in the filesystem. We then scan the rmap btrees of the data device
60 : * looking for extents belonging to the old btree and note them in a bitmap.
61 : *
62 : * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
63 : * a new btree cursor and commit the staged btree into the realtime rmap
64 : * inode. Then we can reap the blocks from the old btree.
65 : *
66 : * We use the 'xrep_rtrmap' prefix for all the rmap functions.
67 : */
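/*
 * In outline, the repair (xrep_rtrmapbt, at the bottom of this file)
 * proceeds as follows:
 *
 * 1. xrep_rtrmap_setup_scan creates an in-memory rmap btree, starts a
 *    coordinated inode scan, and hooks live rmap updates.
 * 2. xrep_rtrmap_find_rmaps stashes rmaps for rt file data forks, CoW
 *    staging extents, and the rt group superblock, and records the old
 *    rtrmapbt blocks in a bitmap.
 * 3. xrep_rtrmap_build_new_tree bulk-loads the stashed records into a
 *    staged btree and commits it into the rtrmap inode.
 * 4. xrep_rtrmap_remove_old_tree reaps the old btree blocks.
 */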
68 :
69 : /* Context for collecting rmaps */
70 : struct xrep_rtrmap {
71 : /* new rtrmapbt information */
72 : struct xrep_newbt new_btree;
73 :
74 : /* lock for the xfbtree and xfile */
75 : struct mutex lock;
76 :
77 : /* rmap records generated from primary metadata */
78 : struct xfbtree *rtrmap_btree;
79 :
80 : struct xfs_scrub *sc;
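/* the owning scrub context */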
81 :
82 : /* bitmap of old rtrmapbt blocks */
83 : struct xfsb_bitmap old_rtrmapbt_blocks;
84 :
85 : /* Hooks into rtrmap update code. */
86 : struct xfs_rmap_hook hooks;
87 :
88 : /* inode scan cursor */
89 : struct xchk_iscan iscan;
90 :
91 : /* in-memory btree cursor for the ->get_blocks walk */
92 : struct xfs_btree_cur *mcur;
93 :
94 : /* Number of records we're staging in the new btree. */
95 : uint64_t nr_records;
96 : };
97 :
98 : /* Set us up to repair rt reverse mapping btrees. */
99 : int
100 11466 : xrep_setup_rtrmapbt(
101 : struct xfs_scrub *sc)
102 : {
103 11466 : struct xrep_rtrmap *rr;
104 11466 : char *descr;
105 11466 : int error;
106 :
107 11466 : xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
108 :
109 11465 : descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
110 11458 : error = xrep_setup_buftarg(sc, descr);
111 11467 : kfree(descr);
112 11467 : if (error)
113 : return error;
114 :
115 11467 : rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
116 11467 : if (!rr)
117 : return -ENOMEM;
118 :
119 11467 : rr->sc = sc;
120 11467 : sc->buf = rr;
121 11467 : return 0;
122 : }
123 :
124 : /* Make sure there's nothing funny about this mapping. */
125 : STATIC int
126 51346758 : xrep_rtrmap_check_mapping(
127 : struct xfs_scrub *sc,
128 : const struct xfs_rmap_irec *rec)
129 : {
130 51346758 : xfs_rtblock_t rtbno;
131 :
132 51346758 : if (xfs_rmap_check_rtgroup_irec(sc->sr.rtg, rec) != NULL)
133 : return -EFSCORRUPTED;
134 :
135 : /* Make sure this isn't free space. */
136 51347069 : rtbno = xfs_rgbno_to_rtb(sc->mp, sc->sr.rtg->rtg_rgno,
137 51347069 : rec->rm_startblock);
138 51347076 : return xrep_require_rtext_inuse(sc, rtbno, rec->rm_blockcount, false);
139 : }
140 :
141 : /* Store a reverse-mapping record. */
142 : static inline int
143 52111991 : xrep_rtrmap_stash(
144 : struct xrep_rtrmap *rr,
145 : xfs_rgblock_t startblock,
146 : xfs_extlen_t blockcount,
147 : uint64_t owner,
148 : uint64_t offset,
149 : unsigned int flags)
150 : {
151 52111991 : struct xfs_rmap_irec rmap = {
152 : .rm_startblock = startblock,
153 : .rm_blockcount = blockcount,
154 : .rm_owner = owner,
155 : .rm_offset = offset,
156 : .rm_flags = flags,
157 : };
158 52111991 : struct xfs_scrub *sc = rr->sc;
159 52111991 : struct xfs_btree_cur *mcur;
160 52111991 : struct xfs_buf *mhead_bp;
161 52111991 : int error = 0;
162 :
163 52111991 : if (xchk_should_terminate(sc, &error))
164 2 : return error;
165 :
166 52109076 : if (xchk_iscan_aborted(&rr->iscan))
167 : return -EFSCORRUPTED;
168 :
169 52108663 : trace_xrep_rtrmap_found(sc->mp, &rmap);
170 :
171 : /* Add entry to in-memory btree. */
172 52107615 : mutex_lock(&rr->lock);
173 52112365 : error = xfbtree_head_read_buf(rr->rtrmap_btree, sc->tp, &mhead_bp);
174 52104614 : if (error)
175 0 : goto out_abort;
176 :
177 52104614 : mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, mhead_bp,
178 : rr->rtrmap_btree);
179 52108149 : error = xfs_rmap_map_raw(mcur, &rmap);
180 52112362 : xfs_btree_del_cursor(mcur, error);
181 52114735 : if (error)
182 0 : goto out_cancel;
183 :
184 52114735 : error = xfbtree_trans_commit(rr->rtrmap_btree, sc->tp);
185 52114690 : if (error)
186 0 : goto out_abort;
187 :
188 52114690 : mutex_unlock(&rr->lock);
189 52114690 : return 0;
190 :
191 : out_cancel:
192 0 : xfbtree_trans_cancel(rr->rtrmap_btree, sc->tp);
193 0 : out_abort:
194 0 : xchk_iscan_abort(&rr->iscan);
195 0 : mutex_unlock(&rr->lock);
196 0 : return error;
197 : }
198 :
199 : /* Finding all file and bmbt extents. */
200 :
201 : /* Context for accumulating rmaps for an inode fork. */
202 : struct xrep_rtrmap_ifork {
203 : /*
204 : * Accumulate rmap data here to turn multiple adjacent bmaps into a
205 : * single rmap.
206 : */
207 : struct xfs_rmap_irec accum;
208 :
209 : struct xrep_rtrmap *rr;
210 : };
211 :
212 : /* Stash an rmap that we accumulated while walking an inode fork. */
213 : STATIC int
214 73697706 : xrep_rtrmap_stash_accumulated(
215 : struct xrep_rtrmap_ifork *rf)
216 : {
217 73697706 : if (rf->accum.rm_blockcount == 0)
218 : return 0;
219 :
220 51884639 : return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
221 : rf->accum.rm_blockcount, rf->accum.rm_owner,
222 : rf->accum.rm_offset, rf->accum.rm_flags);
223 : }
224 :
225 : /* Accumulate a bmbt record. */
226 : STATIC int
227 262041470 : xrep_rtrmap_visit_bmbt(
228 : struct xfs_btree_cur *cur,
229 : struct xfs_bmbt_irec *rec,
230 : void *priv)
231 : {
232 262041470 : struct xrep_rtrmap_ifork *rf = priv;
233 262041470 : struct xfs_rmap_irec *accum = &rf->accum;
234 262041470 : struct xfs_mount *mp = rf->rr->sc->mp;
235 262041470 : xfs_rgnumber_t rgno;
236 262041470 : xfs_rgblock_t rgbno;
237 262041470 : unsigned int rmap_flags = 0;
238 262041470 : int error;
239 :
240 262041470 : rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock, &rgno);
241 262131384 : if (rgno != rf->rr->sc->sr.rtg->rtg_rgno)
242 : return 0;
243 :
244 51884586 : if (rec->br_state == XFS_EXT_UNWRITTEN)
245 9781760 : rmap_flags |= XFS_RMAP_UNWRITTEN;
246 :
247 : /* If this bmap is adjacent to the previous one, just add it. */
248 51884586 : if (accum->rm_blockcount > 0 &&
249 46782292 : rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
250 27904747 : rgbno == accum->rm_startblock + accum->rm_blockcount &&
251 1851076 : rmap_flags == accum->rm_flags) {
252 0 : accum->rm_blockcount += rec->br_blockcount;
253 0 : return 0;
254 : }
255 :
256 : /* Otherwise stash the old rmap and start accumulating a new one. */
257 51884586 : error = xrep_rtrmap_stash_accumulated(rf);
258 51887223 : if (error)
259 : return error;
260 :
261 51887221 : accum->rm_startblock = rgbno;
262 51887221 : accum->rm_blockcount = rec->br_blockcount;
263 51887221 : accum->rm_offset = rec->br_startoff;
264 51887221 : accum->rm_flags = rmap_flags;
265 51887221 : return 0;
266 : }
267 :
268 : /*
269 : * Iterate the block mapping btree to collect rmap records for anything in this
270 : * fork that maps to the rt volume. Sets @mappings_done to true if we've
271 : * scanned the block mappings in this fork.
272 : */
273 : STATIC int
274 4756343 : xrep_rtrmap_scan_bmbt(
275 : struct xrep_rtrmap_ifork *rf,
276 : struct xfs_inode *ip,
277 : bool *mappings_done)
278 : {
279 4756343 : struct xrep_rtrmap *rr = rf->rr;
280 4756343 : struct xfs_btree_cur *cur;
281 4756343 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
282 4756343 : int error = 0;
283 :
284 4756343 : *mappings_done = false;
285 :
286 : /*
287 : * If the incore extent cache is already loaded, we'll just use the
288 : * incore extent scanner to record mappings. Don't bother walking the
289 : * ondisk extent tree.
290 : */
291 4756343 : if (!xfs_need_iread_extents(ifp))
292 : return 0;
293 :
294 : /* Accumulate all the mappings in the bmap btree. */
295 0 : cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
296 0 : error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
297 0 : xfs_btree_del_cursor(cur, error);
298 0 : if (error)
299 : return error;
300 :
301 : /* Stash any remaining accumulated rmaps and exit. */
302 0 : *mappings_done = true;
303 0 : return xrep_rtrmap_stash_accumulated(rf);
304 : }
305 :
306 : /*
307 : * Iterate the in-core extent cache to collect rmap records for anything in
308 : * this fork that maps into this rt group.
309 : */
310 : STATIC int
311 21816338 : xrep_rtrmap_scan_iext(
312 : struct xrep_rtrmap_ifork *rf,
313 : struct xfs_ifork *ifp)
314 : {
315 21816338 : struct xfs_bmbt_irec rec;
316 21816338 : struct xfs_iext_cursor icur;
317 21816338 : int error;
318 :
319 284314467 : for_each_xfs_iext(ifp, &icur, &rec) {
320 261762213 : if (isnullstartblock(rec.br_startblock))
321 0 : continue;
322 261762213 : error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
323 262498131 : if (error)
324 2 : return error;
325 : }
326 :
327 21815703 : return xrep_rtrmap_stash_accumulated(rf);
328 : }
329 :
330 : /* Find all the extents on the realtime device mapped by an inode's data fork. */
331 : STATIC int
332 21816436 : xrep_rtrmap_scan_dfork(
333 : struct xrep_rtrmap *rr,
334 : struct xfs_inode *ip)
335 : {
336 21816436 : struct xrep_rtrmap_ifork rf = {
337 21816436 : .accum = { .rm_owner = ip->i_ino, },
338 : .rr = rr,
339 : };
340 21816436 : struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
341 21816436 : int error = 0;
342 :
343 21816436 : if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
344 4756345 : bool mappings_done;
345 :
346 : /*
347 : * Scan the bmbt for mappings. If the incore extent tree is
348 : * loaded, we want to scan the cached mappings since that's
349 : * faster when the extent counts are very high.
350 : */
351 4756345 : error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
352 4756331 : if (error || mappings_done)
353 0 : return error;
354 17060091 : } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
355 : /* realtime data forks should only be extents or btree */
356 : return -EFSCORRUPTED;
357 : }
358 :
359 : /* Scan incore extent cache. */
360 21816422 : return xrep_rtrmap_scan_iext(&rf, ifp);
361 : }
362 :
363 : /* Record reverse mappings for a file. */
364 : STATIC int
365 51485018 : xrep_rtrmap_scan_inode(
366 : struct xrep_rtrmap *rr,
367 : struct xfs_inode *ip)
368 : {
369 51485018 : unsigned int lock_mode;
370 51485018 : int error = 0;
371 :
372 : /* Skip the rt rmap btree inode. */
373 51485018 : if (rr->sc->ip == ip)
374 : return 0;
375 :
376 51473875 : lock_mode = xfs_ilock_data_map_shared(ip);
377 :
378 : /* Check the data fork if it's on the realtime device. */
379 51474126 : if (XFS_IS_REALTIME_INODE(ip)) {
380 21817291 : error = xrep_rtrmap_scan_dfork(rr, ip);
381 21817033 : if (error)
382 2 : goto out_unlock;
383 : }
384 :
385 51473866 : xchk_iscan_mark_visited(&rr->iscan, ip);
386 51471697 : out_unlock:
387 51471697 : xfs_iunlock(ip, lock_mode);
388 51471697 : return error;
389 : }
390 :
391 : /* Record extents that belong to the realtime rmap inode. */
392 : STATIC int
393 32509921 : xrep_rtrmap_walk_rmap(
394 : struct xfs_btree_cur *cur,
395 : const struct xfs_rmap_irec *rec,
396 : void *priv)
397 : {
398 32509921 : struct xrep_rtrmap *rr = priv;
399 32509921 : struct xfs_mount *mp = cur->bc_mp;
400 32509921 : xfs_fsblock_t fsbno;
401 32509921 : int error = 0;
402 :
403 32509921 : if (xchk_should_terminate(rr->sc, &error))
404 0 : return error;
405 :
406 : /* Skip extents which are not owned by this inode and fork. */
407 32515250 : if (rec->rm_owner != rr->sc->ip->i_ino)
408 : return 0;
409 :
410 29152 : error = xrep_check_ino_btree_mapping(rr->sc, rec);
411 29152 : if (error)
412 : return error;
413 :
414 29152 : fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
415 : rec->rm_startblock);
416 :
417 29152 : return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks, fsbno,
418 29152 : rec->rm_blockcount);
419 : }
420 :
421 : /* Scan one AG for reverse mappings for the realtime rmap btree. */
422 : STATIC int
423 1197857 : xrep_rtrmap_scan_ag(
424 : struct xrep_rtrmap *rr,
425 : struct xfs_perag *pag)
426 : {
427 1197857 : struct xfs_scrub *sc = rr->sc;
428 1197857 : int error;
429 :
430 1197857 : error = xrep_ag_init(sc, pag, &sc->sa);
431 1198384 : if (error)
432 : return error;
433 :
434 1198388 : error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
435 1197854 : xchk_ag_free(sc, &sc->sa);
436 1197854 : return error;
437 : }
438 :
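/* Context for stashing rmaps generated from a bitmap walk. */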
439 : struct xrep_rtrmap_stash_run {
440 : struct xrep_rtrmap *rr;
441 : uint64_t owner;
442 : };
443 :
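/* Turn one run of set bits into a single rmap record for the stashed owner. */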
444 : static int
445 216969 : xrep_rtrmap_stash_run(
446 : uint64_t start,
447 : uint64_t len,
448 : void *priv)
449 : {
450 216969 : struct xrep_rtrmap_stash_run *rsr = priv;
451 216969 : struct xrep_rtrmap *rr = rsr->rr;
452 216969 : xfs_rgblock_t rgbno = start;
453 :
454 216969 : return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
455 : }
456 :
457 : /*
458 : * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure
459 : * that the ranges are in units of rt group blocks.
460 : */
461 : STATIC int
462 11129 : xrep_rtrmap_stash_bitmap(
463 : struct xrep_rtrmap *rr,
464 : struct xrgb_bitmap *bitmap,
465 : const struct xfs_owner_info *oinfo)
466 : {
467 11129 : struct xrep_rtrmap_stash_run rsr = {
468 : .rr = rr,
469 11129 : .owner = oinfo->oi_owner,
470 : };
471 :
472 11129 : return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
473 : }
474 :
475 : /* Record a CoW staging extent. */
476 : STATIC int
477 216969 : xrep_rtrmap_walk_cowblocks(
478 : struct xfs_btree_cur *cur,
479 : const struct xfs_refcount_irec *irec,
480 : void *priv)
481 : {
482 216969 : struct xrgb_bitmap *bitmap = priv;
483 :
484 216969 : if (!xfs_refcount_check_domain(irec) ||
485 216969 : irec->rc_domain != XFS_REFC_DOMAIN_COW)
486 : return -EFSCORRUPTED;
487 :
488 216969 : return xrgb_bitmap_set(bitmap, irec->rc_startblock,
489 216969 : irec->rc_blockcount);
490 : }
491 :
492 : /*
493 : * Collect rmaps for all the CoW staging extents tracked by the realtime
494 : * refcount btree.
495 : */
496 : STATIC int
497 11142 : xrep_rtrmap_find_refcount_rmaps(
498 : struct xrep_rtrmap *rr)
499 : {
500 11142 : struct xrgb_bitmap cow_blocks; /* COWBIT */
501 11142 : struct xfs_refcount_irec low = {
502 : .rc_startblock = 0,
503 : .rc_domain = XFS_REFC_DOMAIN_COW,
504 : };
505 11142 : struct xfs_refcount_irec high = {
506 : .rc_startblock = -1U,
507 : .rc_domain = XFS_REFC_DOMAIN_COW,
508 : };
509 11142 : struct xfs_scrub *sc = rr->sc;
510 11142 : int error;
511 :
512 11142 : if (!xfs_has_rtreflink(sc->mp))
513 : return 0;
514 :
515 11130 : xrgb_bitmap_init(&cow_blocks);
516 :
517 : /* Collect rmaps for CoW staging extents. */
518 11129 : error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high,
519 : xrep_rtrmap_walk_cowblocks, &cow_blocks);
520 11131 : if (error)
521 0 : goto out_bitmap;
522 :
523 : /* Generate rmaps for the CoW staging extents. */
524 11131 : error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW);
525 11129 : if (error)
526 0 : goto out_bitmap;
527 :
528 11129 : out_bitmap:
529 11129 : xrgb_bitmap_destroy(&cow_blocks);
530 11129 : return error;
531 : }
532 :
533 : /* Count and check all collected records. */
534 : STATIC int
535 51346672 : xrep_rtrmap_check_record(
536 : struct xfs_btree_cur *cur,
537 : const struct xfs_rmap_irec *rec,
538 : void *priv)
539 : {
540 51346672 : struct xrep_rtrmap *rr = priv;
541 51346672 : int error;
542 :
543 51346672 : error = xrep_rtrmap_check_mapping(rr->sc, rec);
544 51347413 : if (error)
545 : return error;
546 :
547 51347413 : rr->nr_records++;
548 51347413 : return 0;
549 : }
550 :
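/*
 * Generate a reverse mapping for the rt group superblock, which occupies
 * the first rt extent of the group.
 */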
551 : STATIC int
552 11141 : xrep_rtrmap_find_super_rmaps(
553 : struct xrep_rtrmap *rr)
554 : {
555 11141 : struct xfs_scrub *sc = rr->sc;
556 :
557 : /* Create a record for the rtgroup superblock. */
558 11141 : return xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
559 : XFS_RMAP_OWN_FS, 0, 0);
560 : }
561 :
562 : /* Generate all the reverse-mappings for the realtime device. */
563 : STATIC int
564 11142 : xrep_rtrmap_find_rmaps(
565 : struct xrep_rtrmap *rr)
566 : {
567 11142 : struct xfs_scrub *sc = rr->sc;
568 11142 : struct xfs_perag *pag;
569 11142 : struct xfs_inode *ip;
570 11142 : struct xfs_buf *mhead_bp;
571 11142 : struct xfs_btree_cur *mcur;
572 11142 : xfs_agnumber_t agno;
573 11142 : int error;
574 :
575 : /* Generate rmaps for the rtgroup superblock */
576 11142 : error = xrep_rtrmap_find_super_rmaps(rr);
577 11139 : if (error)
578 : return error;
579 :
580 : /* Find CoW staging extents. */
581 11139 : xrep_rtgroup_btcur_init(sc, &sc->sr);
582 11143 : error = xrep_rtrmap_find_refcount_rmaps(rr);
583 11138 : xchk_rtgroup_btcur_free(&sc->sr);
584 11142 : if (error)
585 : return error;
586 :
587 : /*
588 : * Set up for a potentially lengthy filesystem scan by reducing our
589 : * transaction resource usage for the duration. Specifically:
590 : *
591 : * Unlock the realtime metadata inodes and cancel the transaction to
592 : * release the log grant space while we scan the filesystem.
593 : *
594 : * Create a new empty transaction to eliminate the possibility of the
595 : * inode scan deadlocking on cyclical metadata.
596 : *
597 : * We pass the empty transaction to the file scanning function to avoid
598 : * repeatedly cycling empty transactions. This can be done even though
599 : * we take the IOLOCK to quiesce the file because empty transactions
600 : * do not take sb_internal.
601 : */
602 11142 : xchk_trans_cancel(sc);
603 11143 : xchk_rtgroup_unlock(sc, &sc->sr);
604 11141 : error = xchk_trans_alloc_empty(sc);
605 11140 : if (error)
606 : return error;
607 :
608 51497052 : while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
609 51484719 : error = xrep_rtrmap_scan_inode(rr, ip);
610 51480196 : xchk_irele(sc, ip);
611 51487665 : if (error)
612 : break;
613 :
614 51487663 : if (xchk_should_terminate(sc, &error))
615 : break;
616 : }
617 11143 : xchk_iscan_iter_finish(&rr->iscan);
618 11142 : if (error)
619 : return error;
620 :
621 : /*
622 : * Switch out for a real transaction and lock the RT metadata in
623 : * preparation for building a new tree.
624 : */
625 11140 : xchk_trans_cancel(sc);
626 11140 : error = xchk_setup_rt(sc);
627 11130 : if (error)
628 : return error;
629 11131 : error = xchk_rtgroup_drain_and_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
630 11133 : if (error)
631 : return error;
632 :
633 : /*
634 : * If a hook failed to update the in-memory btree, we lack the data to
635 : * continue the repair.
636 : */
637 11064 : if (xchk_iscan_aborted(&rr->iscan))
638 : return -EFSCORRUPTED;
639 :
640 : /* Scan for old rtrmap blocks. */
641 1209428 : for_each_perag(sc->mp, agno, pag) {
642 1198002 : error = xrep_rtrmap_scan_ag(rr, pag);
643 1198365 : if (error) {
644 0 : xfs_perag_rele(pag);
645 0 : return error;
646 : }
647 : }
648 :
649 : /*
650 : * Now that we have everything locked again, we need to count the
651 : * number of rmap records stashed in the btree. This should reflect
652 : * all actively-owned rt files in the filesystem. At the same time,
653 : * check all our records before we start building a new btree, which
654 : * requires the rtbitmap lock.
655 : */
656 11073 : error = xfbtree_head_read_buf(rr->rtrmap_btree, NULL, &mhead_bp);
657 11073 : if (error)
658 : return error;
659 :
660 11073 : mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, mhead_bp,
661 : rr->rtrmap_btree);
662 11073 : rr->nr_records = 0;
663 11073 : error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
664 11072 : xfs_btree_del_cursor(mcur, error);
665 11073 : xfs_buf_relse(mhead_bp);
666 :
667 11073 : return error;
668 : }
669 :
670 : /* Building the new rtrmap btree. */
671 :
672 : /* Retrieve rtrmapbt data for bulk load. */
673 : STATIC int
674 418593 : xrep_rtrmap_get_records(
675 : struct xfs_btree_cur *cur,
676 : unsigned int idx,
677 : struct xfs_btree_block *block,
678 : unsigned int nr_wanted,
679 : void *priv)
680 : {
681 418593 : struct xrep_rtrmap *rr = priv;
682 418593 : union xfs_btree_rec *block_rec;
683 418593 : unsigned int loaded;
684 418593 : int error;
685 :
686 51765078 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
687 51346484 : int stat = 0;
688 :
689 51346484 : error = xfs_btree_increment(rr->mcur, 0, &stat);
690 51346784 : if (error)
691 0 : return error;
692 51346784 : if (!stat)
693 : return -EFSCORRUPTED;
694 :
695 51346784 : error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
696 51346346 : if (error)
697 0 : return error;
698 51346346 : if (!stat)
699 : return -EFSCORRUPTED;
700 :
701 51346346 : block_rec = xfs_btree_rec_addr(cur, idx, block);
702 51346107 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
703 : }
704 :
705 418594 : return loaded;
706 : }
707 :
708 : /* Feed one of the new btree blocks to the bulk loader. */
709 : STATIC int
710 418462 : xrep_rtrmap_claim_block(
711 : struct xfs_btree_cur *cur,
712 : union xfs_btree_ptr *ptr,
713 : void *priv)
714 : {
715 418462 : struct xrep_rtrmap *rr = priv;
716 :
717 418462 : return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
718 : }
719 :
720 : /* Figure out how much space we need to create the incore btree root block. */
721 : STATIC size_t
722 11069 : xrep_rtrmap_iroot_size(
723 : struct xfs_btree_cur *cur,
724 : unsigned int level,
725 : unsigned int nr_this_level,
726 : void *priv)
727 : {
728 11069 : return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
729 : }
730 :
731 : /*
732 : * Use the collected rmap information to stage a new rmap btree. If this is
733 : * successful we'll return with the new btree root information logged to the
734 : * repair transaction but not yet committed.
736 : */
737 : STATIC int
738 11073 : xrep_rtrmap_build_new_tree(
739 : struct xrep_rtrmap *rr)
740 : {
741 11073 : struct xfs_scrub *sc = rr->sc;
742 11073 : struct xfs_rtgroup *rtg = sc->sr.rtg;
743 11073 : struct xfs_btree_cur *rmap_cur;
744 11073 : struct xfs_buf *mhead_bp;
745 11073 : int error;
746 :
747 : /*
748 : * Prepare to construct the new btree by reserving disk space for the
749 : * new btree and setting up all the accounting information we'll need
750 : * to root the new btree while it's under construction and before we
751 : * attach it to the realtime rmapbt inode.
752 : */
753 11073 : error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
754 11071 : if (error)
755 : return error;
756 :
757 11071 : rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
758 11071 : rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
759 11071 : rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;
760 :
761 11071 : rmap_cur = xfs_rtrmapbt_stage_cursor(sc->mp, rtg, rtg->rtg_rmapip,
762 : &rr->new_btree.ifake);
763 :
764 : /* Compute how many blocks we'll need for the rmaps collected. */
765 11072 : error = xfs_btree_bload_compute_geometry(rmap_cur,
766 : &rr->new_btree.bload, rr->nr_records);
767 11072 : if (error)
768 0 : goto err_cur;
769 :
770 : /* Last chance to abort before we start committing fixes. */
771 11072 : if (xchk_should_terminate(sc, &error))
772 0 : goto err_cur;
773 :
774 : /*
775 : * Now that we know how many blocks the new rtrmapbt will occupy, pump up
776 : * our transaction to have sufficient block reservation. We're allowed
778 : * to exceed quota to repair inconsistent metadata, though this is
779 : * unlikely.
780 : */
781 22142 : error = xfs_trans_reserve_more_inode(sc->tp, rtg->rtg_rmapip,
782 11073 : rr->new_btree.bload.nr_blocks, 0, true);
783 11069 : if (error)
784 0 : goto err_cur;
785 :
786 : /* Reserve the space we'll need for the new btree. */
787 11069 : error = xrep_newbt_alloc_blocks(&rr->new_btree,
788 : rr->new_btree.bload.nr_blocks);
789 11068 : if (error)
790 0 : goto err_cur;
791 :
792 : /*
793 : * Create a cursor to the in-memory btree so that we can bulk load the
794 : * new btree.
795 : */
796 11068 : error = xfbtree_head_read_buf(rr->rtrmap_btree, NULL, &mhead_bp);
797 11073 : if (error)
798 0 : goto err_cur;
799 :
800 11073 : rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, mhead_bp,
801 : rr->rtrmap_btree);
802 11072 : error = xfs_btree_goto_left_edge(rr->mcur);
803 11073 : if (error)
804 0 : goto err_mcur;
805 :
806 : /* Add all observed rmap records. */
807 11073 : rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_RMAP;
808 11073 : error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
809 11060 : if (error)
810 0 : goto err_mcur;
811 :
812 : /*
813 : * Install the new rtrmap btree in the inode. After this point the old
814 : * btree is no longer accessible, the new tree is live, and we can
815 : * delete the cursor.
816 : */
817 11060 : xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
818 11067 : xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
819 11068 : xfs_btree_del_cursor(rmap_cur, 0);
820 11069 : xfs_btree_del_cursor(rr->mcur, 0);
821 11067 : rr->mcur = NULL;
822 11067 : xfs_buf_relse(mhead_bp);
823 :
824 : /*
825 : * Now that we've written the new btree to disk, we don't need to keep
826 : * updating the in-memory btree. Abort the scan to stop live updates.
827 : */
828 11071 : xchk_iscan_abort(&rr->iscan);
829 :
830 : /* Dispose of any unused blocks and the accounting information. */
831 11073 : error = xrep_newbt_commit(&rr->new_btree);
832 11065 : if (error)
833 : return error;
834 :
835 11063 : return xrep_roll_trans(sc);
836 :
837 0 : err_mcur:
838 0 : xfs_btree_del_cursor(rr->mcur, error);
839 0 : xfs_buf_relse(mhead_bp);
840 0 : err_cur:
841 0 : xfs_btree_del_cursor(rmap_cur, error);
842 0 : xrep_newbt_cancel(&rr->new_btree);
843 0 : return error;
844 : }
845 :
846 : /* Reaping the old btree. */
847 :
848 : /* Reap the old rtrmapbt blocks. */
849 : STATIC int
850 11065 : xrep_rtrmap_remove_old_tree(
851 : struct xrep_rtrmap *rr)
852 : {
853 11065 : int error;
854 :
855 : /*
856 : * Free all the extents that were allocated to the former rtrmapbt and
857 : * aren't cross-linked with something else.
858 : */
859 11065 : error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
860 11059 : if (error)
861 : return error;
862 :
863 : /*
864 : * Ensure the proper reservation for the rtrmap inode so that we don't
865 : * fail to expand the new btree.
866 : */
867 11059 : return xrep_reset_imeta_reservation(rr->sc);
868 : }
869 :
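/* Decide if a live rmap update applies to the in-memory btree we're building. */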
870 : static inline bool
871 1027572 : xrep_rtrmapbt_want_live_update(
872 : struct xchk_iscan *iscan,
873 : const struct xfs_owner_info *oi)
874 : {
875 1027572 : if (xchk_iscan_aborted(iscan))
876 : return false;
877 :
878 : /*
879 : * We scanned the CoW staging extents before we started the iscan, so
880 : * we need all the updates.
881 : */
882 1027572 : if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
883 : return true;
884 :
885 : /* Ignore updates to files that the scanner hasn't visited yet. */
886 1003876 : return xchk_iscan_want_live_update(iscan, oi->oi_owner);
887 : }
888 :
889 : /*
890 : * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
891 : * We're running from the thread that owns the rtrmap ILOCK and is generating
892 : * the update, so we must be careful about which parts of struct
893 : * xrep_rtrmap we change.
894 : */
895 : static int
896 1027572 : xrep_rtrmapbt_live_update(
897 : struct notifier_block *nb,
898 : unsigned long action,
899 : void *data)
900 : {
901 1027572 : struct xfs_rmap_update_params *p = data;
902 1027572 : struct xrep_rtrmap *rr;
903 1027572 : struct xfs_mount *mp;
904 1027572 : struct xfs_btree_cur *mcur;
905 1027572 : struct xfs_buf *mhead_bp;
906 1027572 : struct xfs_trans *tp;
907 1027572 : void *txcookie;
908 1027572 : int error;
909 :
910 1027572 : rr = container_of(nb, struct xrep_rtrmap, hooks.update_hook.nb);
911 1027572 : mp = rr->sc->mp;
912 :
913 1027572 : if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
914 453723 : goto out_unlock;
915 :
916 573849 : trace_xrep_rtrmap_live_update(mp, rr->sc->sr.rtg->rtg_rgno, action, p);
917 :
918 573849 : error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
919 573849 : if (error)
920 0 : goto out_abort;
921 :
922 573849 : mutex_lock(&rr->lock);
923 573849 : error = xfbtree_head_read_buf(rr->rtrmap_btree, tp, &mhead_bp);
924 573849 : if (error)
925 0 : goto out_cancel;
926 :
927 573849 : mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, mhead_bp,
928 : rr->rtrmap_btree);
929 1147698 : error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
930 573849 : p->blockcount, &p->oinfo, p->unwritten);
931 573849 : xfs_btree_del_cursor(mcur, error);
932 573849 : if (error)
933 0 : goto out_cancel;
934 :
935 573849 : error = xfbtree_trans_commit(rr->rtrmap_btree, tp);
936 573849 : if (error)
937 0 : goto out_cancel;
938 :
939 573849 : xrep_trans_cancel_hook_dummy(&txcookie, tp);
940 573849 : mutex_unlock(&rr->lock);
941 573849 : return NOTIFY_DONE;
942 :
943 0 : out_cancel:
944 0 : xfbtree_trans_cancel(rr->rtrmap_btree, tp);
945 0 : xrep_trans_cancel_hook_dummy(&txcookie, tp);
946 0 : out_abort:
947 0 : xchk_iscan_abort(&rr->iscan);
948 0 : mutex_unlock(&rr->lock);
949 : out_unlock:
950 : return NOTIFY_DONE;
951 : }
952 :
953 : /* Set up the filesystem scan components. */
954 : STATIC int
955 11143 : xrep_rtrmap_setup_scan(
956 : struct xrep_rtrmap *rr)
957 : {
958 11143 : struct xfs_scrub *sc = rr->sc;
959 11143 : int error;
960 :
961 11143 : mutex_init(&rr->lock);
962 11143 : xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);
963 :
964 : /* Set up some storage */
965 11143 : error = xfs_rtrmapbt_mem_create(sc->mp, sc->sr.rtg->rtg_rgno,
966 : sc->xfile_buftarg, &rr->rtrmap_btree);
967 11143 : if (error)
968 0 : goto out_bitmap;
969 :
970 : /* Retry iget every tenth of a second for up to 30 seconds. */
971 11143 : xchk_iscan_start(sc, 30000, 100, &rr->iscan);
972 :
973 : /*
974 : * Hook into live rtrmap operations so that we can update our in-memory
975 : * btree to reflect live changes on the filesystem. Since we drop the
976 : * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
977 : * installing a stale btree.
978 : */
979 11141 : ASSERT(sc->flags & XCHK_FSGATES_RMAP);
980 11141 : xfs_hook_setup(&rr->hooks.update_hook, xrep_rtrmapbt_live_update);
981 11141 : error = xfs_rtrmap_hook_add(sc->sr.rtg, &rr->hooks);
982 11142 : if (error)
983 0 : goto out_iscan;
984 : return 0;
985 :
986 : out_iscan:
987 0 : xchk_iscan_teardown(&rr->iscan);
988 0 : xfbtree_destroy(rr->rtrmap_btree);
989 0 : out_bitmap:
990 0 : xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
991 0 : mutex_destroy(&rr->lock);
992 0 : return error;
993 : }
994 :
995 : /* Tear down scan components. */
996 : STATIC void
997 11132 : xrep_rtrmap_teardown(
998 : struct xrep_rtrmap *rr)
999 : {
1000 11132 : struct xfs_scrub *sc = rr->sc;
1001 :
1002 11132 : xchk_iscan_abort(&rr->iscan);
1003 11141 : xfs_rtrmap_hook_del(sc->sr.rtg, &rr->hooks);
1004 11135 : xchk_iscan_teardown(&rr->iscan);
1005 11133 : xfbtree_destroy(rr->rtrmap_btree);
1006 11135 : xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
1007 11131 : mutex_destroy(&rr->lock);
1008 11130 : }
1009 :
1010 : /* Repair the realtime rmap btree. */
1011 : int
1012 11152 : xrep_rtrmapbt(
1013 : struct xfs_scrub *sc)
1014 : {
1015 11152 : struct xrep_rtrmap *rr = sc->buf;
1016 11152 : int error;
1017 :
1018 : /* Make sure any problems with the fork are fixed. */
1019 11152 : error = xrep_metadata_inode_forks(sc);
1020 11153 : if (error)
1021 : return error;
1022 :
1023 11143 : error = xrep_rtrmap_setup_scan(rr);
1024 11142 : if (error)
1025 : return error;
1026 :
1027 : /* Collect rmaps for realtime files. */
1028 11141 : error = xrep_rtrmap_find_rmaps(rr);
1029 11142 : if (error)
1030 70 : goto out_records;
1031 :
1032 11072 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
1033 :
1034 : /* Rebuild the rtrmap information. */
1035 11073 : error = xrep_rtrmap_build_new_tree(rr);
1036 11065 : if (error)
1037 0 : goto out_records;
1038 :
1039 : /* Kill the old tree. */
1040 11065 : error = xrep_rtrmap_remove_old_tree(rr);
1041 :
1042 11131 : out_records:
1043 11131 : xrep_rtrmap_teardown(rr);
1044 11131 : return error;
1045 : }