Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2021-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_btree_staging.h"
15 : #include "xfs_bit.h"
16 : #include "xfs_log_format.h"
17 : #include "xfs_trans.h"
18 : #include "xfs_sb.h"
19 : #include "xfs_alloc.h"
20 : #include "xfs_ialloc.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_rtrmap_btree.h"
24 : #include "xfs_refcount.h"
25 : #include "xfs_rtrefcount_btree.h"
26 : #include "xfs_error.h"
27 : #include "xfs_health.h"
28 : #include "xfs_inode.h"
29 : #include "xfs_quota.h"
30 : #include "xfs_rtalloc.h"
31 : #include "xfs_ag.h"
32 : #include "xfs_rtgroup.h"
33 : #include "scrub/xfs_scrub.h"
34 : #include "scrub/scrub.h"
35 : #include "scrub/common.h"
36 : #include "scrub/btree.h"
37 : #include "scrub/trace.h"
38 : #include "scrub/repair.h"
39 : #include "scrub/bitmap.h"
40 : #include "scrub/xfile.h"
41 : #include "scrub/xfarray.h"
42 : #include "scrub/newbt.h"
43 : #include "scrub/reap.h"
44 : #include "scrub/rcbag.h"
45 :
46 : /*
47 : * Rebuilding the Reference Count Btree
48 : * ====================================
49 : *
50 : * This algorithm is "borrowed" from xfs_repair. Imagine the rmap
51 : * entries as rectangles representing extents of physical blocks, and
52 : * that the rectangles can be laid down to allow them to overlap each
53 : * other; then we know that we must emit a refcnt btree entry wherever
54 : * the amount of overlap changes, i.e. the emission stimulus is
55 : * level-triggered:
56 : *
57 : * - ---
58 : * -- ----- ---- --- ------
59 : * -- ---- ----------- ---- ---------
60 : * -------------------------------- -----------
61 : * ^ ^ ^^ ^^ ^ ^^ ^^^ ^^^^ ^ ^^ ^ ^ ^
62 : * 2 1 23 21 3 43 234 2123 1 01 2 3 0
63 : *
64 : * For our purposes, an rmap is a tuple (startblock, len, fileoff, owner).
65 : *
66 : * Note that in the actual refcnt btree we don't store the refcount < 2
67 : * cases because the bnobt tells us which blocks are free; single-use
68 : * blocks aren't recorded in the bnobt or the refcntbt. If the rmapbt
69 : * supports storing multiple entries covering a given block we could
70 : * theoretically dispense with the refcntbt and simply count rmaps, but
71 : * that's inefficient in the (hot) write path, so we'll take the cost of
72 : * the extra tree to save time. Also there's no guarantee that rmap
73 : * will be enabled.
74 : *
75 : * Given an array of rmaps sorted by physical block number, a starting
76 : * physical block (sp), a bag to hold rmaps that cover sp, and the next
77 : * physical block where the level changes (np), we can reconstruct the
78 : * rt refcount btree as follows:
79 : *
80 : * While there are still unprocessed rmaps in the array,
81 : * - Set sp to the physical block (pblk) of the next unprocessed rmap.
82 : * - Add to the bag all rmaps in the array where startblock == sp.
83 : * - Set np to the physical block where the bag size will change. This
84 : * is the minimum of (the pblk of the next unprocessed rmap) and
85 : * (startblock + len of each rmap in the bag).
86 : * - Record the bag size as old_bag_size.
87 : *
88 : * - While the bag isn't empty,
89 : * - Remove from the bag all rmaps where startblock + len == np.
90 : * - Add to the bag all rmaps in the array where startblock == np.
91 : * - If the bag size isn't old_bag_size, store the refcount entry
92 : * (sp, np - sp, old_bag_size) in the refcnt btree.
93 : * - If the bag is empty, break out of the inner loop.
94 : * - Set old_bag_size to the bag size
95 : * - Set sp = np.
96 : * - Set np to the physical block where the bag size will change.
97 : * This is the minimum of (the pblk of the next unprocessed rmap)
98 : * and (startblock + len of each rmap in the bag).
99 : *
100 : * Like all the other repairers, we make a list of all the refcount
101 : * records we need, then reinitialize the rt refcount btree root and
102 : * insert all the records.
103 : */
104 :
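As an illustrative aside (not part of the scrubbed source file), here is a minimal userspace C sketch of the level-triggered sweep the comment above describes: it walks an array of rmaps sorted by start block, keeps the rmaps covering the current span in a bag, and emits a refcount record whenever the bag size changes. The struct layout, the fixed-size in-memory bag, and the example data are simplified assumptions of this sketch; the kernel code uses a disk-backed rcbag and stashes the records in an xfarray instead.

#include <stdint.h>
#include <stdio.h>

/* Simplified rmap: only the fields the sweep needs (assumption of this sketch). */
struct sketch_rmap {
        uint32_t        startblock;
        uint32_t        len;
};

/* Emit one refcount record; levels below 2 are not stored in the btree. */
static void emit_refcount(uint32_t start, uint32_t len, unsigned int refcount)
{
        if (refcount > 1)
                printf("refcount: start=%u len=%u count=%u\n",
                                (unsigned int)start, (unsigned int)len,
                                refcount);
}

int main(void)
{
        /* rmaps sorted by startblock; made-up example data. */
        struct sketch_rmap      rmaps[] = {
                { 0, 12 }, { 2, 8 }, { 4, 2 }, { 20, 4 },
        };
        unsigned int            nr = sizeof(rmaps) / sizeof(rmaps[0]);
        struct sketch_rmap      bag[16];        /* rmaps covering the current span */
        unsigned int            bag_sz = 0, i = 0;

        while (i < nr) {
                uint32_t        cbno = rmaps[i].startblock;
                unsigned int    old_bag_size;
                uint32_t        nbno;

                /* Push every rmap that starts at cbno onto the bag. */
                while (i < nr && rmaps[i].startblock == cbno)
                        bag[bag_sz++] = rmaps[i++];
                old_bag_size = bag_sz;

                while (bag_sz > 0) {
                        unsigned int    j, k;

                        /* nbno = next block where the bag contents change. */
                        nbno = i < nr ? rmaps[i].startblock : UINT32_MAX;
                        for (j = 0; j < bag_sz; j++) {
                                uint32_t end = bag[j].startblock + bag[j].len;

                                if (end < nbno)
                                        nbno = end;
                        }

                        /* Pop bag members that end at nbno... */
                        for (j = k = 0; j < bag_sz; j++)
                                if (bag[j].startblock + bag[j].len != nbno)
                                        bag[k++] = bag[j];
                        bag_sz = k;

                        /* ...and push rmaps that start at nbno. */
                        while (i < nr && rmaps[i].startblock == nbno)
                                bag[bag_sz++] = rmaps[i++];

                        /* Level changed?  Emit a record for [cbno, nbno). */
                        if (bag_sz != old_bag_size) {
                                emit_refcount(cbno, nbno - cbno, old_bag_size);
                                cbno = nbno;
                        }
                        old_bag_size = bag_sz;
                }
        }
        return 0;
}

The kernel code below performs the same walk, but pulls rmaps from the realtime rmap btree cursor, clamps the count at XFS_REFC_REFCOUNT_MAX, and appends each record to an xfarray for sorting and bulk loading into the new btree.
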
105 : struct xrep_rtrefc {
106 : /* refcount extents */
107 : struct xfarray *refcount_records;
108 :
109 : /* new refcountbt information */
110 : struct xrep_newbt new_btree;
111 :
112 : /* old refcountbt blocks */
113 : struct xfsb_bitmap old_rtrefcountbt_blocks;
114 :
115 : struct xfs_scrub *sc;
116 :
117 : /* get_records()'s position in the rt refcount record array. */
118 : xfarray_idx_t array_cur;
119 :
120 : /* # of refcountbt blocks */
121 : xfs_filblks_t btblocks;
122 : };
123 :
124 : /* Set us up to repair realtime refcount btrees. */
125 : int
126 17083 : xrep_setup_rtrefcountbt(
127 : struct xfs_scrub *sc)
128 : {
129 17083 : char *descr;
130 17083 : int error;
131 :
132 17083 : descr = xchk_xfile_ag_descr(sc, "rmap record bag");
133 17083 : error = xrep_setup_buftarg(sc, descr);
134 17083 : kfree(descr);
135 17083 : return error;
136 : }
137 :
138 : /* Check for any obvious conflicts with this shared/CoW staging extent. */
139 : STATIC int
140 5125421 : xrep_rtrefc_check_ext(
141 : struct xfs_scrub *sc,
142 : const struct xfs_refcount_irec *rec)
143 : {
144 5125421 : xfs_rtblock_t rtbno;
145 :
146 5125421 : if (xfs_refcount_check_rtgroup_irec(sc->sr.rtg, rec) != NULL)
147 : return -EFSCORRUPTED;
148 :
149 : /* Make sure this isn't free space or misaligned. */
150 5125421 : rtbno = xfs_rgbno_to_rtb(sc->mp, sc->sr.rtg->rtg_rgno,
151 5125421 : rec->rc_startblock);
152 5125421 : return xrep_require_rtext_inuse(sc, rtbno, rec->rc_blockcount, true);
153 : }
154 :
155 : /* Record a reference count extent. */
156 : STATIC int
157 5125421 : xrep_rtrefc_stash(
158 : struct xrep_rtrefc *rr,
159 : enum xfs_refc_domain domain,
160 : xfs_rgblock_t bno,
161 : xfs_extlen_t len,
162 : uint64_t refcount)
163 : {
164 5125421 : struct xfs_refcount_irec irec = {
165 : .rc_startblock = bno,
166 : .rc_blockcount = len,
167 : .rc_refcount = refcount,
168 : .rc_domain = domain,
169 : };
170 5125421 : int error = 0;
171 :
172 5125421 : if (xchk_should_terminate(rr->sc, &error))
173 0 : return error;
174 :
175 5125421 : irec.rc_refcount = min_t(uint64_t, XFS_REFC_REFCOUNT_MAX, refcount);
176 :
177 5125421 : error = xrep_rtrefc_check_ext(rr->sc, &irec);
178 5125421 : if (error)
179 : return error;
180 :
181 5125421 : trace_xrep_rtrefc_found(rr->sc->sr.rtg, &irec);
182 :
183 5125421 : return xfarray_append(rr->refcount_records, &irec);
184 : }
185 :
186 : /* Record a CoW staging extent. */
187 : STATIC int
188 781716 : xrep_rtrefc_stash_cow(
189 : struct xrep_rtrefc *rr,
190 : xfs_rgblock_t bno,
191 : xfs_extlen_t len)
192 : {
193 781716 : return xrep_rtrefc_stash(rr, XFS_REFC_DOMAIN_COW, bno, len, 1);
194 : }
195 :
196 : /* Decide if an rmap could describe a shared extent. */
197 : static inline bool
198 : xrep_rtrefc_rmap_shareable(
199 : const struct xfs_rmap_irec *rmap)
200 : {
201 : /* rt metadata are never shareable */
202 411317373 : if (XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
203 : return false;
204 :
205 : /* Unwritten file blocks are not shareable. */
206 410520418 : if (rmap->rm_flags & XFS_RMAP_UNWRITTEN)
207 : return false;
208 :
209 : return true;
210 : }
211 :
212 : /* Grab the next (abbreviated) rmap record from the rmapbt. */
213 : STATIC int
214 351020166 : xrep_rtrefc_walk_rmaps(
215 : struct xrep_rtrefc *rr,
216 : struct xfs_rmap_irec *rmap,
217 : bool *have_rec)
218 : {
219 351020166 : struct xfs_btree_cur *cur = rr->sc->sr.rmap_cur;
220 351020166 : struct xfs_mount *mp = cur->bc_mp;
221 351020166 : int have_gt;
222 351020166 : int error = 0;
223 :
224 351020166 : *have_rec = false;
225 :
226 : /*
227 : * Loop through the remaining rmaps. Remember CoW staging
228 : * extents and the refcountbt blocks from the old tree for later
229 : * disposal. We can only share written data fork extents, so
230 : * keep looping until we find an rmap for one.
231 : */
232 411342509 : do {
233 411342509 : if (xchk_should_terminate(rr->sc, &error))
234 0 : return error;
235 :
236 411342511 : error = xfs_btree_increment(cur, 0, &have_gt);
237 411342510 : if (error)
238 0 : return error;
239 411342510 : if (!have_gt)
240 : return 0;
241 :
242 411317373 : error = xfs_rmap_get_rec(cur, rmap, &have_gt);
243 411317375 : if (error)
244 0 : return error;
245 411317375 : if (XFS_IS_CORRUPT(mp, !have_gt)) {
246 0 : xfs_btree_mark_sick(cur);
247 0 : return -EFSCORRUPTED;
248 : }
249 :
250 411317375 : if (rmap->rm_owner == XFS_RMAP_OWN_COW) {
251 781716 : error = xrep_rtrefc_stash_cow(rr, rmap->rm_startblock,
252 : rmap->rm_blockcount);
253 781716 : if (error)
254 0 : return error;
255 410535659 : } else if (xfs_internal_inum(mp, rmap->rm_owner) ||
256 410535657 : (rmap->rm_flags & (XFS_RMAP_ATTR_FORK |
257 : XFS_RMAP_BMBT_BLOCK))) {
258 0 : xfs_btree_mark_sick(cur);
259 0 : return -EFSCORRUPTED;
260 : }
261 411317373 : } while (!xrep_rtrefc_rmap_shareable(rmap));
262 :
263 350995030 : *have_rec = true;
264 350995030 : return 0;
265 : }
266 :
267 : static inline uint32_t
268 : xrep_rtrefc_encode_startblock(
269 : const struct xfs_refcount_irec *irec)
270 : {
271 102684934 : uint32_t start;
272 :
273 102684934 : start = irec->rc_startblock & ~XFS_REFC_COWFLAG;
274 102684934 : if (irec->rc_domain == XFS_REFC_DOMAIN_COW)
275 12151301 : start |= XFS_REFC_COWFLAG;
276 :
277 102684934 : return start;
278 : }
279 :
280 : /*
281 : * Compare two refcount records. We want to sort in order of increasing block
282 : * number.
283 : */
284 : static int
285 51342467 : xrep_rtrefc_extent_cmp(
286 : const void *a,
287 : const void *b)
288 : {
289 51342467 : const struct xfs_refcount_irec *ap = a;
290 51342467 : const struct xfs_refcount_irec *bp = b;
291 51342467 : uint32_t sa, sb;
292 :
293 51342467 : sa = xrep_rtrefc_encode_startblock(ap);
294 51342467 : sb = xrep_rtrefc_encode_startblock(bp);
295 :
296 51342467 : if (sa > sb)
297 : return 1;
298 28773892 : if (sa < sb)
299 28773892 : return -1;
300 : return 0;
301 : }
302 :
303 : /*
304 : * Sort the refcount extents by startblock or else the btree records will be in
305 : * the wrong order. Make sure the records do not overlap in physical space.
306 : */
307 : STATIC int
308 15240 : xrep_rtrefc_sort_records(
309 : struct xrep_rtrefc *rr)
310 : {
311 15240 : struct xfs_refcount_irec irec;
312 15240 : xfarray_idx_t cur;
313 15240 : enum xfs_refc_domain dom = XFS_REFC_DOMAIN_SHARED;
314 15240 : xfs_rgblock_t next_rgbno = 0;
315 15240 : int error;
316 :
317 15240 : error = xfarray_sort(rr->refcount_records, xrep_rtrefc_extent_cmp,
318 : XFARRAY_SORT_KILLABLE);
319 15238 : if (error)
320 : return error;
321 :
322 5140659 : foreach_xfarray_idx(rr->refcount_records, cur) {
323 5125421 : if (xchk_should_terminate(rr->sc, &error))
324 0 : return error;
325 :
326 5125421 : error = xfarray_load(rr->refcount_records, cur, &irec);
327 5125421 : if (error)
328 0 : return error;
329 :
330 5125421 : if (dom == XFS_REFC_DOMAIN_SHARED &&
331 4352408 : irec.rc_domain == XFS_REFC_DOMAIN_COW) {
332 8703 : dom = irec.rc_domain;
333 8703 : next_rgbno = 0;
334 : }
335 :
336 5125421 : if (dom != irec.rc_domain)
337 : return -EFSCORRUPTED;
338 5125421 : if (irec.rc_startblock < next_rgbno)
339 : return -EFSCORRUPTED;
340 :
341 5125421 : next_rgbno = irec.rc_startblock + irec.rc_blockcount;
342 : }
343 :
344 15240 : return error;
345 : }
346 :
347 : /* Record extents that belong to the realtime refcount inode. */
348 : STATIC int
349 154846154 : xrep_rtrefc_walk_rmap(
350 : struct xfs_btree_cur *cur,
351 : const struct xfs_rmap_irec *rec,
352 : void *priv)
353 : {
354 154846154 : struct xrep_rtrefc *rr = priv;
355 154846154 : struct xfs_mount *mp = cur->bc_mp;
356 154846154 : xfs_fsblock_t fsbno;
357 154846154 : int error = 0;
358 :
359 154846154 : if (xchk_should_terminate(rr->sc, &error))
360 0 : return error;
361 :
362 : /* Skip extents which are not owned by this inode and fork. */
363 154846157 : if (rec->rm_owner != rr->sc->ip->i_ino)
364 : return 0;
365 :
366 10073 : error = xrep_check_ino_btree_mapping(rr->sc, rec);
367 10073 : if (error)
368 : return error;
369 :
370 10073 : fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
371 : rec->rm_startblock);
372 :
373 10073 : return xfsb_bitmap_set(&rr->old_rtrefcountbt_blocks, fsbno,
374 10073 : rec->rm_blockcount);
375 : }
376 :
377 : /*
378 : * Walk forward through the rmap btree to collect all rmaps starting at
379 : * @bno in @rcstack. These represent the file(s) that share ownership of
380 : * the current block. Upon return, the rmap cursor points to the last record
381 : * satisfying the startblock constraint.
382 : */
383 : static int
384 184109397 : xrep_rtrefc_push_rmaps_at(
385 : struct xrep_rtrefc *rr,
386 : struct rcbag *rcstack,
387 : xfs_rgblock_t bno,
388 : struct xfs_rmap_irec *rmap,
389 : bool *have)
390 : {
391 184109397 : struct xfs_scrub *sc = rr->sc;
392 184109397 : int have_gt;
393 184109397 : int error;
394 :
395 351004893 : while (*have && rmap->rm_startblock == bno) {
396 166895496 : error = rcbag_add(rcstack, rr->sc->tp, rmap);
397 166895496 : if (error)
398 0 : return error;
399 :
400 166895496 : error = xrep_rtrefc_walk_rmaps(rr, rmap, have);
401 166895496 : if (error)
402 0 : return error;
403 : }
404 :
405 184109397 : error = xfs_btree_decrement(sc->sr.rmap_cur, 0, &have_gt);
406 184109397 : if (error)
407 : return error;
408 184109397 : if (XFS_IS_CORRUPT(sc->mp, !have_gt)) {
409 0 : xfs_btree_mark_sick(sc->sr.rmap_cur);
410 0 : return -EFSCORRUPTED;
411 : }
412 :
413 : return 0;
414 : }
415 :
416 : /* Scan one AG for reverse mappings for the realtime refcount btree. */
417 : STATIC int
418 60964 : xrep_rtrefc_scan_ag(
419 : struct xrep_rtrefc *rr,
420 : struct xfs_perag *pag)
421 : {
422 60964 : struct xfs_scrub *sc = rr->sc;
423 60964 : int error;
424 :
425 60964 : error = xrep_ag_init(sc, pag, &sc->sa);
426 60964 : if (error)
427 : return error;
428 :
429 60964 : error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrefc_walk_rmap, rr);
430 60964 : xchk_ag_free(sc, &sc->sa);
431 60964 : return error;
432 : }
433 :
434 : /* Iterate all the rmap records to generate reference count data. */
435 : STATIC int
436 15241 : xrep_rtrefc_find_refcounts(
437 : struct xrep_rtrefc *rr)
438 : {
439 15241 : struct xfs_scrub *sc = rr->sc;
440 15241 : struct rcbag *rcstack;
441 15241 : struct xfs_perag *pag;
442 15241 : uint64_t old_stack_height;
443 15241 : xfs_rgblock_t sbno;
444 15241 : xfs_rgblock_t cbno;
445 15241 : xfs_rgblock_t nbno;
446 15241 : xfs_agnumber_t agno;
447 15241 : bool have;
448 15241 : int error;
449 :
450 : /* Scan for old rtrefc btree blocks. */
451 76205 : for_each_perag(sc->mp, agno, pag) {
452 60964 : error = xrep_rtrefc_scan_ag(rr, pag);
453 60964 : if (error) {
454 0 : xfs_perag_rele(pag);
455 0 : return error;
456 : }
457 : }
458 :
459 15241 : xrep_rtgroup_btcur_init(sc, &sc->sr);
460 :
461 : /*
462 : * Set up a bag to store all the rmap records that we're tracking to
463 : * generate a reference count record. If this exceeds
464 : * XFS_REFC_REFCOUNT_MAX, we clamp rc_refcount.
465 : */
466 15241 : error = rcbag_init(sc->mp, sc->xfile_buftarg, &rcstack);
467 15241 : if (error)
468 0 : goto out_cur;
469 :
470 : /* Start the rtrmapbt cursor to the left of all records. */
471 15241 : error = xfs_btree_goto_left_edge(sc->sr.rmap_cur);
472 15239 : if (error)
473 0 : goto out_bag;
474 :
475 : /* Process reverse mappings into refcount data. */
476 42225332 : while (xfs_btree_has_more_records(sc->sr.rmap_cur)) {
477 42215470 : struct xfs_rmap_irec rmap;
478 :
479 : /* Push all rmaps with pblk == sbno onto the stack */
480 42215470 : error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
481 42215469 : if (error)
482 0 : goto out_bag;
483 42215469 : if (!have)
484 : break;
485 42210093 : sbno = cbno = rmap.rm_startblock;
486 42210093 : error = xrep_rtrefc_push_rmaps_at(rr, rcstack, sbno, &rmap,
487 : &have);
488 42210093 : if (error)
489 0 : goto out_bag;
490 :
491 : /* Set nbno to the bno of the next refcount change */
492 42210093 : error = rcbag_next_edge(rcstack, sc->tp, &rmap, have, &nbno);
493 42210093 : if (error)
494 0 : goto out_bag;
495 :
496 42210093 : ASSERT(nbno > sbno);
497 42210093 : old_stack_height = rcbag_count(rcstack);
498 :
499 : /* While stack isn't empty... */
500 141909201 : while (rcbag_count(rcstack) > 0) {
501 : /* Pop all rmaps that end at nbno */
502 141909201 : error = rcbag_remove_ending_at(rcstack, sc->tp, nbno);
503 141909201 : if (error)
504 0 : goto out_bag;
505 :
506 : /* Push array items that start at nbno */
507 141909201 : error = xrep_rtrefc_walk_rmaps(rr, &rmap, &have);
508 141909201 : if (error)
509 0 : goto out_bag;
510 141909201 : if (have) {
511 141899304 : error = xrep_rtrefc_push_rmaps_at(rr, rcstack,
512 : nbno, &rmap, &have);
513 141899304 : if (error)
514 0 : goto out_bag;
515 : }
516 :
517 : /* Emit refcount if necessary */
518 141909201 : ASSERT(nbno > cbno);
519 141909201 : if (rcbag_count(rcstack) != old_stack_height) {
520 48277626 : if (old_stack_height > 1) {
521 4343705 : error = xrep_rtrefc_stash(rr,
522 : XFS_REFC_DOMAIN_SHARED,
523 : cbno, nbno - cbno,
524 : old_stack_height);
525 4343705 : if (error)
526 0 : goto out_bag;
527 : }
528 48277626 : cbno = nbno;
529 : }
530 :
531 : /* Stack empty, go find the next rmap */
532 141909201 : if (rcbag_count(rcstack) == 0)
533 : break;
534 99699108 : old_stack_height = rcbag_count(rcstack);
535 99699108 : sbno = nbno;
536 :
537 : /* Set nbno to the bno of the next refcount change */
538 99699108 : error = rcbag_next_edge(rcstack, sc->tp, &rmap, have,
539 : &nbno);
540 99699108 : if (error)
541 0 : goto out_bag;
542 :
543 99699108 : ASSERT(nbno > sbno);
544 : }
545 : }
546 :
547 15239 : ASSERT(rcbag_count(rcstack) == 0);
548 15238 : out_bag:
549 15238 : rcbag_free(&rcstack);
550 15241 : out_cur:
551 15241 : xchk_rtgroup_btcur_free(&sc->sr);
552 15241 : return error;
553 : }
554 :
555 : /* Retrieve refcountbt data for bulk load. */
556 : STATIC int
557 21010 : xrep_rtrefc_get_records(
558 : struct xfs_btree_cur *cur,
559 : unsigned int idx,
560 : struct xfs_btree_block *block,
561 : unsigned int nr_wanted,
562 : void *priv)
563 : {
564 21010 : struct xrep_rtrefc *rr = priv;
565 21010 : union xfs_btree_rec *block_rec;
566 21010 : unsigned int loaded;
567 21010 : int error;
568 :
569 5146431 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
570 5125421 : error = xfarray_load(rr->refcount_records, rr->array_cur++,
571 5125421 : &cur->bc_rec.rc);
572 5125421 : if (error)
573 0 : return error;
574 :
575 5125421 : block_rec = xfs_btree_rec_addr(cur, idx, block);
576 5125421 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
577 : }
578 :
579 21010 : return loaded;
580 : }
581 :
582 : /* Feed one of the new btree blocks to the bulk loader. */
583 : STATIC int
584 20490 : xrep_rtrefc_claim_block(
585 : struct xfs_btree_cur *cur,
586 : union xfs_btree_ptr *ptr,
587 : void *priv)
588 : {
589 20490 : struct xrep_rtrefc *rr = priv;
590 20490 : int error;
591 :
592 20490 : error = xrep_newbt_relog_autoreap(&rr->new_btree);
593 20490 : if (error)
594 : return error;
595 :
596 20490 : return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
597 : }
598 :
599 : /* Figure out how much space we need to create the incore btree root block. */
600 : STATIC size_t
601 15239 : xrep_rtrefc_iroot_size(
602 : struct xfs_btree_cur *cur,
603 : unsigned int level,
604 : unsigned int nr_this_level,
605 : void *priv)
606 : {
607 15239 : return xfs_rtrefcount_broot_space_calc(cur->bc_mp, level,
608 : nr_this_level);
609 : }
610 :
611 : /*
612 : * Use the collected refcount information to stage a new rt refcount btree. If
613 : * this is successful we'll return with the new btree root information logged
614 : * to the repair transaction but not yet committed.
615 : */
616 : STATIC int
617 15241 : xrep_rtrefc_build_new_tree(
618 : struct xrep_rtrefc *rr)
619 : {
620 15241 : struct xfs_scrub *sc = rr->sc;
621 15241 : struct xfs_mount *mp = sc->mp;
622 15241 : struct xfs_rtgroup *rtg = sc->sr.rtg;
623 15241 : struct xfs_btree_cur *refc_cur;
624 15241 : int error;
625 :
626 15241 : error = xrep_rtrefc_sort_records(rr);
627 15239 : if (error)
628 : return error;
629 :
630 : /*
631 : * Prepare to construct the new btree by reserving disk space for the
632 : * new btree and setting up all the accounting information we'll need
633 : * to root the new btree while it's under construction and before we
634 : * attach it to the realtime refcount inode.
635 : */
636 15239 : error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
637 15241 : if (error)
638 : return error;
639 :
640 15240 : rr->new_btree.bload.get_records = xrep_rtrefc_get_records;
641 15240 : rr->new_btree.bload.claim_block = xrep_rtrefc_claim_block;
642 15240 : rr->new_btree.bload.iroot_size = xrep_rtrefc_iroot_size;
643 :
644 : /* Compute how many blocks we'll need. */
645 15240 : refc_cur = xfs_rtrefcountbt_stage_cursor(mp, rtg, rtg->rtg_refcountip,
646 : &rr->new_btree.ifake);
647 15239 : error = xfs_btree_bload_compute_geometry(refc_cur, &rr->new_btree.bload,
648 : xfarray_length(rr->refcount_records));
649 15235 : if (error)
650 0 : goto err_cur;
651 :
652 : /* Last chance to abort before we start committing fixes. */
653 15235 : if (xchk_should_terminate(sc, &error))
654 0 : goto err_cur;
655 :
656 : /*
657 : * Guess how many blocks we're going to need to rebuild an entire
658 : * rtrefcountbt from the number of extents we found, and pump up our
659 : * transaction to have sufficient block reservation. We're allowed
660 : * to exceed quota to repair inconsistent metadata, though this is
661 : * unlikely.
662 : */
663 30473 : error = xfs_trans_reserve_more_inode(sc->tp, rtg->rtg_refcountip,
664 15238 : rr->new_btree.bload.nr_blocks, 0, true);
665 15235 : if (error)
666 0 : goto err_cur;
667 :
668 : /* Reserve the space we'll need for the new btree. */
669 15235 : error = xrep_newbt_alloc_blocks(&rr->new_btree,
670 : rr->new_btree.bload.nr_blocks);
671 15238 : if (error)
672 0 : goto err_cur;
673 :
674 : /* Add all observed refcount records. */
675 15238 : rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_REFCOUNT;
676 15238 : rr->array_cur = XFARRAY_CURSOR_INIT;
677 15238 : error = xfs_btree_bload(refc_cur, &rr->new_btree.bload, rr);
678 15236 : if (error)
679 0 : goto err_cur;
680 :
681 : /*
682 : * Install the new rtrefc btree in the inode. After this point the old
683 : * btree is no longer accessible, the new tree is live, and we can
684 : * delete the cursor.
685 : */
686 15236 : xfs_rtrefcountbt_commit_staged_btree(refc_cur, sc->tp);
687 15240 : xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
688 15237 : xfs_btree_del_cursor(refc_cur, 0);
689 :
690 : /* Dispose of any unused blocks and the accounting information. */
691 15238 : error = xrep_newbt_commit(&rr->new_btree);
692 15238 : if (error)
693 : return error;
694 :
695 15237 : return xrep_roll_trans(sc);
696 0 : err_cur:
697 0 : xfs_btree_del_cursor(refc_cur, error);
698 0 : xrep_newbt_cancel(&rr->new_btree);
699 0 : return error;
700 : }
701 :
702 : /*
703 : * Now that we've logged the root of the new btree, invalidate all of the
704 : * old blocks and free them.
705 : */
706 : STATIC int
707 15241 : xrep_rtrefc_remove_old_tree(
708 : struct xrep_rtrefc *rr)
709 : {
710 15241 : int error;
711 :
712 : /*
713 : * Free all the extents that were allocated to the former rtrefcountbt
714 : * and aren't cross-linked with something else.
715 : */
716 15241 : error = xrep_reap_metadir_fsblocks(rr->sc,
717 : &rr->old_rtrefcountbt_blocks);
718 15239 : if (error)
719 : return error;
720 :
721 : /*
722 : * Ensure the proper reservation for the rtrefcount inode so that we
723 : * don't fail to expand the btree.
724 : */
725 15239 : return xrep_reset_imeta_reservation(rr->sc);
726 : }
727 :
728 : /* Rebuild the rt refcount btree. */
729 : int
730 15241 : xrep_rtrefcountbt(
731 : struct xfs_scrub *sc)
732 : {
733 15241 : struct xrep_rtrefc *rr;
734 15241 : struct xfs_mount *mp = sc->mp;
735 15241 : char *descr;
736 15241 : int error;
737 :
738 : /* We require the rtrmapbt to rebuild anything. */
739 15241 : if (!xfs_has_rtrmapbt(mp))
740 : return -EOPNOTSUPP;
741 :
742 : /* Make sure any problems with the fork are fixed. */
743 15241 : error = xrep_metadata_inode_forks(sc);
744 15241 : if (error)
745 : return error;
746 :
747 15241 : rr = kzalloc(sizeof(struct xrep_rtrefc), XCHK_GFP_FLAGS);
748 15241 : if (!rr)
749 : return -ENOMEM;
750 15241 : rr->sc = sc;
751 :
752 : /* Set up enough storage to handle one refcount record per rt extent. */
753 15241 : descr = xchk_xfile_ag_descr(sc, "reference count records");
754 15241 : error = xfarray_create(descr, mp->m_sb.sb_rextents,
755 : sizeof(struct xfs_refcount_irec),
756 : &rr->refcount_records);
757 15241 : kfree(descr);
758 15241 : if (error)
759 0 : goto out_rr;
760 :
761 : /* Collect all reference counts. */
762 15241 : xfsb_bitmap_init(&rr->old_rtrefcountbt_blocks);
763 15241 : error = xrep_rtrefc_find_refcounts(rr);
764 15241 : if (error)
765 0 : goto out_bitmap;
766 :
767 15241 : xfs_trans_ijoin(sc->tp, sc->ip, 0);
768 :
769 : /* Rebuild the refcount information. */
770 15241 : error = xrep_rtrefc_build_new_tree(rr);
771 15240 : if (error)
772 0 : goto out_bitmap;
773 :
774 : /* Kill the old tree. */
775 15240 : error = xrep_rtrefc_remove_old_tree(rr);
776 :
777 15240 : out_bitmap:
778 15240 : xfsb_bitmap_destroy(&rr->old_rtrefcountbt_blocks);
779 15239 : xfarray_destroy(rr->refcount_records);
780 15241 : out_rr:
781 15241 : kfree(rr);
782 15241 : return error;
783 : }