// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_inode.h"
#include "xfs_refcount.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
#include "xfs_bmap.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Free Space Btree Repair
 * =======================
 *
 * The reverse mappings are supposed to record all space usage for the entire
 * AG.  Therefore, we can recalculate the free extents in an AG by looking for
 * gaps in the physical extents recorded in the rmapbt.  On a reflink
 * filesystem this is a little more tricky in that we have to be aware that
 * the rmap records are allowed to overlap.
 *
 * We derive which blocks belonged to the old bnobt/cntbt by recording all the
 * OWN_AG extents and subtracting out the blocks owned by all other OWN_AG
 * metadata: the rmapbt blocks visited while iterating the reverse mappings
 * and the AGFL blocks.
 *
 * Once we have both of those pieces, we can reconstruct the bnobt and cntbt
 * by blowing out the free block state and freeing all the extents that we
 * found.  This adds the requirement that we can't have any busy extents in
 * the AG because the busy code cannot handle duplicate records.
 *
 * Note that we can only rebuild both free space btrees at the same time
 * because the regular extent freeing infrastructure loads both btrees at the
 * same time.
 *
 * We use the prefix 'xrep_abt' here because we regenerate both free space
 * allocation btrees at the same time.
 */

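/*
 * Editor's illustration (not part of the original file): the rmap-gap
 * technique described above, reduced to a minimal userspace sketch.  Because
 * rmap records on a reflink filesystem may overlap, we cannot emit a free
 * extent at every record boundary; instead we track the furthest block seen
 * so far ("next_agbno" in struct xrep_abt below) and only emit a gap when a
 * record starts beyond it.  All names here are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

struct demo_rmap { uint32_t start, len; };

/* Walk rmaps sorted by start block; print the gaps, i.e. the free extents. */
static void demo_find_gaps(const struct demo_rmap *recs, int nr, uint32_t agend)
{
	uint32_t next = 0;	/* like ra->next_agbno */
	int i;

	for (i = 0; i < nr; i++) {
		if (recs[i].start > next)
			printf("free: %u-%u\n", next, recs[i].start - 1);
		/* Overlap-safe: never let next move backwards. */
		if (recs[i].start + recs[i].len > next)
			next = recs[i].start + recs[i].len;
	}
	if (next < agend)	/* space between the last rmap and EOAG */
		printf("free: %u-%u\n", next, agend - 1);
}
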
struct xrep_abt {
	/* Blocks owned by the rmapbt or the agfl. */
	struct xagb_bitmap	not_allocbt_blocks;

	/* All OWN_AG blocks. */
	struct xagb_bitmap	old_allocbt_blocks;

	/*
	 * New bnobt information.  All btree block reservations are added to
	 * the reservation list in new_bnobt.
	 */
	struct xrep_newbt	new_bnobt;

	/* new cntbt information */
	struct xrep_newbt	new_cntbt;

	/* Free space extents. */
	struct xfarray		*free_records;

	struct xfs_scrub	*sc;

	/* Number of non-null records in @free_records. */
	uint64_t		nr_real_records;

	/* get_records()'s position in the free space record array. */
	xfarray_idx_t		array_cur;

	/*
	 * Next block we anticipate seeing in the rmap records.  If the next
	 * rmap record is greater than next_agbno, we have found unused space.
	 */
	xfs_agblock_t		next_agbno;

	/* Number of free blocks in this AG. */
	xfs_agblock_t		nr_blocks;

	/* Longest free extent we found in the AG. */
	xfs_agblock_t		longest;
};

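/*
 * Editor's note: @free_records lives in an xfarray, the scrub xfile-backed
 * array that pages records out to tmpfs instead of pinning kernel memory.
 * A condensed sketch of the lifecycle as this file uses it (hedged: error
 * handling trimmed, capacity argument hypothetical):
 */
#if 0	/* illustrative only; mirrors calls made later in this file */
	struct xfarray			*fa;
	struct xfs_alloc_rec_incore	rec;
	xfarray_idx_t			cur = XFARRAY_CURSOR_INIT;

	error = xfarray_create("free space records", max_records,
			sizeof(struct xfs_alloc_rec_incore), &fa);
	error = xfarray_append(fa, &rec);		/* stash a record */
	error = xfarray_sort(fa, cmp_fn, XFARRAY_SORT_KILLABLE);
	while ((error = xfarray_iter(fa, &cur, &rec)) == 1)
		/* ...visit each record in index order... */;
	xfarray_destroy(fa);
#endif
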
/* Set up to repair AG free space btrees. */
int
xrep_setup_ag_allocbt(
	struct xfs_scrub	*sc)
{
	unsigned int		busy_gen;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.
	 */
	busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
	if (xfs_extent_busy_list_empty(sc->sa.pag))
		return 0;

	return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
}

/* Check for any obvious conflicts in the free extent. */
STATIC int
xrep_abt_check_free_ext(
	struct xfs_scrub	*sc,
	const struct xfs_alloc_rec_incore *rec)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (xfs_alloc_check_perag_irec(sc->sa.pag, rec) != NULL)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->ar_startblock, rec->ar_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sc->sa.refc_cur) {
		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sc->sa.refc_cur,
				XFS_REFC_DOMAIN_COW, rec->ar_startblock,
				rec->ar_blockcount, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * Stash a free space record for all the space since the last bno we found
 * all the way up to @end.
 */
static int
xrep_abt_stash(
	struct xrep_abt		*ra,
	xfs_agblock_t		end)
{
	struct xfs_alloc_rec_incore arec = {
		.ar_startblock	= ra->next_agbno,
		.ar_blockcount	= end - ra->next_agbno,
	};
	struct xfs_scrub	*sc = ra->sc;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	error = xrep_abt_check_free_ext(ra->sc, &arec);
	if (error)
		return error;

	trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);

	error = xfarray_append(ra->free_records, &arec);
	if (error)
		return error;

	ra->nr_blocks += arec.ar_blockcount;
	return 0;
}

/* Record extents that aren't in use from gaps in the rmap records. */
STATIC int
xrep_abt_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_abt		*ra = priv;
	int			error;

	/* Record all the OWN_AG blocks... */
	if (rec->rm_owner == XFS_RMAP_OWN_AG) {
		error = xagb_bitmap_set(&ra->old_allocbt_blocks,
				rec->rm_startblock, rec->rm_blockcount);
		if (error)
			return error;
	}

	/* ...and all the rmapbt blocks... */
	error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
	if (error)
		return error;

	/* ...and all the free space. */
	if (rec->rm_startblock > ra->next_agbno) {
		error = xrep_abt_stash(ra, rec->rm_startblock);
		if (error)
			return error;
	}

	/*
	 * rmap records can overlap on reflink filesystems, so project
	 * next_agbno as far out into the AG space as we currently know about.
	 */
	ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
			rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/* Collect an AGFL block for the not-to-release list. */
static int
xrep_abt_walk_agfl(
	struct xfs_mount	*mp,
	xfs_agblock_t		agbno,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
}

/*
 * Compare two free space extents by block number.  We want to sort in order of
 * increasing block number.
 */
static int
xrep_bnobt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_startblock > bp->ar_startblock)
		return 1;
	else if (ap->ar_startblock < bp->ar_startblock)
		return -1;
	return 0;
}

/*
 * Re-sort the free extents by block number so that we can put the records
 * into the bnobt in the correct order.  Make sure the records do not overlap
 * in physical space.
 */
STATIC int
xrep_bnobt_sort_records(
	struct xrep_abt		*ra)
{
	struct xfs_alloc_rec_incore arec;
	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
	xfs_agblock_t		next_agbno = 0;
	int			error;

	error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
	if (error)
		return error;

	while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
		if (arec.ar_startblock < next_agbno)
			return -EFSCORRUPTED;

		next_agbno = arec.ar_startblock + arec.ar_blockcount;
	}

	return error;
}

/*
 * Compare two free space extents by length and then block number.  We want
 * to sort first in order of increasing length and then in order of increasing
 * block number.
 */
static int
xrep_cntbt_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_alloc_rec_incore *ap = a;
	const struct xfs_alloc_rec_incore *bp = b;

	if (ap->ar_blockcount > bp->ar_blockcount)
		return 1;
	else if (ap->ar_blockcount < bp->ar_blockcount)
		return -1;
	return xrep_bnobt_extent_cmp(a, b);
}

/*
 * Sort the free extents by length so that we can put the records into the
 * cntbt in the correct order.  Don't let userspace kill us if we're resorting
 * after allocating btree blocks.
 */
STATIC int
xrep_cntbt_sort_records(
	struct xrep_abt		*ra,
	bool			is_resort)
{
	return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
			is_resort ? 0 : XFARRAY_SORT_KILLABLE);
}

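/*
 * Editor's illustration (not part of the original file): how the two sort
 * orders differ for the same three free extents.  Hypothetical sample data:
 *
 *	extent A: startblock 100, blockcount 4
 *	extent B: startblock  20, blockcount 4
 *	extent C: startblock  50, blockcount 9
 *
 * xrep_bnobt_extent_cmp (by startblock):  B(20), C(50), A(100)
 * xrep_cntbt_extent_cmp (by length, then startblock to break the tie):
 *					   B(4,@20), A(4,@100), C(9,@50)
 *
 * The bnobt indexes free space by position and the cntbt by size, so the
 * same record set is bulk-loaded twice, once in each order.
 */
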
/*
 * Iterate all reverse mappings to find (1) the gaps between rmap records (all
 * unowned space), (2) the OWN_AG extents (which encompass the free space
 * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
 * blocks.  The free space is (1) + (2) - (3) - (4).
 */
STATIC int
xrep_abt_find_freespace(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	struct xfs_buf		*agfl_bp;
	xfs_agblock_t		agend;
	int			error;

	xagb_bitmap_init(&ra->not_allocbt_blocks);

	xrep_ag_btcur_init(sc, &sc->sa);

	/*
	 * Iterate all the reverse mappings to find gaps in the physical
	 * mappings, all the OWN_AG blocks, and all the rmapbt extents.
	 */
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
	if (error)
		goto err;

	/* Insert a record for space between the last rmap and EOAG. */
	agend = be32_to_cpu(agf->agf_length);
	if (ra->next_agbno < agend) {
		error = xrep_abt_stash(ra, agend);
		if (error)
			goto err;
	}

	/* Collect all the AGFL blocks. */
	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
	if (error)
		goto err;

	error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
	if (error)
		goto err_agfl;

	/* Compute the old bnobt/cntbt blocks. */
	error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
			&ra->not_allocbt_blocks);
	if (error)
		goto err_agfl;

	ra->nr_real_records = xfarray_length(ra->free_records);
err_agfl:
	xfs_trans_brelse(sc->tp, agfl_bp);
err:
	xchk_ag_btcur_free(&sc->sa);
	xagb_bitmap_destroy(&ra->not_allocbt_blocks);
	return error;
}

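/*
 * Editor's illustration (not part of the original file): the set arithmetic
 * that xagb_bitmap_disunion() performs above, shown on a toy 16-block AG
 * using a plain bitmask.  old_allocbt_blocks starts as every OWN_AG block;
 * subtracting the rmapbt and AGFL blocks (not_allocbt_blocks) leaves exactly
 * the blocks that the old bnobt/cntbt occupied.  All names are hypothetical.
 */
#include <stdint.h>

static uint16_t demo_disunion(uint16_t own_ag, uint16_t not_allocbt)
{
	/* disunion: keep the bits of @own_ag that are not in @not_allocbt */
	return own_ag & ~not_allocbt;
}

/*
 * e.g. OWN_AG blocks      0b0000111111110000 (blocks 4-11)
 *      rmapbt+AGFL blocks 0b0000110000110000 (blocks 4-5, 10-11)
 *      old bnobt/cntbt    0b0000001111000000 (blocks 6-9)
 */
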
/*
 * We're going to use the observed free space records to reserve blocks for the
 * new free space btrees, so we play an iterative game where we try to converge
 * on the number of blocks we need:
 *
 * 1. Estimate how many blocks we'll need to store the records.
 * 2. If the first free record has more blocks than we need, we're done.
 *    We will have to re-sort the records prior to building the cntbt.
 * 3. If that record has exactly the number of blocks we need, null out the
 *    record.  We're done.
 * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
 *    length from the number of blocks we need, and go back to step 1.
 *
 * Fortunately, we don't have to do any transaction work to play this game, so
 * we don't have to tear down the staging cursors.  A worked example follows
 * the function below.
 */
STATIC int
xrep_abt_reserve_space(
	struct xrep_abt		*ra,
	struct xfs_btree_cur	*bno_cur,
	struct xfs_btree_cur	*cnt_cur,
	bool			*needs_resort)
{
	struct xfs_scrub	*sc = ra->sc;
	xfarray_idx_t		record_nr;
	unsigned int		allocated = 0;
	int			error = 0;

	record_nr = xfarray_length(ra->free_records) - 1;
	do {
		struct xfs_alloc_rec_incore arec;
		uint64_t	required;
		unsigned int	desired;
		unsigned int	len;

		/* Compute how many blocks we'll need. */
		error = xfs_btree_bload_compute_geometry(cnt_cur,
				&ra->new_cntbt.bload, ra->nr_real_records);
		if (error)
			break;

		error = xfs_btree_bload_compute_geometry(bno_cur,
				&ra->new_bnobt.bload, ra->nr_real_records);
		if (error)
			break;

		/* How many btree blocks do we need to store all records? */
		required = ra->new_bnobt.bload.nr_blocks +
			   ra->new_cntbt.bload.nr_blocks;
		ASSERT(required < INT_MAX);

		/* If we've reserved enough blocks, we're done. */
		if (allocated >= required)
			break;

		desired = required - allocated;

		/* We need space but there's none left; bye! */
		if (ra->nr_real_records == 0) {
			error = -ENOSPC;
			break;
		}

		/* Grab the first record from the list. */
		error = xfarray_load(ra->free_records, record_nr, &arec);
		if (error)
			break;

		ASSERT(arec.ar_blockcount <= UINT_MAX);
		len = min_t(unsigned int, arec.ar_blockcount, desired);

		trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
				arec.ar_startblock, len, XFS_RMAP_OWN_AG);

		error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
				arec.ar_startblock, len);
		if (error)
			break;
		allocated += len;
		ra->nr_blocks -= len;

		if (arec.ar_blockcount > desired) {
			/*
			 * Record has more space than we need.  The number of
			 * free records doesn't change, so shrink the free
			 * record, inform the caller that the records are no
			 * longer sorted by length, and exit.
			 */
			arec.ar_startblock += desired;
			arec.ar_blockcount -= desired;
			error = xfarray_store(ra->free_records, record_nr,
					&arec);
			if (error)
				break;

			*needs_resort = true;
			return 0;
		}

		/*
		 * We're going to use up the entire record, so unset it and
		 * move on to the next one.  This changes the number of free
		 * records (but doesn't break the sorting order), so we must
		 * go around the loop once more to re-run _bload_init.
		 */
		error = xfarray_unset(ra->free_records, record_nr);
		if (error)
			break;
		ra->nr_real_records--;
		record_nr--;
	} while (1);

	return error;
}

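/*
 * Editor's worked example (not part of the original file), with hypothetical
 * numbers: suppose the geometry computation says the two new btrees need
 * required = 7 blocks in total, and the record under @record_nr is a free
 * extent at agbno 1000 with 20 blocks.
 *
 * Pass 1: allocated = 0, desired = 7.  The record is longer than we need
 * (20 > 7), so we reserve [1000, 7), shrink the record to [1007, 13), set
 * *needs_resort (the shortened record may now sort differently by length),
 * and return.
 *
 * Had the record held only 5 blocks, we would instead have consumed it
 * entirely, nulled it out, decremented nr_real_records, and looped: with one
 * record fewer, the btrees may need fewer blocks, so the geometry is
 * recomputed before we reserve the remaining blocks from the next record.
 */
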
STATIC int
xrep_abt_dispose_one(
	struct xrep_abt		*ra,
	struct xrep_newbt_resv	*resv)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_agblock_t		free_agbno = resv->agbno + resv->used;
	xfs_extlen_t		free_aglen = resv->len - resv->used;
	int			error;

	ASSERT(pag == resv->pag);

	/* Add a deferred rmap for each extent we used. */
	if (resv->used > 0) {
		xfs_fsblock_t	fsbno;

		fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, resv->agbno);
		xfs_rmap_alloc_extent(sc->tp, false, fsbno, resv->used,
				XFS_RMAP_OWN_AG);
	}

	/*
	 * For each reserved btree block we didn't use, add it to the free
	 * space btree.  We didn't touch fdblocks when we reserved them, so
	 * we don't touch it now.
	 */
	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, ra->new_bnobt.oinfo.oi_owner);

	error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
			&ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
	if (error)
		return error;

	return xrep_defer_finish(sc);
}

/*
 * Deal with all the space we reserved.  Blocks that were allocated for the
 * free space btrees need to have a (deferred) rmap added for the OWN_AG
 * allocation, and blocks that didn't get used can be freed via the usual
 * (deferred) means.
 */
STATIC void
xrep_abt_dispose_reservations(
	struct xrep_abt		*ra,
	int			error)
{
	struct xrep_newbt_resv	*resv, *n;

	if (error)
		goto junkit;

	for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
		error = xrep_abt_dispose_one(ra, resv);
		if (error)
			goto junkit;
	}

junkit:
	for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
		xfs_perag_put(resv->pag);
		list_del(&resv->list);
		kfree(resv);
	}

	xrep_newbt_cancel(&ra->new_bnobt);
	xrep_newbt_cancel(&ra->new_cntbt);
}

/* Retrieve free space data for bulk load. */
STATIC int
xrep_abt_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_alloc_rec_incore *arec = &cur->bc_rec.a;
	struct xrep_abt		*ra = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load_next(ra->free_records, &ra->array_cur,
				arec);
		if (error)
			return error;

		ra->longest = max(ra->longest, arec->ar_blockcount);

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_abt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_abt		*ra = priv;

	return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
}

/*
 * Reset the AGF counters to reflect the free space btrees that we just
 * rebuilt, then reinitialize the per-AG data.
 */
STATIC int
xrep_abt_reset_counters(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_perag	*pag = sc->sa.pag;
	struct xfs_agf		*agf = sc->sa.agf_bp->b_addr;
	unsigned int		freesp_btreeblks = 0;

	/*
	 * Compute the contribution to agf_btreeblks for the new free space
	 * btrees.  This is the computed btree size minus anything we didn't
	 * use.
	 */
	freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
	freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;

	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
	freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);

	/*
	 * The AGF header contains extra information related to the free space
	 * btrees, so we must update those fields here.
	 */
	agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
				(be32_to_cpu(agf->agf_rmap_blocks) - 1));
	agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
	agf->agf_longest = cpu_to_be32(ra->longest);
	xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
						 XFS_AGF_LONGEST |
						 XFS_AGF_FREEBLKS);

	/*
	 * After we commit the new btree to disk, it is possible that the
	 * process to reap the old btree blocks will race with the AIL trying
	 * to checkpoint the old btree blocks into the filesystem.  If the new
	 * tree is shorter than the old one, the allocbt write verifier will
	 * fail and the AIL will shut down the filesystem.
	 *
	 * To avoid this, save the old incore btree height values as the alt
	 * height values before re-initializing the perag info from the updated
	 * AGF to capture all the new values.
	 */
	pag->pagf_alt_levels[XFS_BTNUM_BNOi] = pag->pagf_levels[XFS_BTNUM_BNOi];
	pag->pagf_alt_levels[XFS_BTNUM_CNTi] = pag->pagf_levels[XFS_BTNUM_CNTi];

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagf(sc);
}

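/*
 * Editor's worked example (not part of the original file), with hypothetical
 * numbers: if the new bnobt used 3 blocks and the new cntbt used 2, with no
 * unused reservations, then freesp_btreeblks = (3 - 1) + (2 - 1) = 3.  The
 * roots are excluded because agf_btreeblks counts only non-root btree
 * blocks; likewise one block is subtracted from agf_rmap_blocks above for
 * the rmapbt root.
 */
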
/*
 * Use the collected free space information to stage new free space btrees.
 * If this is successful we'll return with the new btree root
 * information logged to the repair transaction but not yet committed.
 */
STATIC int
xrep_abt_build_new_trees(
	struct xrep_abt		*ra)
{
	struct xfs_scrub	*sc = ra->sc;
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_perag	*pag = sc->sa.pag;
	bool			needs_resort = false;
	int			error;

	/*
	 * Sort the free extents by length so that we can set up the free space
	 * btrees in as few extents as possible.  This reduces the amount of
	 * deferred rmap / free work we have to do at the end.
	 */
	error = xrep_cntbt_sort_records(ra, false);
	if (error)
		return error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the AG header.
	 */
	xrep_newbt_init_bare(&ra->new_bnobt, sc);
	xrep_newbt_init_bare(&ra->new_cntbt, sc);

	ra->new_bnobt.bload.get_records = xrep_abt_get_records;
	ra->new_cntbt.bload.get_records = xrep_abt_get_records;

	ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
	ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;

	/* Allocate cursors for the staged btrees. */
	bno_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_bnobt.afake,
			pag, XFS_BTNUM_BNO);
	cnt_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_cntbt.afake,
			pag, XFS_BTNUM_CNT);

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/* Reserve the space we'll need for the new btrees. */
	error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
	if (error)
		goto err_cur;

	/*
	 * If we need to re-sort the free extents by length, do so, so that we
	 * can put the records into the cntbt in the correct order.
	 */
	if (needs_resort) {
		error = xrep_cntbt_sort_records(ra, needs_resort);
		if (error)
			goto err_cur;
	}

	/*
	 * Due to btree slack factors, it's possible for a new btree to be one
	 * level taller than the old btree.  Update the alternate incore btree
	 * height so that we don't trip the verifiers when writing the new
	 * btree blocks to disk.
	 */
	pag->pagf_alt_levels[XFS_BTNUM_BNOi] =
			ra->new_bnobt.bload.btree_height;
	pag->pagf_alt_levels[XFS_BTNUM_CNTi] =
			ra->new_cntbt.bload.btree_height;

	/* Load the free space by length tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	ra->longest = 0;
	error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
	if (error)
		goto err_levels;

	error = xrep_bnobt_sort_records(ra);
	if (error)
		return error;

	/* Load the free space by block number tree. */
	ra->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
	if (error)
		goto err_levels;

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(bno_cur, 0);
	xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
	xfs_btree_del_cursor(cnt_cur, 0);

	/* Reset the AGF counters now that we've changed the btree shape. */
	error = xrep_abt_reset_counters(ra);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	xrep_abt_dispose_reservations(ra, error);

	return xrep_roll_ag_trans(sc);

err_levels:
	pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
	pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
err_cur:
	xfs_btree_del_cursor(cnt_cur, error);
	xfs_btree_del_cursor(bno_cur, error);
err_newbt:
	xrep_abt_dispose_reservations(ra, error);
	return error;
}

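/*
 * Editor's summary (not part of the original file) of the staging pattern
 * used above, since the same shape recurs throughout online repair:
 *
 *	xrep_newbt_init_bare()		set up a fake root + reservation list
 *	xfs_allocbt_stage_cursor()	cursor that points at the fake root
 *	xfs_btree_bload_compute_geometry()  size the new tree
 *	xfs_btree_bload()		fill new blocks via the get_records/
 *					claim_block callbacks
 *	xfs_allocbt_commit_staged_btree()  swap the new root into the AGF
 *					within the repair transaction
 *	xfs_btree_del_cursor()		done with the staging cursor
 *
 * Until the commit step, nothing in the AG headers points at the new blocks,
 * so an aborted repair leaves only reservations for the dispose path to free.
 */
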
/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_abt_remove_old_trees(
	struct xrep_abt		*ra)
{
	struct xfs_perag	*pag = ra->sc->sa.pag;
	int			error;

	/* Free the old btree blocks if they're not in use. */
	error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
			&XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
	if (error)
		return error;

	/*
	 * Now that we've zapped all the old allocbt blocks we can turn off
	 * the alternate height mechanism.
	 */
	pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
	pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
	return 0;
}

/* Repair the freespace btrees for some AG. */
int
xrep_allocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_abt		*ra;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	int			error;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
	if (!ra)
		return -ENOMEM;
	ra->sc = sc;

	/* We rebuild both data structures. */
	sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.  In theory we cleared this before we started, but
	 * let's not risk the filesystem.
	 */
	if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
		error = -EDEADLOCK;
		goto out_ra;
	}

	/* Set up enough storage to handle maximally fragmented free space. */
	descr = xchk_xfile_ag_descr(sc, "free space records");
	error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
			sizeof(struct xfs_alloc_rec_incore),
			&ra->free_records);
	kfree(descr);
	if (error)
		goto out_ra;

	/* Collect the free space data and find the old btree blocks. */
	xagb_bitmap_init(&ra->old_allocbt_blocks);
	error = xrep_abt_find_freespace(ra);
	if (error)
		goto out_bitmap;

	/* Rebuild the free space information. */
	error = xrep_abt_build_new_trees(ra);
	if (error)
		goto out_bitmap;

	/* Kill the old trees. */
	error = xrep_abt_remove_old_trees(ra);

out_bitmap:
	xagb_bitmap_destroy(&ra->old_allocbt_blocks);
	xfarray_destroy(ra->free_records);
out_ra:
	kfree(ra);
	return error;
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_allocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
	error = xchk_bnobt(sc);
	if (error)
		goto out;

	sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
	error = xchk_cntbt(sc);
out:
	sc->sm->sm_type = old_type;
	return error;
}