Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : /*
3 : * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <djwong@kernel.org>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_defer.h"
13 : #include "xfs_btree.h"
14 : #include "xfs_btree_staging.h"
15 : #include "xfs_bit.h"
16 : #include "xfs_log_format.h"
17 : #include "xfs_trans.h"
18 : #include "xfs_sb.h"
19 : #include "xfs_alloc.h"
20 : #include "xfs_alloc_btree.h"
21 : #include "xfs_rmap.h"
22 : #include "xfs_rmap_btree.h"
23 : #include "xfs_inode.h"
24 : #include "xfs_refcount.h"
25 : #include "xfs_extent_busy.h"
26 : #include "xfs_health.h"
27 : #include "xfs_bmap.h"
28 : #include "xfs_ialloc.h"
29 : #include "xfs_ag.h"
30 : #include "scrub/xfs_scrub.h"
31 : #include "scrub/scrub.h"
32 : #include "scrub/common.h"
33 : #include "scrub/btree.h"
34 : #include "scrub/trace.h"
35 : #include "scrub/repair.h"
36 : #include "scrub/bitmap.h"
37 : #include "scrub/xfile.h"
38 : #include "scrub/xfarray.h"
39 : #include "scrub/newbt.h"
40 : #include "scrub/reap.h"
41 :
42 : /*
43 : * Free Space Btree Repair
44 : * =======================
45 : *
46 : * The reverse mappings are supposed to record all space usage for the entire
47 : * AG. Therefore, we can recalculate the free extents in an AG by looking for
48 : * gaps in the physical extents recorded in the rmapbt. On a reflink
49 : * filesystem this is a little more tricky in that we have to be aware that
50 : * the rmap records are allowed to overlap.
51 : *
52 : * We derive which blocks belonged to the old bnobt/cntbt by recording all the
53 : * OWN_AG extents and subtracting out the blocks owned by all other OWN_AG
54 : * metadata: the rmapbt blocks visited while iterating the reverse mappings
55 : * and the AGFL blocks.
56 : *
57 : * Once we have both of those pieces, we can reconstruct the bnobt and cntbt
58 : * by blowing out the free block state and freeing all the extents that we
59 : * found. This adds the requirement that we can't have any busy extents in
60 : * the AG because the busy code cannot handle duplicate records.
61 : *
62 : * Note that we can only rebuild both free space btrees at the same time
63 : * because the regular extent freeing infrastructure loads both btrees at the
64 : * same time.
65 : *
66 : * We use the prefix 'xrep_abt' here because we regenerate both free space
67 : * allocation btrees at the same time.
68 : */
69 :
70 : struct xrep_abt {
71 : /* Blocks owned by the rmapbt or the agfl. */
72 : struct xagb_bitmap not_allocbt_blocks;
73 :
74 : /* All OWN_AG blocks. */
75 : struct xagb_bitmap old_allocbt_blocks;
76 :
77 : /*
78 : * New bnobt information. All btree block reservations are added to
79 : * the reservation list in new_bnobt.
80 : */
81 : struct xrep_newbt new_bnobt;
82 :
83 : /* new cntbt information */
84 : struct xrep_newbt new_cntbt;
85 :
86 : /* Free space extents. */
87 : struct xfarray *free_records;
88 :
 : /* Scrub context for this repair. */
89 : struct xfs_scrub *sc;
90 :
91 : /* Number of non-null records in @free_records. */
92 : uint64_t nr_real_records;
93 :
94 : /* get_records()'s position in the free space record array. */
95 : xfarray_idx_t array_cur;
96 :
97 : /*
98 : * Next block we anticipate seeing in the rmap records. If the next
99 : * rmap record is greater than next_agbno, we have found unused space.
100 : */
101 : xfs_agblock_t next_agbno;
102 :
103 : /* Number of free blocks in this AG. */
 : /* (Written to agf_freeblks by xrep_abt_reset_counters.) */
104 : xfs_agblock_t nr_blocks;
105 :
106 : /* Longest free extent we found in the AG. */
 : /* (Tracked by xrep_abt_get_records; written to agf_longest.) */
107 : xfs_agblock_t longest;
108 : };
109 :
110 : /* Set up to repair AG free space btrees. */
 : /*
 : * Returns 0 if the AG's busy extent list is empty (or already was), otherwise
 : * whatever xfs_extent_busy_flush() returns.
 : */
111 : int
112 54612 : xrep_setup_ag_allocbt(
113 : struct xfs_scrub *sc)
114 : {
115 54612 : unsigned int busy_gen;
116 :
117 : /*
118 : * Make sure the busy extent list is clear because we can't put extents
119 : * on there twice.
120 : */
 : /*
 : * Sample the busy generation before checking for emptiness so that the
 : * flush below waits for at least the generation we observed here.
 : */
121 54612 : busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
122 54612 : if (xfs_extent_busy_list_empty(sc->sa.pag))
123 : return 0;
124 :
125 22690 : return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
126 : }
127 :
128 : /* Check for any obvious conflicts in the free extent. */
129 : STATIC int
130 44574732 : xrep_abt_check_free_ext(
131 : struct xfs_scrub *sc,
132 : const struct xfs_alloc_rec_incore *rec)
133 : {
134 44574732 : enum xbtree_recpacking outcome;
135 44574732 : int error;
136 :
137 44574732 : if (xfs_alloc_check_perag_irec(sc->sa.pag, rec) != NULL)
138 : return -EFSCORRUPTED;
139 :
140 : /* Must not be an inode chunk. */
141 44571153 : error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
142 44571153 : rec->ar_startblock, rec->ar_blockcount, &outcome);
143 44574974 : if (error)
144 : return error;
145 44574974 : if (outcome != XBTREE_RECPACKING_EMPTY)
146 : return -EFSCORRUPTED;
147 :
148 : /* Must not be shared or CoW staging. */
149 44574974 : if (sc->sa.refc_cur) {
150 44574354 : error = xfs_refcount_has_records(sc->sa.refc_cur,
151 44574354 : XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
152 44574354 : rec->ar_blockcount, &outcome);
153 44578084 : if (error)
154 : return error;
155 44578084 : if (outcome != XBTREE_RECPACKING_EMPTY)
156 : return -EFSCORRUPTED;
157 :
158 44578084 : error = xfs_refcount_has_records(sc->sa.refc_cur,
159 44578084 : XFS_REFC_DOMAIN_COW, rec->ar_startblock,
160 44578084 : rec->ar_blockcount, &outcome);
161 44578284 : if (error)
162 : return error;
163 44578284 : if (outcome != XBTREE_RECPACKING_EMPTY)
164 0 : return -EFSCORRUPTED;
165 : }
166 :
167 : return 0;
168 : }
169 :
170 : /*
171 : * Stash a free space record for all the space since the last bno we found
172 : * all the way up to @end.
173 : */
174 : static int
175 44575951 : xrep_abt_stash(
176 : struct xrep_abt *ra,
177 : xfs_agblock_t end)
178 : {
179 44575951 : struct xfs_alloc_rec_incore arec = {
180 44575951 : .ar_startblock = ra->next_agbno,
181 44575951 : .ar_blockcount = end - ra->next_agbno,
182 : };
183 44575951 : struct xfs_scrub *sc = ra->sc;
184 44575951 : int error = 0;
185 :
186 44575951 : if (xchk_should_terminate(sc, &error))
187 0 : return error;
188 :
189 44572474 : error = xrep_abt_check_free_ext(ra->sc, &arec);
190 44578718 : if (error)
191 : return error;
192 :
193 44578547 : trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);
194 :
195 44577520 : error = xfarray_append(ra->free_records, &arec);
196 44577907 : if (error)
197 : return error;
198 :
199 44577907 : ra->nr_blocks += arec.ar_blockcount;
200 44577907 : return 0;
201 : }
202 :
203 : /* Record extents that aren't in use from gaps in the rmap records. */
204 : STATIC int
205 255024160 : xrep_abt_walk_rmap(
206 : struct xfs_btree_cur *cur,
207 : const struct xfs_rmap_irec *rec,
208 : void *priv)
209 : {
210 255024160 : struct xrep_abt *ra = priv;
211 255024160 : int error;
212 :
213 : /* Record all the OWN_AG blocks... */
214 255024160 : if (rec->rm_owner == XFS_RMAP_OWN_AG) {
215 1865457 : error = xagb_bitmap_set(&ra->old_allocbt_blocks,
216 1865457 : rec->rm_startblock, rec->rm_blockcount);
217 1865031 : if (error)
218 : return error;
219 : }
220 :
221 : /* ...and all the rmapbt blocks... */
222 255023734 : error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
223 255028430 : if (error)
224 : return error;
225 :
226 : /* ...and all the free space. */
227 255028430 : if (rec->rm_startblock > ra->next_agbno) {
228 44533832 : error = xrep_abt_stash(ra, rec->rm_startblock);
229 44539781 : if (error)
230 : return error;
231 : }
232 :
233 : /*
234 : * rmap records can overlap on reflink filesystems, so project
235 : * next_agbno as far out into the AG space as we currently know about.
236 : */
237 255034379 : ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
238 : rec->rm_startblock + rec->rm_blockcount);
239 255034379 : return 0;
240 : }
241 :
242 : /* Collect an AGFL block for the not-to-release list. */
243 : static int
244 286972 : xrep_abt_walk_agfl(
245 : struct xfs_mount *mp,
246 : xfs_agblock_t agbno,
247 : void *priv)
248 : {
249 286972 : struct xrep_abt *ra = priv;
250 :
251 286972 : return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
252 : }
253 :
254 : /*
255 : * Compare two free space extents by block number. We want to sort in order of
256 : * increasing block number.
257 : */
258 : static int
259 605640334 : xrep_bnobt_extent_cmp(
260 : const void *a,
261 : const void *b)
262 : {
263 1488459693 : const struct xfs_alloc_rec_incore *ap = a;
264 1488459693 : const struct xfs_alloc_rec_incore *bp = b;
265 :
266 605640334 : if (ap->ar_startblock > bp->ar_startblock)
267 : return 1;
268 808900306 : else if (ap->ar_startblock < bp->ar_startblock)
269 808881679 : return -1;
270 : return 0;
271 : }
272 :
273 : /*
274 : * Re-sort the free extents by block number so so that we can put the records
275 : * into the bnobt in the correct order. Make sure the records do not overlap
276 : * in physical space.
277 : */
278 : STATIC int
279 40210 : xrep_bnobt_sort_records(
280 : struct xrep_abt *ra)
281 : {
282 40210 : struct xfs_alloc_rec_incore arec;
283 40210 : xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
284 40210 : xfs_agblock_t next_agbno = 0;
285 40210 : int error;
286 :
287 40210 : error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
288 40169 : if (error)
289 : return error;
290 :
291 44623154 : while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
292 44582985 : if (arec.ar_startblock < next_agbno)
293 : return -EFSCORRUPTED;
294 :
295 44582985 : next_agbno = arec.ar_startblock + arec.ar_blockcount;
296 : }
297 :
298 : return error;
299 : }
300 :
301 : /*
302 : * Compare two free space extents by length and then block number. We want
303 : * to sort first in order of increasing length and then in order of increasing
304 : * block number.
305 : */
306 : static int
307 1192077935 : xrep_cntbt_extent_cmp(
308 : const void *a,
309 : const void *b)
310 : {
311 1192077935 : const struct xfs_alloc_rec_incore *ap = a;
312 1192077935 : const struct xfs_alloc_rec_incore *bp = b;
313 :
314 1192077935 : if (ap->ar_blockcount > bp->ar_blockcount)
315 : return 1;
316 1037366906 : else if (ap->ar_blockcount < bp->ar_blockcount)
317 : return -1;
318 882819359 : return xrep_bnobt_extent_cmp(a, b);
319 : }
320 :
321 : /*
322 : * Sort the free extents by length so so that we can put the records into the
323 : * cntbt in the correct order. Don't let userspace kill us if we're resorting
324 : * after allocating btree blocks.
325 : */
326 : STATIC int
327 79839 : xrep_cntbt_sort_records(
328 : struct xrep_abt *ra,
329 : bool is_resort)
330 : {
331 160127 : return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
332 79839 : is_resort ? 0 : XFARRAY_SORT_KILLABLE);
333 : }
334 :
335 : /*
336 : * Iterate all reverse mappings to find (1) the gaps between rmap records (all
337 : * unowned space), (2) the OWN_AG extents (which encompass the free space
338 : * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
339 : * blocks. The free space is (1) + (2) - (3) - (4).
340 : */
341 : STATIC int
342 39859 : xrep_abt_find_freespace(
343 : struct xrep_abt *ra)
344 : {
345 39859 : struct xfs_scrub *sc = ra->sc;
346 39859 : struct xfs_mount *mp = sc->mp;
347 39859 : struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
348 39859 : struct xfs_buf *agfl_bp;
349 39859 : xfs_agblock_t agend;
350 39859 : int error;
351 :
352 39859 : xagb_bitmap_init(&ra->not_allocbt_blocks);
353 :
 : /* Attach AG btree cursors; the rmap query and free-extent checks use them. */
354 39854 : xrep_ag_btcur_init(sc, &sc->sa);
355 :
356 : /*
357 : * Iterate all the reverse mappings to find gaps in the physical
358 : * mappings, all the OWN_AG blocks, and all the rmapbt extents.
359 : */
360 40151 : error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
361 40126 : if (error)
362 0 : goto err;
363 :
364 : /* Insert a record for space between the last rmap and EOAG. */
365 40126 : agend = be32_to_cpu(agf->agf_length);
366 40126 : if (ra->next_agbno < agend) {
367 38805 : error = xrep_abt_stash(ra, agend);
368 38938 : if (error)
369 0 : goto err;
370 : }
371 :
372 : /* Collect all the AGFL blocks. */
373 40259 : error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
374 40144 : if (error)
375 0 : goto err;
376 :
377 40144 : error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
378 40161 : if (error)
379 0 : goto err_agfl;
380 :
381 : /* Compute the old bnobt/cntbt blocks. */
382 40161 : error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
383 : &ra->not_allocbt_blocks);
384 39888 : if (error)
385 0 : goto err_agfl;
386 :
 : /* xrep_abt_reserve_space decrements this count as records are consumed. */
387 39888 : ra->nr_real_records = xfarray_length(ra->free_records);
 : /* The success path falls through the cleanup labels below. */
388 39844 : err_agfl:
389 39844 : xfs_trans_brelse(sc->tp, agfl_bp);
390 40154 : err:
391 40154 : xchk_ag_btcur_free(&sc->sa);
392 40194 : xagb_bitmap_destroy(&ra->not_allocbt_blocks);
393 40056 : return error;
394 : }
395 :
396 : /*
397 : * We're going to use the observed free space records to reserve blocks for the
398 : * new free space btrees, so we play an iterative game where we try to converge
399 : * on the number of blocks we need:
400 : *
401 : * 1. Estimate how many blocks we'll need to store the records.
402 : * 2. If the first free record has more blocks than we need, we're done.
403 : * We will have to re-sort the records prior to building the cntbt.
404 : * 3. If that record has exactly the number of blocks we need, null out the
405 : * record. We're done.
406 : * 4. Otherwise, we still need more blocks. Null out the record, subtract its
407 : * length from the number of blocks we need, and go back to step 1.
408 : *
409 : * Fortunately, we don't have to do any transaction work to play this game, so
410 : * we don't have to tear down the staging cursors.
411 : */
 : /* All reservations land on new_bnobt's list; see xrep_abt_dispose_reservations. */
412 : STATIC int
413 40058 : xrep_abt_reserve_space(
414 : struct xrep_abt *ra,
415 : struct xfs_btree_cur *bno_cur,
416 : struct xfs_btree_cur *cnt_cur,
417 : bool *needs_resort)
418 : {
419 40058 : struct xfs_scrub *sc = ra->sc;
420 40058 : xfarray_idx_t record_nr;
421 40058 : unsigned int allocated = 0;
422 40058 : int error = 0;
423 :
 : /* Start at the last (longest, since sorted by length) record. */
424 40058 : record_nr = xfarray_length(ra->free_records) - 1;
425 40119 : do {
426 40083 : struct xfs_alloc_rec_incore arec;
427 40083 : uint64_t required;
428 40083 : unsigned int desired;
429 40083 : unsigned int len;
430 :
431 : /* Compute how many blocks we'll need. */
432 40083 : error = xfs_btree_bload_compute_geometry(cnt_cur,
433 : &ra->new_cntbt.bload, ra->nr_real_records);
434 39920 : if (error)
435 : break;
436 :
437 39920 : error = xfs_btree_bload_compute_geometry(bno_cur,
438 : &ra->new_bnobt.bload, ra->nr_real_records);
439 40133 : if (error)
440 : break;
441 :
442 : /* How many btree blocks do we need to store all records? */
443 40133 : required = ra->new_bnobt.bload.nr_blocks +
444 40133 : ra->new_cntbt.bload.nr_blocks;
445 40133 : ASSERT(required < INT_MAX);
446 :
447 : /* If we've reserved enough blocks, we're done. */
448 40133 : if (allocated >= required)
449 : break;
450 :
451 40129 : desired = required - allocated;
452 :
453 : /* We need space but there's none left; bye! */
454 40129 : if (ra->nr_real_records == 0) {
455 : error = -ENOSPC;
456 : break;
457 : }
458 :
459 : /* Grab the first record from the list. */
 : /* (The array is sorted by increasing length, so this is the longest one.) */
460 40113 : error = xfarray_load(ra->free_records, record_nr, &arec);
461 39990 : if (error)
462 : break;
463 :
464 39990 : ASSERT(arec.ar_blockcount <= UINT_MAX);
465 39990 : len = min_t(unsigned int, arec.ar_blockcount, desired);
466 :
467 39990 : trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
468 : arec.ar_startblock, len, XFS_RMAP_OWN_AG);
469 :
470 39831 : error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
471 : arec.ar_startblock, len);
472 39733 : if (error)
473 : break;
474 39733 : allocated += len;
 : /* These blocks are no longer free space; keep agf_freeblks honest. */
475 39733 : ra->nr_blocks -= len;
476 :
477 39733 : if (arec.ar_blockcount > desired) {
478 : /*
479 : * Record has more space than we need. The number of
480 : * free records doesn't change, so shrink the free
481 : * record, inform the caller that the records are no
482 : * longer sorted by length, and exit.
483 : */
484 39697 : arec.ar_startblock += desired;
485 39697 : arec.ar_blockcount -= desired;
486 39697 : error = xfarray_store(ra->free_records, record_nr,
487 : &arec);
488 40137 : if (error)
489 : break;
490 :
491 40139 : *needs_resort = true;
492 40139 : return 0;
493 : }
494 :
495 : /*
496 : * We're going to use up the entire record, so unset it and
497 : * move on to the next one. This changes the number of free
498 : * records (but doesn't break the sorting order), so we must
499 : * go around the loop once more to re-run _bload_init.
500 : */
501 36 : error = xfarray_unset(ra->free_records, record_nr);
502 36 : if (error)
503 : break;
504 36 : ra->nr_real_records--;
505 36 : record_nr--;
506 : } while (1);
507 :
508 19 : return error;
509 : }
510 :
511 : STATIC int
512 40194 : xrep_abt_dispose_one(
513 : struct xrep_abt *ra,
514 : struct xrep_newbt_resv *resv)
515 : {
516 40194 : struct xfs_scrub *sc = ra->sc;
517 40194 : struct xfs_perag *pag = sc->sa.pag;
518 40194 : xfs_agblock_t free_agbno = resv->agbno + resv->used;
519 40194 : xfs_extlen_t free_aglen = resv->len - resv->used;
520 40194 : int error;
521 :
522 40194 : ASSERT(pag == resv->pag);
523 :
524 : /* Add a deferred rmap for each extent we used. */
525 40194 : if (resv->used > 0)
526 40184 : xfs_rmap_alloc_extent(sc->tp, pag->pag_agno, resv->agbno,
527 : resv->used, XFS_RMAP_OWN_AG);
528 :
529 : /*
530 : * For each reserved btree block we didn't use, add it to the free
531 : * space btree. We didn't touch fdblocks when we reserved them, so
532 : * we don't touch it now.
533 : */
534 40235 : if (free_aglen == 0)
535 : return 0;
536 :
537 0 : trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
538 0 : free_aglen, ra->new_bnobt.oinfo.oi_owner);
539 :
540 0 : error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
541 0 : &ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
542 0 : if (error)
543 : return error;
544 :
545 0 : return xrep_defer_finish(sc);
546 : }
547 :
548 : /*
549 : * Deal with all the space we reserved. Blocks that were allocated for the
550 : * free space btrees need to have a (deferred) rmap added for the OWN_AG
551 : * allocation, and blocks that didn't get used can be freed via the usual
552 : * (deferred) means.
553 : */
554 : STATIC void
555 40195 : xrep_abt_dispose_reservations(
556 : struct xrep_abt *ra,
557 : int error)
558 : {
559 40195 : struct xrep_newbt_resv *resv, *n;
560 :
561 40195 : if (error)
562 16 : goto junkit;
563 :
564 80396 : for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
565 40217 : error = xrep_abt_dispose_one(ra, resv);
566 40217 : if (error)
567 0 : goto junkit;
568 : }
569 :
570 40179 : junkit:
571 80395 : for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
572 40194 : xfs_perag_put(resv->pag);
573 40240 : list_del(&resv->list);
574 40233 : kfree(resv);
575 : }
576 :
577 40201 : xrep_newbt_cancel(&ra->new_bnobt);
578 40138 : xrep_newbt_cancel(&ra->new_cntbt);
579 40155 : }
580 :
581 : /* Retrieve free space data for bulk load. */
582 : STATIC int
583 276765 : xrep_abt_get_records(
584 : struct xfs_btree_cur *cur,
585 : unsigned int idx,
586 : struct xfs_btree_block *block,
587 : unsigned int nr_wanted,
588 : void *priv)
589 : {
590 276765 : struct xfs_alloc_rec_incore *arec = &cur->bc_rec.a;
591 276765 : struct xrep_abt *ra = priv;
592 276765 : union xfs_btree_rec *block_rec;
593 276765 : unsigned int loaded;
594 276765 : int error;
595 :
596 89438440 : for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
597 89161072 : error = xfarray_load_next(ra->free_records, &ra->array_cur,
598 : arec);
599 89163637 : if (error)
600 0 : return error;
601 :
602 89163637 : ra->longest = max(ra->longest, arec->ar_blockcount);
603 :
604 89163637 : block_rec = xfs_btree_rec_addr(cur, idx, block);
605 89161073 : cur->bc_ops->init_rec_from_cur(cur, block_rec);
606 : }
607 :
608 277368 : return loaded;
609 : }
610 :
611 : /* Feed one of the new btree blocks to the bulk loader. */
612 : STATIC int
613 299560 : xrep_abt_claim_block(
614 : struct xfs_btree_cur *cur,
615 : union xfs_btree_ptr *ptr,
616 : void *priv)
617 : {
618 299560 : struct xrep_abt *ra = priv;
619 :
620 299560 : return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
621 : }
622 :
623 : /*
624 : * Reset the AGF counters to reflect the free space btrees that we just
625 : * rebuilt, then reinitialize the per-AG data.
626 : */
627 : STATIC int
628 40227 : xrep_abt_reset_counters(
629 : struct xrep_abt *ra)
630 : {
631 40227 : struct xfs_scrub *sc = ra->sc;
632 40227 : struct xfs_perag *pag = sc->sa.pag;
633 40227 : struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
634 40227 : unsigned int freesp_btreeblks = 0;
635 :
636 : /*
637 : * Compute the contribution to agf_btreeblks for the new free space
638 : * btrees. This is the computed btree size minus anything we didn't
639 : * use.
640 : */
 : /*
 : * NOTE(review): the "- 1" per btree mirrors (agf_rmap_blocks - 1)
 : * below, so agf_btreeblks apparently excludes each root block --
 : * confirm against the AGF on-disk format documentation.
 : */
641 40227 : freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
642 40227 : freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;
643 :
644 40227 : freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
645 40223 : freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);
646 :
647 : /*
648 : * The AGF header contains extra information related to the free space
649 : * btrees, so we must update those fields here.
650 : */
651 40229 : agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
652 : (be32_to_cpu(agf->agf_rmap_blocks) - 1));
 : /* nr_blocks/longest were accumulated during the scan and cntbt load. */
653 40229 : agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
654 40229 : agf->agf_longest = cpu_to_be32(ra->longest);
655 40229 : xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
656 : XFS_AGF_LONGEST |
657 : XFS_AGF_FREEBLKS);
658 :
659 : /*
660 : * After we commit the new btree to disk, it is possible that the
661 : * process to reap the old btree blocks will race with the AIL trying
662 : * to checkpoint the old btree blocks into the filesystem. If the new
663 : * tree is shorter than the old one, the allocbt write verifier will
664 : * fail and the AIL will shut down the filesystem.
665 : *
666 : * To avoid this, save the old incore btree height values as the alt
667 : * height values before re-initializing the perag info from the updated
668 : * AGF to capture all the new values.
669 : */
670 40241 : pag->pagf_alt_levels[XFS_BTNUM_BNOi] = pag->pagf_levels[XFS_BTNUM_BNOi];
671 40241 : pag->pagf_alt_levels[XFS_BTNUM_CNTi] = pag->pagf_levels[XFS_BTNUM_CNTi];
672 :
673 : /* Reinitialize with the values we just logged. */
674 40241 : return xrep_reinit_pagf(sc);
675 : }
676 :
677 : /*
678 : * Use the collected free space information to stage new free space btrees.
679 : * If this is successful we'll return with the new btree root
680 : * information logged to the repair transaction but not yet committed.
681 : */
682 : STATIC int
683 39905 : xrep_abt_build_new_trees(
684 : struct xrep_abt *ra)
685 : {
686 39905 : struct xfs_scrub *sc = ra->sc;
687 39905 : struct xfs_btree_cur *bno_cur;
688 39905 : struct xfs_btree_cur *cnt_cur;
689 39905 : struct xfs_perag *pag = sc->sa.pag;
690 39905 : bool needs_resort = false;
691 39905 : int error;
692 :
693 : /*
694 : * Sort the free extents by length so that we can set up the free space
695 : * btrees in as few extents as possible. This reduces the amount of
696 : * deferred rmap / free work we have to do at the end.
697 : */
698 39905 : error = xrep_cntbt_sort_records(ra, false);
699 40140 : if (error)
700 : return error;
701 :
702 : /*
703 : * Prepare to construct the new btree by reserving disk space for the
704 : * new btree and setting up all the accounting information we'll need
705 : * to root the new btree while it's under construction and before we
706 : * attach it to the AG header.
707 : */
708 40100 : xrep_newbt_init_bare(&ra->new_bnobt, sc);
709 39824 : xrep_newbt_init_bare(&ra->new_cntbt, sc);
710 :
711 39962 : ra->new_bnobt.bload.get_records = xrep_abt_get_records;
712 39962 : ra->new_cntbt.bload.get_records = xrep_abt_get_records;
713 :
714 39962 : ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
715 39962 : ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;
716 :
717 : /* Allocate cursors for the staged btrees. */
718 39962 : bno_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_bnobt.afake,
719 : pag, XFS_BTNUM_BNO);
720 39948 : cnt_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_cntbt.afake,
721 : pag, XFS_BTNUM_CNT);
722 :
723 : /* Last chance to abort before we start committing fixes. */
724 40035 : if (xchk_should_terminate(sc, &error))
725 0 : goto err_cur;
726 :
727 : /* Reserve the space we'll need for the new btrees. */
728 40093 : error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
729 40137 : if (error)
730 16 : goto err_cur;
731 :
732 : /*
733 : * If we need to re-sort the free extents by length, do so so that we
734 : * can put the records into the cntbt in the correct order.
735 : */
736 40121 : if (needs_resort) {
737 40117 : error = xrep_cntbt_sort_records(ra, needs_resort);
738 40131 : if (error)
739 0 : goto err_cur;
740 : }
741 :
742 : /*
743 : * Due to btree slack factors, it's possible for a new btree to be one
744 : * level taller than the old btree. Update the alternate incore btree
745 : * height so that we don't trip the verifiers when writing the new
746 : * btree blocks to disk.
747 : */
748 40135 : pag->pagf_alt_levels[XFS_BTNUM_BNOi] =
749 40135 : ra->new_bnobt.bload.btree_height;
750 40135 : pag->pagf_alt_levels[XFS_BTNUM_CNTi] =
751 40135 : ra->new_cntbt.bload.btree_height;
752 :
753 : /* Load the free space by length tree. */
754 40135 : ra->array_cur = XFARRAY_CURSOR_INIT;
755 40135 : ra->longest = 0;
756 40135 : error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
757 40227 : if (error)
758 0 : goto err_levels;
759 :
760 40227 : error = xrep_bnobt_sort_records(ra);
761 40213 : if (error)
762 : return error;
763 :
764 : /* Load the free space by block number tree. */
765 40212 : ra->array_cur = XFARRAY_CURSOR_INIT;
766 40212 : error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
767 40230 : if (error)
768 0 : goto err_levels;
769 :
770 : /*
771 : * Install the new btrees in the AG header. After this point the old
772 : * btrees are no longer accessible and the new trees are live.
773 : */
774 40230 : xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
775 40213 : xfs_btree_del_cursor(bno_cur, 0);
776 40227 : xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
777 40227 : xfs_btree_del_cursor(cnt_cur, 0);
778 :
779 : /* Reset the AGF counters now that we've changed the btree shape. */
780 40235 : error = xrep_abt_reset_counters(ra);
781 40194 : if (error)
782 0 : goto err_newbt;
783 :
784 : /* Dispose of any unused blocks and the accounting information. */
785 40194 : xrep_abt_dispose_reservations(ra, error);
786 :
787 40137 : return xrep_roll_ag_trans(sc);
788 :
789 0 : err_levels:
790 0 : pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
791 0 : pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
792 16 : err_cur:
793 16 : xfs_btree_del_cursor(cnt_cur, error);
794 16 : xfs_btree_del_cursor(bno_cur, error);
795 16 : err_newbt:
796 16 : xrep_abt_dispose_reservations(ra, error);
797 16 : return error;
798 : }
799 :
800 : /*
801 : * Now that we've logged the roots of the new btrees, invalidate all of the
802 : * old blocks and free them.
803 : */
804 : STATIC int
805 39997 : xrep_abt_remove_old_trees(
806 : struct xrep_abt *ra)
807 : {
808 39997 : struct xfs_perag *pag = ra->sc->sa.pag;
809 39997 : int error;
810 :
811 : /* Free the old btree blocks if they're not in use. */
812 39997 : error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
813 : &XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
814 40083 : if (error)
815 : return error;
816 :
817 : /*
818 : * Now that we've zapped all the old allocbt blocks we can turn off
819 : * the alternate height mechanism.
820 : */
821 40083 : pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
822 40083 : pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
823 40083 : return 0;
824 : }
825 :
826 : /* Repair the freespace btrees for some AG. */
 : /*
 : * Entry point: collect free space from the rmapbt, build staged bnobt/cntbt
 : * replacements, commit them, and reap the old btree blocks.
 : */
827 : int
828 54515 : xrep_allocbt(
829 : struct xfs_scrub *sc)
830 : {
831 54515 : struct xrep_abt *ra;
832 54515 : struct xfs_mount *mp = sc->mp;
833 54515 : char *descr;
834 54515 : int error;
835 :
836 : /* We require the rmapbt to rebuild anything. */
837 54515 : if (!xfs_has_rmapbt(mp))
838 : return -EOPNOTSUPP;
839 :
840 40202 : ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
841 40217 : if (!ra)
842 : return -ENOMEM;
843 40217 : ra->sc = sc;
844 :
845 : /* We rebuild both data structures. */
846 40217 : sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;
847 :
848 : /*
849 : * Make sure the busy extent list is clear because we can't put extents
850 : * on there twice. In theory we cleared this before we started, but
851 : * let's not risk the filesystem.
852 : */
853 40217 : if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
854 0 : error = -EDEADLOCK;
855 0 : goto out_ra;
856 : }
857 :
858 : /* Set up enough storage to handle maximally fragmented free space. */
 : /* (Free extents must alternate with used blocks: sb_agblocks / 2 records.) */
859 40221 : descr = xchk_xfile_ag_descr(sc, "free space records");
860 39981 : error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
861 : sizeof(struct xfs_alloc_rec_incore),
862 : &ra->free_records);
863 39840 : kfree(descr);
864 40002 : if (error)
865 0 : goto out_ra;
866 :
867 : /* Collect the free space data and find the old btree blocks. */
868 40002 : xagb_bitmap_init(&ra->old_allocbt_blocks);
869 39758 : error = xrep_abt_find_freespace(ra);
870 40144 : if (error)
871 0 : goto out_bitmap;
872 :
873 : /* Rebuild the free space information. */
874 40144 : error = xrep_abt_build_new_trees(ra);
875 40127 : if (error)
876 16 : goto out_bitmap;
877 :
878 : /* Kill the old trees. */
879 40111 : error = xrep_abt_remove_old_trees(ra);
880 :
 : /* Cleanup labels release resources in reverse order of acquisition. */
881 40100 : out_bitmap:
882 40100 : xagb_bitmap_destroy(&ra->old_allocbt_blocks);
883 40093 : xfarray_destroy(ra->free_records);
884 40137 : out_ra:
885 40137 : kfree(ra);
886 40137 : return error;
887 : }
888 :
889 : /* Make sure both btrees are ok after we've rebuilt them. */
890 : int
891 40071 : xrep_revalidate_allocbt(
892 : struct xfs_scrub *sc)
893 : {
894 40071 : __u32 old_type = sc->sm->sm_type;
895 40071 : int error;
896 :
897 : /*
898 : * We must update sm_type temporarily so that the tree-to-tree cross
899 : * reference checks will work in the correct direction, and also so
900 : * that tracing will report correctly if there are more errors.
901 : */
902 40071 : sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
903 40071 : error = xchk_bnobt(sc);
904 40180 : if (error)
905 0 : goto out;
906 :
907 40180 : sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
908 40180 : error = xchk_cntbt(sc);
909 40219 : out:
910 40219 : sc->sm->sm_type = old_type;
911 40219 : return error;
912 : }
|