LCOV - code coverage report
Current view: top level - fs/xfs/scrub - alloc_repair.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 293 335 87.5 %
Date: 2023-07-31 20:08:27 Functions: 20 20 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_btree.h"
      14             : #include "xfs_btree_staging.h"
      15             : #include "xfs_bit.h"
      16             : #include "xfs_log_format.h"
      17             : #include "xfs_trans.h"
      18             : #include "xfs_sb.h"
      19             : #include "xfs_alloc.h"
      20             : #include "xfs_alloc_btree.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_rmap_btree.h"
      23             : #include "xfs_inode.h"
      24             : #include "xfs_refcount.h"
      25             : #include "xfs_extent_busy.h"
      26             : #include "xfs_health.h"
      27             : #include "xfs_bmap.h"
      28             : #include "xfs_ialloc.h"
      29             : #include "xfs_ag.h"
      30             : #include "scrub/xfs_scrub.h"
      31             : #include "scrub/scrub.h"
      32             : #include "scrub/common.h"
      33             : #include "scrub/btree.h"
      34             : #include "scrub/trace.h"
      35             : #include "scrub/repair.h"
      36             : #include "scrub/bitmap.h"
      37             : #include "scrub/xfile.h"
      38             : #include "scrub/xfarray.h"
      39             : #include "scrub/newbt.h"
      40             : #include "scrub/reap.h"
      41             : 
      42             : /*
      43             :  * Free Space Btree Repair
      44             :  * =======================
      45             :  *
      46             :  * The reverse mappings are supposed to record all space usage for the entire
      47             :  * AG.  Therefore, we can recalculate the free extents in an AG by looking for
      48             :  * gaps in the physical extents recorded in the rmapbt.  On a reflink
      49             :  * filesystem this is a little more tricky in that we have to be aware that
      50             :  * the rmap records are allowed to overlap.
      51             :  *
      52             :  * We derive which blocks belonged to the old bnobt/cntbt by recording all the
      53             :  * OWN_AG extents and subtracting out the blocks owned by all other OWN_AG
      54             :  * metadata: the rmapbt blocks visited while iterating the reverse mappings
      55             :  * and the AGFL blocks.
      56             :  *
      57             :  * Once we have both of those pieces, we can reconstruct the bnobt and cntbt
      58             :  * by blowing out the free block state and freeing all the extents that we
      59             :  * found.  This adds the requirement that we can't have any busy extents in
      60             :  * the AG because the busy code cannot handle duplicate records.
      61             :  *
      62             :  * Note that we can only rebuild both free space btrees at the same time
      63             :  * because the regular extent freeing infrastructure loads both btrees at the
      64             :  * same time.
      65             :  *
      66             :  * We use the prefix 'xrep_abt' here because we regenerate both free space
      67             :  * allocation btrees at the same time.
      68             :  */
      69             : 
      70             : struct xrep_abt {
      71             :         /* Blocks owned by the rmapbt or the agfl. */
      72             :         struct xagb_bitmap      not_allocbt_blocks;
      73             : 
      74             :         /* All OWN_AG blocks. */
      75             :         struct xagb_bitmap      old_allocbt_blocks;
      76             : 
      77             :         /*
      78             :          * New bnobt information.  All btree block reservations are added to
      79             :          * the reservation list in new_bnobt.
      80             :          */
      81             :         struct xrep_newbt       new_bnobt;
      82             : 
      83             :         /* new cntbt information */
      84             :         struct xrep_newbt       new_cntbt;
      85             : 
      86             :         /* Free space extents. */
      87             :         struct xfarray          *free_records;
      88             : 
      89             :         struct xfs_scrub        *sc;
      90             : 
      91             :         /* Number of non-null records in @free_records. */
      92             :         uint64_t                nr_real_records;
      93             : 
      94             :         /* get_records()'s position in the free space record array. */
      95             :         xfarray_idx_t           array_cur;
      96             : 
      97             :         /*
      98             :          * Next block we anticipate seeing in the rmap records.  If the next
      99             :          * rmap record is greater than next_agbno, we have found unused space.
     100             :          */
     101             :         xfs_agblock_t           next_agbno;
     102             : 
     103             :         /* Number of free blocks in this AG. */
     104             :         xfs_agblock_t           nr_blocks;
     105             : 
     106             :         /* Longest free extent we found in the AG. */
     107             :         xfs_agblock_t           longest;
     108             : };
     109             : 
     110             : /* Set up to repair AG free space btrees. */
     111             : int
     112       87316 : xrep_setup_ag_allocbt(
     113             :         struct xfs_scrub        *sc)
     114             : {
     115       87316 :         unsigned int            busy_gen;
     116             : 
     117             :         /*
     118             :          * Make sure the busy extent list is clear because we can't put extents
     119             :          * on there twice.
     120             :          */
     121       87316 :         busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
     122       87316 :         if (xfs_extent_busy_list_empty(sc->sa.pag))
     123             :                 return 0;
     124             : 
     125       26759 :         return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
     126             : }
     127             : 
     128             : /* Check for any obvious conflicts in the free extent. */
     129             : STATIC int
     130    48283582 : xrep_abt_check_free_ext(
     131             :         struct xfs_scrub        *sc,
     132             :         const struct xfs_alloc_rec_incore *rec)
     133             : {
     134    48283582 :         enum xbtree_recpacking  outcome;
     135    48283582 :         int                     error;
     136             : 
     137    48283582 :         if (xfs_alloc_check_perag_irec(sc->sa.pag, rec) != NULL)
     138             :                 return -EFSCORRUPTED;
     139             : 
     140             :         /* Must not be an inode chunk. */
     141    48283588 :         error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
     142    48283588 :                         rec->ar_startblock, rec->ar_blockcount, &outcome);
     143    48283605 :         if (error)
     144             :                 return error;
     145    48283605 :         if (outcome != XBTREE_RECPACKING_EMPTY)
     146             :                 return -EFSCORRUPTED;
     147             : 
     148             :         /* Must not be shared or CoW staging. */
     149    48283605 :         if (sc->sa.refc_cur) {
     150    48283605 :                 error = xfs_refcount_has_records(sc->sa.refc_cur,
     151    48283605 :                                 XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
     152    48283605 :                                 rec->ar_blockcount, &outcome);
     153    48283608 :                 if (error)
     154             :                         return error;
     155    48283608 :                 if (outcome != XBTREE_RECPACKING_EMPTY)
     156             :                         return -EFSCORRUPTED;
     157             : 
     158    48283608 :                 error = xfs_refcount_has_records(sc->sa.refc_cur,
     159    48283608 :                                 XFS_REFC_DOMAIN_COW, rec->ar_startblock,
     160    48283608 :                                 rec->ar_blockcount, &outcome);
     161    48283611 :                 if (error)
     162             :                         return error;
     163    48283611 :                 if (outcome != XBTREE_RECPACKING_EMPTY)
     164           0 :                         return -EFSCORRUPTED;
     165             :         }
     166             : 
     167             :         return 0;
     168             : }
     169             : 
     170             : /*
     171             :  * Stash a free space record for all the space since the last bno we found
     172             :  * all the way up to @end.
     173             :  */
     174             : static int
     175    48283585 : xrep_abt_stash(
     176             :         struct xrep_abt         *ra,
     177             :         xfs_agblock_t           end)
     178             : {
     179    48283585 :         struct xfs_alloc_rec_incore arec = {
     180    48283585 :                 .ar_startblock  = ra->next_agbno,
     181    48283585 :                 .ar_blockcount  = end - ra->next_agbno,
     182             :         };
     183    48283585 :         struct xfs_scrub        *sc = ra->sc;
     184    48283585 :         int                     error = 0;
     185             : 
     186    48283585 :         if (xchk_should_terminate(sc, &error))
     187           0 :                 return error;
     188             : 
     189    48283589 :         error = xrep_abt_check_free_ext(ra->sc, &arec);
     190    48283610 :         if (error)
     191             :                 return error;
     192             : 
     193    48283610 :         trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);
     194             : 
     195    48283605 :         error = xfarray_append(ra->free_records, &arec);
     196    48283606 :         if (error)
     197             :                 return error;
     198             : 
     199    48283606 :         ra->nr_blocks += arec.ar_blockcount;
     200    48283606 :         return 0;
     201             : }
     202             : 
     203             : /* Record extents that aren't in use from gaps in the rmap records. */
     204             : STATIC int
     205   958646679 : xrep_abt_walk_rmap(
     206             :         struct xfs_btree_cur            *cur,
     207             :         const struct xfs_rmap_irec      *rec,
     208             :         void                            *priv)
     209             : {
     210   958646679 :         struct xrep_abt                 *ra = priv;
     211   958646679 :         int                             error;
     212             : 
     213             :         /* Record all the OWN_AG blocks... */
     214   958646679 :         if (rec->rm_owner == XFS_RMAP_OWN_AG) {
     215     6528093 :                 error = xagb_bitmap_set(&ra->old_allocbt_blocks,
     216     6528093 :                                 rec->rm_startblock, rec->rm_blockcount);
     217     6528090 :                 if (error)
     218             :                         return error;
     219             :         }
     220             : 
     221             :         /* ...and all the rmapbt blocks... */
     222   958646676 :         error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
     223   958646437 :         if (error)
     224             :                 return error;
     225             : 
     226             :         /* ...and all the free space. */
     227   958646437 :         if (rec->rm_startblock > ra->next_agbno) {
     228    48196280 :                 error = xrep_abt_stash(ra, rec->rm_startblock);
     229    48196288 :                 if (error)
     230             :                         return error;
     231             :         }
     232             : 
     233             :         /*
     234             :          * rmap records can overlap on reflink filesystems, so project
     235             :          * next_agbno as far out into the AG space as we currently know about.
     236             :          */
     237   958646445 :         ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
     238             :                         rec->rm_startblock + rec->rm_blockcount);
     239   958646445 :         return 0;
     240             : }
     241             : 
     242             : /* Collect an AGFL block for the not-to-release list. */
     243             : static int
     244      647261 : xrep_abt_walk_agfl(
     245             :         struct xfs_mount        *mp,
     246             :         xfs_agblock_t           agbno,
     247             :         void                    *priv)
     248             : {
     249      647261 :         struct xrep_abt         *ra = priv;
     250             : 
     251      647261 :         return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
     252             : }
     253             : 
     254             : /*
     255             :  * Compare two free space extents by block number.  We want to sort in order of
     256             :  * increasing block number.
     257             :  */
     258             : static int
     259   595272976 : xrep_bnobt_extent_cmp(
     260             :         const void              *a,
     261             :         const void              *b)
     262             : {
     263  1331487045 :         const struct xfs_alloc_rec_incore *ap = a;
     264  1331487045 :         const struct xfs_alloc_rec_incore *bp = b;
     265             : 
     266   595272976 :         if (ap->ar_startblock > bp->ar_startblock)
     267             :                 return 1;
     268   792792116 :         else if (ap->ar_startblock < bp->ar_startblock)
     269   792792080 :                 return -1;
     270             :         return 0;
     271             : }
     272             : 
     273             : /*
     274             :  * Re-sort the free extents by block number so so that we can put the records
     275             :  * into the bnobt in the correct order.  Make sure the records do not overlap
     276             :  * in physical space.
     277             :  */
     278             : STATIC int
     279       87316 : xrep_bnobt_sort_records(
     280             :         struct xrep_abt                 *ra)
     281             : {
     282       87316 :         struct xfs_alloc_rec_incore     arec;
     283       87316 :         xfarray_idx_t                   cur = XFARRAY_CURSOR_INIT;
     284       87316 :         xfs_agblock_t                   next_agbno = 0;
     285       87316 :         int                             error;
     286             : 
     287       87316 :         error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
     288       87316 :         if (error)
     289             :                 return error;
     290             : 
     291    48370928 :         while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
     292    48283612 :                 if (arec.ar_startblock < next_agbno)
     293             :                         return -EFSCORRUPTED;
     294             : 
     295    48283612 :                 next_agbno = arec.ar_startblock + arec.ar_blockcount;
     296             :         }
     297             : 
     298             :         return error;
     299             : }
     300             : 
     301             : /*
     302             :  * Compare two free space extents by length and then block number.  We want
     303             :  * to sort first in order of increasing length and then in order of increasing
     304             :  * block number.
     305             :  */
     306             : static int
     307  1191976276 : xrep_cntbt_extent_cmp(
     308             :         const void                      *a,
     309             :         const void                      *b)
     310             : {
     311  1191976276 :         const struct xfs_alloc_rec_incore *ap = a;
     312  1191976276 :         const struct xfs_alloc_rec_incore *bp = b;
     313             : 
     314  1191976276 :         if (ap->ar_blockcount > bp->ar_blockcount)
     315             :                 return 1;
     316   914793647 :         else if (ap->ar_blockcount < bp->ar_blockcount)
     317             :                 return -1;
     318   736214069 :         return xrep_bnobt_extent_cmp(a, b);
     319             : }
     320             : 
     321             : /*
     322             :  * Sort the free extents by length so so that we can put the records into the
     323             :  * cntbt in the correct order.  Don't let userspace kill us if we're resorting
     324             :  * after allocating btree blocks.
     325             :  */
     326             : STATIC int
     327      174630 : xrep_cntbt_sort_records(
     328             :         struct xrep_abt                 *ra,
     329             :         bool                            is_resort)
     330             : {
     331      349262 :         return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
     332      174630 :                         is_resort ? 0 : XFARRAY_SORT_KILLABLE);
     333             : }
     334             : 
     335             : /*
     336             :  * Iterate all reverse mappings to find (1) the gaps between rmap records (all
     337             :  * unowned space), (2) the OWN_AG extents (which encompass the free space
     338             :  * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
     339             :  * blocks.  The free space is (1) + (2) - (3) - (4).
     340             :  */
     341             : STATIC int
     342       87314 : xrep_abt_find_freespace(
     343             :         struct xrep_abt         *ra)
     344             : {
     345       87314 :         struct xfs_scrub        *sc = ra->sc;
     346       87314 :         struct xfs_mount        *mp = sc->mp;
     347       87314 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
     348       87314 :         struct xfs_buf          *agfl_bp;
     349       87314 :         xfs_agblock_t           agend;
     350       87314 :         int                     error;
     351             : 
     352       87314 :         xagb_bitmap_init(&ra->not_allocbt_blocks);
     353             : 
     354       87296 :         xrep_ag_btcur_init(sc, &sc->sa);
     355             : 
     356             :         /*
     357             :          * Iterate all the reverse mappings to find gaps in the physical
     358             :          * mappings, all the OWN_AG blocks, and all the rmapbt extents.
     359             :          */
     360       87303 :         error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
     361       87316 :         if (error)
     362           0 :                 goto err;
     363             : 
     364             :         /* Insert a record for space between the last rmap and EOAG. */
     365       87316 :         agend = be32_to_cpu(agf->agf_length);
     366       87316 :         if (ra->next_agbno < agend) {
     367       87316 :                 error = xrep_abt_stash(ra, agend);
     368       87316 :                 if (error)
     369           0 :                         goto err;
     370             :         }
     371             : 
     372             :         /* Collect all the AGFL blocks. */
     373       87316 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
     374       87316 :         if (error)
     375           0 :                 goto err;
     376             : 
     377       87316 :         error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
     378       87316 :         if (error)
     379           0 :                 goto err_agfl;
     380             : 
     381             :         /* Compute the old bnobt/cntbt blocks. */
     382       87316 :         error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
     383             :                         &ra->not_allocbt_blocks);
     384       87315 :         if (error)
     385           0 :                 goto err_agfl;
     386             : 
     387       87315 :         ra->nr_real_records = xfarray_length(ra->free_records);
     388       87315 : err_agfl:
     389       87315 :         xfs_trans_brelse(sc->tp, agfl_bp);
     390       87316 : err:
     391       87316 :         xchk_ag_btcur_free(&sc->sa);
     392       87316 :         xagb_bitmap_destroy(&ra->not_allocbt_blocks);
     393       87316 :         return error;
     394             : }
     395             : 
     396             : /*
     397             :  * We're going to use the observed free space records to reserve blocks for the
     398             :  * new free space btrees, so we play an iterative game where we try to converge
     399             :  * on the number of blocks we need:
     400             :  *
     401             :  * 1. Estimate how many blocks we'll need to store the records.
     402             :  * 2. If the first free record has more blocks than we need, we're done.
     403             :  *    We will have to re-sort the records prior to building the cntbt.
     404             :  * 3. If that record has exactly the number of blocks we need, null out the
     405             :  *    record.  We're done.
     406             :  * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
     407             :  *    length from the number of blocks we need, and go back to step 1.
     408             :  *
     409             :  * Fortunately, we don't have to do any transaction work to play this game, so
     410             :  * we don't have to tear down the staging cursors.
     411             :  */
     412             : STATIC int
     413       87314 : xrep_abt_reserve_space(
     414             :         struct xrep_abt         *ra,
     415             :         struct xfs_btree_cur    *bno_cur,
     416             :         struct xfs_btree_cur    *cnt_cur,
     417             :         bool                    *needs_resort)
     418             : {
     419       87314 :         struct xfs_scrub        *sc = ra->sc;
     420       87314 :         xfarray_idx_t           record_nr;
     421       87314 :         unsigned int            allocated = 0;
     422       87314 :         int                     error = 0;
     423             : 
     424       87314 :         record_nr = xfarray_length(ra->free_records) - 1;
     425       87314 :         do {
     426       87314 :                 struct xfs_alloc_rec_incore arec;
     427       87314 :                 uint64_t                required;
     428       87314 :                 unsigned int            desired;
     429       87314 :                 unsigned int            len;
     430             : 
     431             :                 /* Compute how many blocks we'll need. */
     432       87314 :                 error = xfs_btree_bload_compute_geometry(cnt_cur,
     433             :                                 &ra->new_cntbt.bload, ra->nr_real_records);
     434       87309 :                 if (error)
     435             :                         break;
     436             : 
     437       87309 :                 error = xfs_btree_bload_compute_geometry(bno_cur,
     438             :                                 &ra->new_bnobt.bload, ra->nr_real_records);
     439       87314 :                 if (error)
     440             :                         break;
     441             : 
     442             :                 /* How many btree blocks do we need to store all records? */
     443       87314 :                 required = ra->new_bnobt.bload.nr_blocks +
     444       87314 :                            ra->new_cntbt.bload.nr_blocks;
     445       87314 :                 ASSERT(required < INT_MAX);
     446             : 
     447             :                 /* If we've reserved enough blocks, we're done. */
     448       87314 :                 if (allocated >= required)
     449             :                         break;
     450             : 
     451       87314 :                 desired = required - allocated;
     452             : 
     453             :                 /* We need space but there's none left; bye! */
     454       87314 :                 if (ra->nr_real_records == 0) {
     455             :                         error = -ENOSPC;
     456             :                         break;
     457             :                 }
     458             : 
     459             :                 /* Grab the first record from the list. */
     460       87314 :                 error = xfarray_load(ra->free_records, record_nr, &arec);
     461       87315 :                 if (error)
     462             :                         break;
     463             : 
     464       87315 :                 ASSERT(arec.ar_blockcount <= UINT_MAX);
     465       87315 :                 len = min_t(unsigned int, arec.ar_blockcount, desired);
     466             : 
     467       87315 :                 trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
     468             :                                 arec.ar_startblock, len, XFS_RMAP_OWN_AG);
     469             : 
     470       87316 :                 error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
     471             :                                 arec.ar_startblock, len);
     472       87313 :                 if (error)
     473             :                         break;
     474       87313 :                 allocated += len;
     475       87313 :                 ra->nr_blocks -= len;
     476             : 
     477       87313 :                 if (arec.ar_blockcount > desired) {
     478             :                         /*
     479             :                          * Record has more space than we need.  The number of
     480             :                          * free records doesn't change, so shrink the free
     481             :                          * record, inform the caller that the records are no
     482             :                          * longer sorted by length, and exit.
     483             :                          */
     484       87313 :                         arec.ar_startblock += desired;
     485       87313 :                         arec.ar_blockcount -= desired;
     486       87313 :                         error = xfarray_store(ra->free_records, record_nr,
     487             :                                         &arec);
     488       87316 :                         if (error)
     489             :                                 break;
     490             : 
     491       87316 :                         *needs_resort = true;
     492       87316 :                         return 0;
     493             :                 }
     494             : 
     495             :                 /*
     496             :                  * We're going to use up the entire record, so unset it and
     497             :                  * move on to the next one.  This changes the number of free
     498             :                  * records (but doesn't break the sorting order), so we must
     499             :                  * go around the loop once more to re-run _bload_init.
     500             :                  */
     501           0 :                 error = xfarray_unset(ra->free_records, record_nr);
     502           0 :                 if (error)
     503             :                         break;
     504           0 :                 ra->nr_real_records--;
     505           0 :                 record_nr--;
     506             :         } while (1);
     507             : 
     508           0 :         return error;
     509             : }
     510             : 
     511             : STATIC int
     512       87315 : xrep_abt_dispose_one(
     513             :         struct xrep_abt         *ra,
     514             :         struct xrep_newbt_resv  *resv)
     515             : {
     516       87315 :         struct xfs_scrub        *sc = ra->sc;
     517       87315 :         struct xfs_perag        *pag = sc->sa.pag;
     518       87315 :         xfs_agblock_t           free_agbno = resv->agbno + resv->used;
     519       87315 :         xfs_extlen_t            free_aglen = resv->len - resv->used;
     520       87315 :         int                     error;
     521             : 
     522       87315 :         ASSERT(pag == resv->pag);
     523             : 
     524             :         /* Add a deferred rmap for each extent we used. */
     525       87315 :         if (resv->used > 0) {
     526       87315 :                 xfs_fsblock_t   fsbno;
     527             : 
     528       87315 :                 fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, resv->agbno);
     529       87315 :                 xfs_rmap_alloc_extent(sc->tp, false, fsbno, resv->used,
     530             :                                 XFS_RMAP_OWN_AG);
     531             :         }
     532             : 
     533             :         /*
     534             :          * For each reserved btree block we didn't use, add it to the free
     535             :          * space btree.  We didn't touch fdblocks when we reserved them, so
     536             :          * we don't touch it now.
     537             :          */
     538       87316 :         if (free_aglen == 0)
     539             :                 return 0;
     540             : 
     541           0 :         trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
     542           0 :                         free_aglen, ra->new_bnobt.oinfo.oi_owner);
     543             : 
     544           0 :         error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
     545           0 :                         &ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
     546           0 :         if (error)
     547             :                 return error;
     548             : 
     549           0 :         return xrep_defer_finish(sc);
     550             : }
     551             : 
     552             : /*
     553             :  * Deal with all the space we reserved.  Blocks that were allocated for the
     554             :  * free space btrees need to have a (deferred) rmap added for the OWN_AG
     555             :  * allocation, and blocks that didn't get used can be freed via the usual
     556             :  * (deferred) means.
     557             :  */
     558             : STATIC void
     559       87316 : xrep_abt_dispose_reservations(
     560             :         struct xrep_abt         *ra,
     561             :         int                     error)
     562             : {
     563       87316 :         struct xrep_newbt_resv  *resv, *n;
     564             : 
     565       87316 :         if (error)
     566           0 :                 goto junkit;
     567             : 
     568      174632 :         for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
     569       87316 :                 error = xrep_abt_dispose_one(ra, resv);
     570       87316 :                 if (error)
     571           0 :                         goto junkit;
     572             :         }
     573             : 
     574       87316 : junkit:
     575      174631 :         for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
     576       87316 :                 xfs_perag_put(resv->pag);
     577       87316 :                 list_del(&resv->list);
     578       87316 :                 kfree(resv);
     579             :         }
     580             : 
     581       87315 :         xrep_newbt_cancel(&ra->new_bnobt);
     582       87315 :         xrep_newbt_cancel(&ra->new_cntbt);
     583       87314 : }
     584             : 
     585             : /* Retrieve free space data for bulk load. */
     586             : STATIC int
     587      389004 : xrep_abt_get_records(
     588             :         struct xfs_btree_cur            *cur,
     589             :         unsigned int                    idx,
     590             :         struct xfs_btree_block          *block,
     591             :         unsigned int                    nr_wanted,
     592             :         void                            *priv)
     593             : {
     594      389004 :         struct xfs_alloc_rec_incore     *arec = &cur->bc_rec.a;
     595      389004 :         struct xrep_abt                 *ra = priv;
     596      389004 :         union xfs_btree_rec             *block_rec;
     597      389004 :         unsigned int                    loaded;
     598      389004 :         int                             error;
     599             : 
     600    96956227 :         for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
     601    96567213 :                 error = xfarray_load_next(ra->free_records, &ra->array_cur,
     602             :                                 arec);
     603    96567223 :                 if (error)
     604           0 :                         return error;
     605             : 
     606    96567223 :                 ra->longest = max(ra->longest, arec->ar_blockcount);
     607             : 
     608    96567223 :                 block_rec = xfs_btree_rec_addr(cur, idx, block);
     609    96567223 :                 cur->bc_ops->init_rec_from_cur(cur, block_rec);
     610             :         }
     611             : 
     612      389014 :         return loaded;
     613             : }
     614             : 
     615             : /* Feed one of the new btree blocks to the bulk loader. */
     616             : STATIC int
     617      413849 : xrep_abt_claim_block(
     618             :         struct xfs_btree_cur    *cur,
     619             :         union xfs_btree_ptr     *ptr,
     620             :         void                    *priv)
     621             : {
     622      413849 :         struct xrep_abt         *ra = priv;
     623             : 
     624      413849 :         return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
     625             : }
     626             : 
     627             : /*
     628             :  * Reset the AGF counters to reflect the free space btrees that we just
     629             :  * rebuilt, then reinitialize the per-AG data.
     630             :  */
     631             : STATIC int
     632       87315 : xrep_abt_reset_counters(
     633             :         struct xrep_abt         *ra)
     634             : {
     635       87315 :         struct xfs_scrub        *sc = ra->sc;
     636       87315 :         struct xfs_perag        *pag = sc->sa.pag;
     637       87315 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
     638       87315 :         unsigned int            freesp_btreeblks = 0;
     639             : 
     640             :         /*
     641             :          * Compute the contribution to agf_btreeblks for the new free space
     642             :          * btrees.  This is the computed btree size minus anything we didn't
     643             :          * use.
     644             :          */
     645       87315 :         freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
     646       87315 :         freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;
     647             : 
     648       87315 :         freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
     649       87315 :         freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);
     650             : 
     651             :         /*
     652             :          * The AGF header contains extra information related to the free space
     653             :          * btrees, so we must update those fields here.
     654             :          */
     655       87315 :         agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
     656             :                                 (be32_to_cpu(agf->agf_rmap_blocks) - 1));
     657       87315 :         agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
     658       87315 :         agf->agf_longest = cpu_to_be32(ra->longest);
     659       87315 :         xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
     660             :                                                  XFS_AGF_LONGEST |
     661             :                                                  XFS_AGF_FREEBLKS);
     662             : 
     663             :         /*
     664             :          * After we commit the new btree to disk, it is possible that the
     665             :          * process to reap the old btree blocks will race with the AIL trying
     666             :          * to checkpoint the old btree blocks into the filesystem.  If the new
     667             :          * tree is shorter than the old one, the allocbt write verifier will
     668             :          * fail and the AIL will shut down the filesystem.
     669             :          *
     670             :          * To avoid this, save the old incore btree height values as the alt
     671             :          * height values before re-initializing the perag info from the updated
     672             :          * AGF to capture all the new values.
     673             :          */
     674       87316 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = pag->pagf_levels[XFS_BTNUM_BNOi];
     675       87316 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = pag->pagf_levels[XFS_BTNUM_CNTi];
     676             : 
     677             :         /* Reinitialize with the values we just logged. */
     678       87316 :         return xrep_reinit_pagf(sc);
     679             : }
     680             : 
     681             : /*
     682             :  * Use the collected free space information to stage new free space btrees.
     683             :  * If this is successful we'll return with the new btree root
     684             :  * information logged to the repair transaction but not yet committed.
     685             :  */
     686             : STATIC int
     687       87316 : xrep_abt_build_new_trees(
     688             :         struct xrep_abt         *ra)
     689             : {
     690       87316 :         struct xfs_scrub        *sc = ra->sc;
     691       87316 :         struct xfs_btree_cur    *bno_cur;
     692       87316 :         struct xfs_btree_cur    *cnt_cur;
     693       87316 :         struct xfs_perag        *pag = sc->sa.pag;
     694       87316 :         bool                    needs_resort = false;
     695       87316 :         int                     error;
     696             : 
     697             :         /*
     698             :          * Sort the free extents by length so that we can set up the free space
     699             :          * btrees in as few extents as possible.  This reduces the amount of
     700             :          * deferred rmap / free work we have to do at the end.
     701             :          */
     702       87316 :         error = xrep_cntbt_sort_records(ra, false);
     703       87316 :         if (error)
     704             :                 return error;
     705             : 
     706             :         /*
     707             :          * Prepare to construct the new btree by reserving disk space for the
     708             :          * new btree and setting up all the accounting information we'll need
     709             :          * to root the new btree while it's under construction and before we
     710             :          * attach it to the AG header.
     711             :          */
     712       87316 :         xrep_newbt_init_bare(&ra->new_bnobt, sc);
     713       87314 :         xrep_newbt_init_bare(&ra->new_cntbt, sc);
     714             : 
     715       87314 :         ra->new_bnobt.bload.get_records = xrep_abt_get_records;
     716       87314 :         ra->new_cntbt.bload.get_records = xrep_abt_get_records;
     717             : 
     718       87314 :         ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
     719       87314 :         ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;
     720             : 
     721             :         /* Allocate cursors for the staged btrees. */
     722       87314 :         bno_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_bnobt.afake,
     723             :                         pag, XFS_BTNUM_BNO);
     724       87314 :         cnt_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_cntbt.afake,
     725             :                         pag, XFS_BTNUM_CNT);
     726             : 
     727             :         /* Last chance to abort before we start committing fixes. */
     728       87314 :         if (xchk_should_terminate(sc, &error))
     729           0 :                 goto err_cur;
     730             : 
     731             :         /* Reserve the space we'll need for the new btrees. */
     732       87314 :         error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
     733       87316 :         if (error)
     734           0 :                 goto err_cur;
     735             : 
     736             :         /*
     737             :          * If we need to re-sort the free extents by length, do so so that we
     738             :          * can put the records into the cntbt in the correct order.
     739             :          */
     740       87316 :         if (needs_resort) {
     741       87316 :                 error = xrep_cntbt_sort_records(ra, needs_resort);
     742       87316 :                 if (error)
     743           0 :                         goto err_cur;
     744             :         }
     745             : 
     746             :         /*
     747             :          * Due to btree slack factors, it's possible for a new btree to be one
     748             :          * level taller than the old btree.  Update the alternate incore btree
     749             :          * height so that we don't trip the verifiers when writing the new
     750             :          * btree blocks to disk.
     751             :          */
     752       87316 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] =
     753       87316 :                                         ra->new_bnobt.bload.btree_height;
     754       87316 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] =
     755       87316 :                                         ra->new_cntbt.bload.btree_height;
     756             : 
     757             :         /* Load the free space by length tree. */
     758       87316 :         ra->array_cur = XFARRAY_CURSOR_INIT;
     759       87316 :         ra->longest = 0;
     760       87316 :         error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
     761       87316 :         if (error)
     762           0 :                 goto err_levels;
     763             : 
     764       87316 :         error = xrep_bnobt_sort_records(ra);
     765       87316 :         if (error)
     766             :                 return error;
     767             : 
     768             :         /* Load the free space by block number tree. */
     769       87316 :         ra->array_cur = XFARRAY_CURSOR_INIT;
     770       87316 :         error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
     771       87315 :         if (error)
     772           0 :                 goto err_levels;
     773             : 
     774             :         /*
     775             :          * Install the new btrees in the AG header.  After this point the old
     776             :          * btrees are no longer accessible and the new trees are live.
     777             :          */
     778       87315 :         xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
     779       87315 :         xfs_btree_del_cursor(bno_cur, 0);
     780       87315 :         xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
     781       87315 :         xfs_btree_del_cursor(cnt_cur, 0);
     782             : 
     783             :         /* Reset the AGF counters now that we've changed the btree shape. */
     784       87315 :         error = xrep_abt_reset_counters(ra);
     785       87315 :         if (error)
     786           0 :                 goto err_newbt;
     787             : 
     788             :         /* Dispose of any unused blocks and the accounting information. */
     789       87315 :         xrep_abt_dispose_reservations(ra, error);
     790             : 
     791       87315 :         return xrep_roll_ag_trans(sc);
     792             : 
     793           0 : err_levels:
     794           0 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
     795           0 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
     796           0 : err_cur:
     797           0 :         xfs_btree_del_cursor(cnt_cur, error);
     798           0 :         xfs_btree_del_cursor(bno_cur, error);
     799           0 : err_newbt:
     800           0 :         xrep_abt_dispose_reservations(ra, error);
     801           0 :         return error;
     802             : }
     803             : 
     804             : /*
     805             :  * Now that we've logged the roots of the new btrees, invalidate all of the
     806             :  * old blocks and free them.
     807             :  */
     808             : STATIC int
     809       87315 : xrep_abt_remove_old_trees(
     810             :         struct xrep_abt         *ra)
     811             : {
     812       87315 :         struct xfs_perag        *pag = ra->sc->sa.pag;
     813       87315 :         int                     error;
     814             : 
     815             :         /* Free the old btree blocks if they're not in use. */
     816       87315 :         error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
     817             :                         &XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
     818       87316 :         if (error)
     819             :                 return error;
     820             : 
     821             :         /*
     822             :          * Now that we've zapped all the old allocbt blocks we can turn off
     823             :          * the alternate height mechanism.
     824             :          */
     825       87316 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
     826       87316 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
     827       87316 :         return 0;
     828             : }
     829             : 
     830             : /* Repair the freespace btrees for some AG. */
     831             : int
     832       87316 : xrep_allocbt(
     833             :         struct xfs_scrub        *sc)
     834             : {
     835       87316 :         struct xrep_abt         *ra;
     836       87316 :         struct xfs_mount        *mp = sc->mp;
     837       87316 :         char                    *descr;
     838       87316 :         int                     error;
     839             : 
     840             :         /* We require the rmapbt to rebuild anything. */
     841       87316 :         if (!xfs_has_rmapbt(mp))
     842             :                 return -EOPNOTSUPP;
     843             : 
     844       87316 :         ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
     845       87316 :         if (!ra)
     846             :                 return -ENOMEM;
     847       87316 :         ra->sc = sc;
     848             : 
     849             :         /* We rebuild both data structures. */
     850       87316 :         sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;
     851             : 
     852             :         /*
     853             :          * Make sure the busy extent list is clear because we can't put extents
     854             :          * on there twice.  In theory we cleared this before we started, but
     855             :          * let's not risk the filesystem.
     856             :          */
     857       87316 :         if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
     858           0 :                 error = -EDEADLOCK;
     859           0 :                 goto out_ra;
     860             :         }
     861             : 
     862             :         /* Set up enough storage to handle maximally fragmented free space. */
     863       87316 :         descr = xchk_xfile_ag_descr(sc, "free space records");
     864       87311 :         error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
     865             :                         sizeof(struct xfs_alloc_rec_incore),
     866             :                         &ra->free_records);
     867       87301 :         kfree(descr);
     868       87306 :         if (error)
     869           0 :                 goto out_ra;
     870             : 
     871             :         /* Collect the free space data and find the old btree blocks. */
     872       87306 :         xagb_bitmap_init(&ra->old_allocbt_blocks);
     873       87298 :         error = xrep_abt_find_freespace(ra);
     874       87316 :         if (error)
     875           0 :                 goto out_bitmap;
     876             : 
     877             :         /* Rebuild the free space information. */
     878       87316 :         error = xrep_abt_build_new_trees(ra);
     879       87315 :         if (error)
     880           0 :                 goto out_bitmap;
     881             : 
     882             :         /* Kill the old trees. */
     883       87315 :         error = xrep_abt_remove_old_trees(ra);
     884             : 
     885       87316 : out_bitmap:
     886       87316 :         xagb_bitmap_destroy(&ra->old_allocbt_blocks);
     887       87313 :         xfarray_destroy(ra->free_records);
     888       87314 : out_ra:
     889       87314 :         kfree(ra);
     890       87314 :         return error;
     891             : }
     892             : 
     893             : /* Make sure both btrees are ok after we've rebuilt them. */
     894             : int
     895       87316 : xrep_revalidate_allocbt(
     896             :         struct xfs_scrub        *sc)
     897             : {
     898       87316 :         __u32                   old_type = sc->sm->sm_type;
     899       87316 :         int                     error;
     900             : 
     901             :         /*
     902             :          * We must update sm_type temporarily so that the tree-to-tree cross
     903             :          * reference checks will work in the correct direction, and also so
     904             :          * that tracing will report correctly if there are more errors.
     905             :          */
     906       87316 :         sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
     907       87316 :         error = xchk_bnobt(sc);
     908       87316 :         if (error)
     909           0 :                 goto out;
     910             : 
     911       87316 :         sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
     912       87316 :         error = xchk_cntbt(sc);
     913       87316 : out:
     914       87316 :         sc->sm->sm_type = old_type;
     915       87316 :         return error;
     916             : }

Generated by: LCOV version 1.14