LCOV - code coverage report
Current view: top level - fs/xfs/scrub - alloc_repair.c (source / functions)
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023
Date: 2023-07-31 20:08:12
                          Hit      Total    Coverage
Lines:                    305      333      91.6 %
Functions:                20       20       100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_btree.h"
      14             : #include "xfs_btree_staging.h"
      15             : #include "xfs_bit.h"
      16             : #include "xfs_log_format.h"
      17             : #include "xfs_trans.h"
      18             : #include "xfs_sb.h"
      19             : #include "xfs_alloc.h"
      20             : #include "xfs_alloc_btree.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_rmap_btree.h"
      23             : #include "xfs_inode.h"
      24             : #include "xfs_refcount.h"
      25             : #include "xfs_extent_busy.h"
      26             : #include "xfs_health.h"
      27             : #include "xfs_bmap.h"
      28             : #include "xfs_ialloc.h"
      29             : #include "xfs_ag.h"
      30             : #include "scrub/xfs_scrub.h"
      31             : #include "scrub/scrub.h"
      32             : #include "scrub/common.h"
      33             : #include "scrub/btree.h"
      34             : #include "scrub/trace.h"
      35             : #include "scrub/repair.h"
      36             : #include "scrub/bitmap.h"
      37             : #include "scrub/xfile.h"
      38             : #include "scrub/xfarray.h"
      39             : #include "scrub/newbt.h"
      40             : #include "scrub/reap.h"
      41             : 
      42             : /*
      43             :  * Free Space Btree Repair
      44             :  * =======================
      45             :  *
      46             :  * The reverse mappings are supposed to record all space usage for the entire
      47             :  * AG.  Therefore, we can recalculate the free extents in an AG by looking for
      48             :  * gaps in the physical extents recorded in the rmapbt.  On a reflink
      49             :  * filesystem this is a little more tricky in that we have to be aware that
      50             :  * the rmap records are allowed to overlap.
      51             :  *
      52             :  * We derive which blocks belonged to the old bnobt/cntbt by recording all the
      53             :  * OWN_AG extents and subtracting out the blocks owned by all other OWN_AG
      54             :  * metadata: the rmapbt blocks visited while iterating the reverse mappings
      55             :  * and the AGFL blocks.
      56             :  *
      57             :  * Once we have both of those pieces, we can reconstruct the bnobt and cntbt
      58             :  * by blowing out the free block state and freeing all the extents that we
      59             :  * found.  This adds the requirement that we can't have any busy extents in
      60             :  * the AG because the busy code cannot handle duplicate records.
      61             :  *
      62             :  * Note that we can only rebuild both free space btrees at the same time
      63             :  * because the regular extent freeing infrastructure loads both btrees at the
      64             :  * same time.
      65             :  *
      66             :  * We use the prefix 'xrep_abt' here because we regenerate both free space
      67             :  * allocation btrees at the same time.
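                      :  *
                      :  * A hypothetical worked example (block numbers invented for
                      :  * illustration): in a 100-block AG whose rmap records cover
                      :  * [0,10), [10,40), [30,50), and [60,80), the overlapping records
                      :  * collapse to [0,50) and [60,80), so the gaps [50,60) and
                      :  * [80,100) become the new free space records.  If the OWN_AG
                      :  * records cover [10,40) while the rmapbt itself occupies blocks
                      :  * 10-12 and the AGFL holds blocks 13-14, then [15,40) is the old
                      :  * bnobt/cntbt footprint that must be reaped after the rebuild.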
      68             :  */
      69             : 
      70             : struct xrep_abt {
      71             :         /* Blocks owned by the rmapbt or the agfl. */
      72             :         struct xagb_bitmap      not_allocbt_blocks;
      73             : 
      74             :         /* All OWN_AG blocks. */
      75             :         struct xagb_bitmap      old_allocbt_blocks;
      76             : 
      77             :         /*
      78             :          * New bnobt information.  All btree block reservations are added to
      79             :          * the reservation list in new_bnobt.
      80             :          */
      81             :         struct xrep_newbt       new_bnobt;
      82             : 
      83             :         /* new cntbt information */
      84             :         struct xrep_newbt       new_cntbt;
      85             : 
      86             :         /* Free space extents. */
      87             :         struct xfarray          *free_records;
      88             : 
      89             :         struct xfs_scrub        *sc;
      90             : 
      91             :         /* Number of non-null records in @free_records. */
      92             :         uint64_t                nr_real_records;
      93             : 
      94             :         /* get_records()'s position in the free space record array. */
      95             :         xfarray_idx_t           array_cur;
      96             : 
      97             :         /*
      98             :          * Next block we anticipate seeing in the rmap records.  If the next
      99             :          * rmap record is greater than next_agbno, we have found unused space.
     100             :          */
     101             :         xfs_agblock_t           next_agbno;
     102             : 
     103             :         /* Number of free blocks in this AG. */
     104             :         xfs_agblock_t           nr_blocks;
     105             : 
     106             :         /* Longest free extent we found in the AG. */
     107             :         xfs_agblock_t           longest;
     108             : };
     109             : 
     110             : /* Set up to repair AG free space btrees. */
     111             : int
     112       54612 : xrep_setup_ag_allocbt(
     113             :         struct xfs_scrub        *sc)
     114             : {
     115       54612 :         unsigned int            busy_gen;
     116             : 
     117             :         /*
     118             :          * Make sure the busy extent list is clear because we can't put extents
     119             :          * on there twice.
     120             :          */
     121       54612 :         busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
     122       54612 :         if (xfs_extent_busy_list_empty(sc->sa.pag))
     123             :                 return 0;
     124             : 
     125       22690 :         return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
     126             : }
     127             : 
     128             : /* Check for any obvious conflicts in the free extent. */
     129             : STATIC int
     130    44574732 : xrep_abt_check_free_ext(
     131             :         struct xfs_scrub        *sc,
     132             :         const struct xfs_alloc_rec_incore *rec)
     133             : {
     134    44574732 :         enum xbtree_recpacking  outcome;
     135    44574732 :         int                     error;
     136             : 
     137    44574732 :         if (xfs_alloc_check_perag_irec(sc->sa.pag, rec) != NULL)
     138             :                 return -EFSCORRUPTED;
     139             : 
     140             :         /* Must not be an inode chunk. */
     141    44571153 :         error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
     142    44571153 :                         rec->ar_startblock, rec->ar_blockcount, &outcome);
     143    44574974 :         if (error)
     144             :                 return error;
     145    44574974 :         if (outcome != XBTREE_RECPACKING_EMPTY)
     146             :                 return -EFSCORRUPTED;
     147             : 
     148             :         /* Must not be shared or CoW staging. */
     149    44574974 :         if (sc->sa.refc_cur) {
     150    44574354 :                 error = xfs_refcount_has_records(sc->sa.refc_cur,
     151    44574354 :                                 XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
     152    44574354 :                                 rec->ar_blockcount, &outcome);
     153    44578084 :                 if (error)
     154             :                         return error;
     155    44578084 :                 if (outcome != XBTREE_RECPACKING_EMPTY)
     156             :                         return -EFSCORRUPTED;
     157             : 
     158    44578084 :                 error = xfs_refcount_has_records(sc->sa.refc_cur,
     159    44578084 :                                 XFS_REFC_DOMAIN_COW, rec->ar_startblock,
     160    44578084 :                                 rec->ar_blockcount, &outcome);
     161    44578284 :                 if (error)
     162             :                         return error;
     163    44578284 :                 if (outcome != XBTREE_RECPACKING_EMPTY)
     164           0 :                         return -EFSCORRUPTED;
     165             :         }
     166             : 
     167             :         return 0;
     168             : }
     169             : 
     170             : /*
     171             :  * Stash a free space record for all the space since the last bno we found
     172             :  * all the way up to @end.
     173             :  */
     174             : static int
     175    44575951 : xrep_abt_stash(
     176             :         struct xrep_abt         *ra,
     177             :         xfs_agblock_t           end)
     178             : {
     179    44575951 :         struct xfs_alloc_rec_incore arec = {
     180    44575951 :                 .ar_startblock  = ra->next_agbno,
     181    44575951 :                 .ar_blockcount  = end - ra->next_agbno,
     182             :         };
     183    44575951 :         struct xfs_scrub        *sc = ra->sc;
     184    44575951 :         int                     error = 0;
     185             : 
     186    44575951 :         if (xchk_should_terminate(sc, &error))
     187           0 :                 return error;
     188             : 
     189    44572474 :         error = xrep_abt_check_free_ext(ra->sc, &arec);
     190    44578718 :         if (error)
     191             :                 return error;
     192             : 
     193    44578547 :         trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);
     194             : 
     195    44577520 :         error = xfarray_append(ra->free_records, &arec);
     196    44577907 :         if (error)
     197             :                 return error;
     198             : 
     199    44577907 :         ra->nr_blocks += arec.ar_blockcount;
     200    44577907 :         return 0;
     201             : }
     202             : 
     203             : /* Record extents that aren't in use from gaps in the rmap records. */
     204             : STATIC int
     205   255024160 : xrep_abt_walk_rmap(
     206             :         struct xfs_btree_cur            *cur,
     207             :         const struct xfs_rmap_irec      *rec,
     208             :         void                            *priv)
     209             : {
     210   255024160 :         struct xrep_abt                 *ra = priv;
     211   255024160 :         int                             error;
     212             : 
     213             :         /* Record all the OWN_AG blocks... */
     214   255024160 :         if (rec->rm_owner == XFS_RMAP_OWN_AG) {
     215     1865457 :                 error = xagb_bitmap_set(&ra->old_allocbt_blocks,
     216     1865457 :                                 rec->rm_startblock, rec->rm_blockcount);
     217     1865031 :                 if (error)
     218             :                         return error;
     219             :         }
     220             : 
     221             :         /* ...and all the rmapbt blocks... */
     222   255023734 :         error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
     223   255028430 :         if (error)
     224             :                 return error;
     225             : 
     226             :         /* ...and all the free space. */
     227   255028430 :         if (rec->rm_startblock > ra->next_agbno) {
     228    44533832 :                 error = xrep_abt_stash(ra, rec->rm_startblock);
     229    44539781 :                 if (error)
     230             :                         return error;
     231             :         }
     232             : 
     233             :         /*
     234             :          * rmap records can overlap on reflink filesystems, so project
     235             :          * next_agbno as far out into the AG space as we currently know about.
     236             :          */
     237   255034379 :         ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
     238             :                         rec->rm_startblock + rec->rm_blockcount);
     239   255034379 :         return 0;
     240             : }
     241             : 
     242             : /* Collect an AGFL block for the not-to-release list. */
     243             : static int
     244      286972 : xrep_abt_walk_agfl(
     245             :         struct xfs_mount        *mp,
     246             :         xfs_agblock_t           agbno,
     247             :         void                    *priv)
     248             : {
     249      286972 :         struct xrep_abt         *ra = priv;
     250             : 
     251      286972 :         return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
     252             : }
     253             : 
     254             : /*
     255             :  * Compare two free space extents by block number.  We want to sort in order of
     256             :  * increasing block number.
     257             :  */
     258             : static int
     259   605640334 : xrep_bnobt_extent_cmp(
     260             :         const void              *a,
     261             :         const void              *b)
     262             : {
     263  1488459693 :         const struct xfs_alloc_rec_incore *ap = a;
     264  1488459693 :         const struct xfs_alloc_rec_incore *bp = b;
     265             : 
     266   605640334 :         if (ap->ar_startblock > bp->ar_startblock)
     267             :                 return 1;
     268   808900306 :         else if (ap->ar_startblock < bp->ar_startblock)
     269   808881679 :                 return -1;
     270             :         return 0;
     271             : }
     272             : 
     273             : /*
      274             :  * Re-sort the free extents by block number so that we can put the records
     275             :  * into the bnobt in the correct order.  Make sure the records do not overlap
     276             :  * in physical space.
     277             :  */
     278             : STATIC int
     279       40210 : xrep_bnobt_sort_records(
     280             :         struct xrep_abt                 *ra)
     281             : {
     282       40210 :         struct xfs_alloc_rec_incore     arec;
     283       40210 :         xfarray_idx_t                   cur = XFARRAY_CURSOR_INIT;
     284       40210 :         xfs_agblock_t                   next_agbno = 0;
     285       40210 :         int                             error;
     286             : 
     287       40210 :         error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
     288       40169 :         if (error)
     289             :                 return error;
     290             : 
     291    44623154 :         while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
     292    44582985 :                 if (arec.ar_startblock < next_agbno)
     293             :                         return -EFSCORRUPTED;
     294             : 
     295    44582985 :                 next_agbno = arec.ar_startblock + arec.ar_blockcount;
     296             :         }
     297             : 
     298             :         return error;
     299             : }
     300             : 
     301             : /*
     302             :  * Compare two free space extents by length and then block number.  We want
     303             :  * to sort first in order of increasing length and then in order of increasing
     304             :  * block number.
     305             :  */
     306             : static int
     307  1192077935 : xrep_cntbt_extent_cmp(
     308             :         const void                      *a,
     309             :         const void                      *b)
     310             : {
     311  1192077935 :         const struct xfs_alloc_rec_incore *ap = a;
     312  1192077935 :         const struct xfs_alloc_rec_incore *bp = b;
     313             : 
     314  1192077935 :         if (ap->ar_blockcount > bp->ar_blockcount)
     315             :                 return 1;
     316  1037366906 :         else if (ap->ar_blockcount < bp->ar_blockcount)
     317             :                 return -1;
     318   882819359 :         return xrep_bnobt_extent_cmp(a, b);
     319             : }
     320             : 
     321             : /*
      322             :  * Sort the free extents by length so that we can put the records into the
     323             :  * cntbt in the correct order.  Don't let userspace kill us if we're resorting
     324             :  * after allocating btree blocks.
     325             :  */
     326             : STATIC int
     327       79839 : xrep_cntbt_sort_records(
     328             :         struct xrep_abt                 *ra,
     329             :         bool                            is_resort)
     330             : {
     331      160127 :         return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
     332       79839 :                         is_resort ? 0 : XFARRAY_SORT_KILLABLE);
     333             : }
     334             : 
     335             : /*
     336             :  * Iterate all reverse mappings to find (1) the gaps between rmap records (all
     337             :  * unowned space), (2) the OWN_AG extents (which encompass the free space
     338             :  * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
     339             :  * blocks.  The free space is (1) + (2) - (3) - (4).
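                      :  *
                      :  * In terms of the structures above: the gaps (1) are stashed in
                      :  * @free_records, the OWN_AG extents (2) in @old_allocbt_blocks,
                      :  * and the rmapbt and AGFL blocks (3) and (4) in
                      :  * @not_allocbt_blocks; subtracting the latter bitmap from the
                      :  * former leaves the blocks of the old bnobt/cntbt.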
     340             :  */
     341             : STATIC int
     342       39859 : xrep_abt_find_freespace(
     343             :         struct xrep_abt         *ra)
     344             : {
     345       39859 :         struct xfs_scrub        *sc = ra->sc;
     346       39859 :         struct xfs_mount        *mp = sc->mp;
     347       39859 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
     348       39859 :         struct xfs_buf          *agfl_bp;
     349       39859 :         xfs_agblock_t           agend;
     350       39859 :         int                     error;
     351             : 
     352       39859 :         xagb_bitmap_init(&ra->not_allocbt_blocks);
     353             : 
     354       39854 :         xrep_ag_btcur_init(sc, &sc->sa);
     355             : 
     356             :         /*
     357             :          * Iterate all the reverse mappings to find gaps in the physical
     358             :          * mappings, all the OWN_AG blocks, and all the rmapbt extents.
     359             :          */
     360       40151 :         error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
     361       40126 :         if (error)
     362           0 :                 goto err;
     363             : 
     364             :         /* Insert a record for space between the last rmap and EOAG. */
     365       40126 :         agend = be32_to_cpu(agf->agf_length);
     366       40126 :         if (ra->next_agbno < agend) {
     367       38805 :                 error = xrep_abt_stash(ra, agend);
     368       38938 :                 if (error)
     369           0 :                         goto err;
     370             :         }
     371             : 
     372             :         /* Collect all the AGFL blocks. */
     373       40259 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
     374       40144 :         if (error)
     375           0 :                 goto err;
     376             : 
     377       40144 :         error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
     378       40161 :         if (error)
     379           0 :                 goto err_agfl;
     380             : 
     381             :         /* Compute the old bnobt/cntbt blocks. */
     382       40161 :         error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
     383             :                         &ra->not_allocbt_blocks);
     384       39888 :         if (error)
     385           0 :                 goto err_agfl;
     386             : 
     387       39888 :         ra->nr_real_records = xfarray_length(ra->free_records);
     388       39844 : err_agfl:
     389       39844 :         xfs_trans_brelse(sc->tp, agfl_bp);
     390       40154 : err:
     391       40154 :         xchk_ag_btcur_free(&sc->sa);
     392       40194 :         xagb_bitmap_destroy(&ra->not_allocbt_blocks);
     393       40056 :         return error;
     394             : }
     395             : 
     396             : /*
     397             :  * We're going to use the observed free space records to reserve blocks for the
     398             :  * new free space btrees, so we play an iterative game where we try to converge
     399             :  * on the number of blocks we need:
     400             :  *
     401             :  * 1. Estimate how many blocks we'll need to store the records.
     402             :  * 2. If the first free record has more blocks than we need, we're done.
     403             :  *    We will have to re-sort the records prior to building the cntbt.
     404             :  * 3. If that record has exactly the number of blocks we need, null out the
     405             :  *    record.  We're done.
     406             :  * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
     407             :  *    length from the number of blocks we need, and go back to step 1.
     408             :  *
     409             :  * Fortunately, we don't have to do any transaction work to play this game, so
     410             :  * we don't have to tear down the staging cursors.
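                      :  *
                      :  * For instance (numbers invented for illustration): if the two
                      :  * geometry computations ask for five btree blocks in total and
                      :  * the free record we examine spans eight blocks, we reserve five
                      :  * of them, shrink the record to the remaining three, and flag
                      :  * the array for a re-sort by length (step 2).  Had the record
                      :  * spanned exactly five blocks we would null it out and stop
                      :  * (step 3); had it spanned only two, we would null it out and
                      :  * loop again still needing three more blocks (step 4).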
     411             :  */
     412             : STATIC int
     413       40058 : xrep_abt_reserve_space(
     414             :         struct xrep_abt         *ra,
     415             :         struct xfs_btree_cur    *bno_cur,
     416             :         struct xfs_btree_cur    *cnt_cur,
     417             :         bool                    *needs_resort)
     418             : {
     419       40058 :         struct xfs_scrub        *sc = ra->sc;
     420       40058 :         xfarray_idx_t           record_nr;
     421       40058 :         unsigned int            allocated = 0;
     422       40058 :         int                     error = 0;
     423             : 
     424       40058 :         record_nr = xfarray_length(ra->free_records) - 1;
     425       40119 :         do {
     426       40083 :                 struct xfs_alloc_rec_incore arec;
     427       40083 :                 uint64_t                required;
     428       40083 :                 unsigned int            desired;
     429       40083 :                 unsigned int            len;
     430             : 
     431             :                 /* Compute how many blocks we'll need. */
     432       40083 :                 error = xfs_btree_bload_compute_geometry(cnt_cur,
     433             :                                 &ra->new_cntbt.bload, ra->nr_real_records);
     434       39920 :                 if (error)
     435             :                         break;
     436             : 
     437       39920 :                 error = xfs_btree_bload_compute_geometry(bno_cur,
     438             :                                 &ra->new_bnobt.bload, ra->nr_real_records);
     439       40133 :                 if (error)
     440             :                         break;
     441             : 
     442             :                 /* How many btree blocks do we need to store all records? */
     443       40133 :                 required = ra->new_bnobt.bload.nr_blocks +
     444       40133 :                            ra->new_cntbt.bload.nr_blocks;
     445       40133 :                 ASSERT(required < INT_MAX);
     446             : 
     447             :                 /* If we've reserved enough blocks, we're done. */
     448       40133 :                 if (allocated >= required)
     449             :                         break;
     450             : 
     451       40129 :                 desired = required - allocated;
     452             : 
     453             :                 /* We need space but there's none left; bye! */
     454       40129 :                 if (ra->nr_real_records == 0) {
     455             :                         error = -ENOSPC;
     456             :                         break;
     457             :                 }
     458             : 
     459             :                 /* Grab the first record from the list. */
     460       40113 :                 error = xfarray_load(ra->free_records, record_nr, &arec);
     461       39990 :                 if (error)
     462             :                         break;
     463             : 
     464       39990 :                 ASSERT(arec.ar_blockcount <= UINT_MAX);
     465       39990 :                 len = min_t(unsigned int, arec.ar_blockcount, desired);
     466             : 
     467       39990 :                 trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
     468             :                                 arec.ar_startblock, len, XFS_RMAP_OWN_AG);
     469             : 
     470       39831 :                 error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
     471             :                                 arec.ar_startblock, len);
     472       39733 :                 if (error)
     473             :                         break;
     474       39733 :                 allocated += len;
     475       39733 :                 ra->nr_blocks -= len;
     476             : 
     477       39733 :                 if (arec.ar_blockcount > desired) {
     478             :                         /*
     479             :                          * Record has more space than we need.  The number of
     480             :                          * free records doesn't change, so shrink the free
     481             :                          * record, inform the caller that the records are no
     482             :                          * longer sorted by length, and exit.
     483             :                          */
     484       39697 :                         arec.ar_startblock += desired;
     485       39697 :                         arec.ar_blockcount -= desired;
     486       39697 :                         error = xfarray_store(ra->free_records, record_nr,
     487             :                                         &arec);
     488       40137 :                         if (error)
     489             :                                 break;
     490             : 
     491       40139 :                         *needs_resort = true;
     492       40139 :                         return 0;
     493             :                 }
     494             : 
     495             :                 /*
     496             :                  * We're going to use up the entire record, so unset it and
     497             :                  * move on to the next one.  This changes the number of free
     498             :                  * records (but doesn't break the sorting order), so we must
     499             :                  * go around the loop once more to re-run _bload_init.
     500             :                  */
     501          36 :                 error = xfarray_unset(ra->free_records, record_nr);
     502          36 :                 if (error)
     503             :                         break;
     504          36 :                 ra->nr_real_records--;
     505          36 :                 record_nr--;
     506             :         } while (1);
     507             : 
     508          19 :         return error;
     509             : }
     510             : 
     511             : STATIC int
     512       40194 : xrep_abt_dispose_one(
     513             :         struct xrep_abt         *ra,
     514             :         struct xrep_newbt_resv  *resv)
     515             : {
     516       40194 :         struct xfs_scrub        *sc = ra->sc;
     517       40194 :         struct xfs_perag        *pag = sc->sa.pag;
     518       40194 :         xfs_agblock_t           free_agbno = resv->agbno + resv->used;
     519       40194 :         xfs_extlen_t            free_aglen = resv->len - resv->used;
     520       40194 :         int                     error;
     521             : 
     522       40194 :         ASSERT(pag == resv->pag);
     523             : 
     524             :         /* Add a deferred rmap for each extent we used. */
     525       40194 :         if (resv->used > 0)
     526       40184 :                 xfs_rmap_alloc_extent(sc->tp, pag->pag_agno, resv->agbno,
     527             :                                 resv->used, XFS_RMAP_OWN_AG);
     528             : 
     529             :         /*
     530             :          * For each reserved btree block we didn't use, add it to the free
     531             :          * space btree.  We didn't touch fdblocks when we reserved them, so
     532             :          * we don't touch it now.
     533             :          */
     534       40235 :         if (free_aglen == 0)
     535             :                 return 0;
     536             : 
     537           0 :         trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
     538           0 :                         free_aglen, ra->new_bnobt.oinfo.oi_owner);
     539             : 
     540           0 :         error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
     541           0 :                         &ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
     542           0 :         if (error)
     543             :                 return error;
     544             : 
     545           0 :         return xrep_defer_finish(sc);
     546             : }
     547             : 
     548             : /*
     549             :  * Deal with all the space we reserved.  Blocks that were allocated for the
     550             :  * free space btrees need to have a (deferred) rmap added for the OWN_AG
     551             :  * allocation, and blocks that didn't get used can be freed via the usual
     552             :  * (deferred) means.
     553             :  */
     554             : STATIC void
     555       40195 : xrep_abt_dispose_reservations(
     556             :         struct xrep_abt         *ra,
     557             :         int                     error)
     558             : {
     559       40195 :         struct xrep_newbt_resv  *resv, *n;
     560             : 
     561       40195 :         if (error)
     562          16 :                 goto junkit;
     563             : 
     564       80396 :         for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
     565       40217 :                 error = xrep_abt_dispose_one(ra, resv);
     566       40217 :                 if (error)
     567           0 :                         goto junkit;
     568             :         }
     569             : 
     570       40179 : junkit:
     571       80395 :         for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
     572       40194 :                 xfs_perag_put(resv->pag);
     573       40240 :                 list_del(&resv->list);
     574       40233 :                 kfree(resv);
     575             :         }
     576             : 
     577       40201 :         xrep_newbt_cancel(&ra->new_bnobt);
     578       40138 :         xrep_newbt_cancel(&ra->new_cntbt);
     579       40155 : }
     580             : 
     581             : /* Retrieve free space data for bulk load. */
     582             : STATIC int
     583      276765 : xrep_abt_get_records(
     584             :         struct xfs_btree_cur            *cur,
     585             :         unsigned int                    idx,
     586             :         struct xfs_btree_block          *block,
     587             :         unsigned int                    nr_wanted,
     588             :         void                            *priv)
     589             : {
     590      276765 :         struct xfs_alloc_rec_incore     *arec = &cur->bc_rec.a;
     591      276765 :         struct xrep_abt                 *ra = priv;
     592      276765 :         union xfs_btree_rec             *block_rec;
     593      276765 :         unsigned int                    loaded;
     594      276765 :         int                             error;
     595             : 
     596    89438440 :         for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
     597    89161072 :                 error = xfarray_load_next(ra->free_records, &ra->array_cur,
     598             :                                 arec);
     599    89163637 :                 if (error)
     600           0 :                         return error;
     601             : 
     602    89163637 :                 ra->longest = max(ra->longest, arec->ar_blockcount);
     603             : 
     604    89163637 :                 block_rec = xfs_btree_rec_addr(cur, idx, block);
     605    89161073 :                 cur->bc_ops->init_rec_from_cur(cur, block_rec);
     606             :         }
     607             : 
     608      277368 :         return loaded;
     609             : }
     610             : 
     611             : /* Feed one of the new btree blocks to the bulk loader. */
     612             : STATIC int
     613      299560 : xrep_abt_claim_block(
     614             :         struct xfs_btree_cur    *cur,
     615             :         union xfs_btree_ptr     *ptr,
     616             :         void                    *priv)
     617             : {
     618      299560 :         struct xrep_abt         *ra = priv;
     619             : 
     620      299560 :         return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
     621             : }
     622             : 
     623             : /*
     624             :  * Reset the AGF counters to reflect the free space btrees that we just
     625             :  * rebuilt, then reinitialize the per-AG data.
     626             :  */
     627             : STATIC int
     628       40227 : xrep_abt_reset_counters(
     629             :         struct xrep_abt         *ra)
     630             : {
     631       40227 :         struct xfs_scrub        *sc = ra->sc;
     632       40227 :         struct xfs_perag        *pag = sc->sa.pag;
     633       40227 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
     634       40227 :         unsigned int            freesp_btreeblks = 0;
     635             : 
     636             :         /*
     637             :          * Compute the contribution to agf_btreeblks for the new free space
     638             :          * btrees.  This is the computed btree size minus anything we didn't
     639             :          * use.
     640             :          */
     641       40227 :         freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
     642       40227 :         freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;
     643             : 
     644       40227 :         freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
     645       40223 :         freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);
     646             : 
     647             :         /*
     648             :          * The AGF header contains extra information related to the free space
     649             :          * btrees, so we must update those fields here.
     650             :          */
     651       40229 :         agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
     652             :                                 (be32_to_cpu(agf->agf_rmap_blocks) - 1));
     653       40229 :         agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
     654       40229 :         agf->agf_longest = cpu_to_be32(ra->longest);
     655       40229 :         xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
     656             :                                                  XFS_AGF_LONGEST |
     657             :                                                  XFS_AGF_FREEBLKS);
     658             : 
     659             :         /*
     660             :          * After we commit the new btree to disk, it is possible that the
     661             :          * process to reap the old btree blocks will race with the AIL trying
     662             :          * to checkpoint the old btree blocks into the filesystem.  If the new
     663             :          * tree is shorter than the old one, the allocbt write verifier will
     664             :          * fail and the AIL will shut down the filesystem.
     665             :          *
     666             :          * To avoid this, save the old incore btree height values as the alt
     667             :          * height values before re-initializing the perag info from the updated
     668             :          * AGF to capture all the new values.
     669             :          */
     670       40241 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = pag->pagf_levels[XFS_BTNUM_BNOi];
     671       40241 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = pag->pagf_levels[XFS_BTNUM_CNTi];
     672             : 
     673             :         /* Reinitialize with the values we just logged. */
     674       40241 :         return xrep_reinit_pagf(sc);
     675             : }
     676             : 
     677             : /*
     678             :  * Use the collected free space information to stage new free space btrees.
     679             :  * If this is successful we'll return with the new btree root
     680             :  * information logged to the repair transaction but not yet committed.
     681             :  */
     682             : STATIC int
     683       39905 : xrep_abt_build_new_trees(
     684             :         struct xrep_abt         *ra)
     685             : {
     686       39905 :         struct xfs_scrub        *sc = ra->sc;
     687       39905 :         struct xfs_btree_cur    *bno_cur;
     688       39905 :         struct xfs_btree_cur    *cnt_cur;
     689       39905 :         struct xfs_perag        *pag = sc->sa.pag;
     690       39905 :         bool                    needs_resort = false;
     691       39905 :         int                     error;
     692             : 
     693             :         /*
     694             :          * Sort the free extents by length so that we can set up the free space
     695             :          * btrees in as few extents as possible.  This reduces the amount of
     696             :          * deferred rmap / free work we have to do at the end.
     697             :          */
     698       39905 :         error = xrep_cntbt_sort_records(ra, false);
     699       40140 :         if (error)
     700             :                 return error;
     701             : 
     702             :         /*
     703             :          * Prepare to construct the new btree by reserving disk space for the
     704             :          * new btree and setting up all the accounting information we'll need
     705             :          * to root the new btree while it's under construction and before we
     706             :          * attach it to the AG header.
     707             :          */
     708       40100 :         xrep_newbt_init_bare(&ra->new_bnobt, sc);
     709       39824 :         xrep_newbt_init_bare(&ra->new_cntbt, sc);
     710             : 
     711       39962 :         ra->new_bnobt.bload.get_records = xrep_abt_get_records;
     712       39962 :         ra->new_cntbt.bload.get_records = xrep_abt_get_records;
     713             : 
     714       39962 :         ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
     715       39962 :         ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;
     716             : 
     717             :         /* Allocate cursors for the staged btrees. */
     718       39962 :         bno_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_bnobt.afake,
     719             :                         pag, XFS_BTNUM_BNO);
     720       39948 :         cnt_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_cntbt.afake,
     721             :                         pag, XFS_BTNUM_CNT);
     722             : 
     723             :         /* Last chance to abort before we start committing fixes. */
     724       40035 :         if (xchk_should_terminate(sc, &error))
     725           0 :                 goto err_cur;
     726             : 
     727             :         /* Reserve the space we'll need for the new btrees. */
     728       40093 :         error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
     729       40137 :         if (error)
     730          16 :                 goto err_cur;
     731             : 
     732             :         /*
     733             :          * If we need to re-sort the free extents by length, do so so that we
     734             :          * can put the records into the cntbt in the correct order.
     735             :          */
     736       40121 :         if (needs_resort) {
     737       40117 :                 error = xrep_cntbt_sort_records(ra, needs_resort);
     738       40131 :                 if (error)
     739           0 :                         goto err_cur;
     740             :         }
     741             : 
     742             :         /*
     743             :          * Due to btree slack factors, it's possible for a new btree to be one
     744             :          * level taller than the old btree.  Update the alternate incore btree
     745             :          * height so that we don't trip the verifiers when writing the new
     746             :          * btree blocks to disk.
     747             :          */
     748       40135 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] =
     749       40135 :                                         ra->new_bnobt.bload.btree_height;
     750       40135 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] =
     751       40135 :                                         ra->new_cntbt.bload.btree_height;
     752             : 
     753             :         /* Load the free space by length tree. */
     754       40135 :         ra->array_cur = XFARRAY_CURSOR_INIT;
     755       40135 :         ra->longest = 0;
     756       40135 :         error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
     757       40227 :         if (error)
     758           0 :                 goto err_levels;
     759             : 
     760       40227 :         error = xrep_bnobt_sort_records(ra);
     761       40213 :         if (error)
     762             :                 return error;
     763             : 
     764             :         /* Load the free space by block number tree. */
     765       40212 :         ra->array_cur = XFARRAY_CURSOR_INIT;
     766       40212 :         error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
     767       40230 :         if (error)
     768           0 :                 goto err_levels;
     769             : 
     770             :         /*
     771             :          * Install the new btrees in the AG header.  After this point the old
     772             :          * btrees are no longer accessible and the new trees are live.
     773             :          */
     774       40230 :         xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
     775       40213 :         xfs_btree_del_cursor(bno_cur, 0);
     776       40227 :         xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
     777       40227 :         xfs_btree_del_cursor(cnt_cur, 0);
     778             : 
     779             :         /* Reset the AGF counters now that we've changed the btree shape. */
     780       40235 :         error = xrep_abt_reset_counters(ra);
     781       40194 :         if (error)
     782           0 :                 goto err_newbt;
     783             : 
     784             :         /* Dispose of any unused blocks and the accounting information. */
     785       40194 :         xrep_abt_dispose_reservations(ra, error);
     786             : 
     787       40137 :         return xrep_roll_ag_trans(sc);
     788             : 
     789           0 : err_levels:
     790           0 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
     791           0 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
     792          16 : err_cur:
     793          16 :         xfs_btree_del_cursor(cnt_cur, error);
     794          16 :         xfs_btree_del_cursor(bno_cur, error);
     795          16 : err_newbt:
     796          16 :         xrep_abt_dispose_reservations(ra, error);
     797          16 :         return error;
     798             : }
     799             : 
     800             : /*
     801             :  * Now that we've logged the roots of the new btrees, invalidate all of the
     802             :  * old blocks and free them.
     803             :  */
     804             : STATIC int
     805       39997 : xrep_abt_remove_old_trees(
     806             :         struct xrep_abt         *ra)
     807             : {
     808       39997 :         struct xfs_perag        *pag = ra->sc->sa.pag;
     809       39997 :         int                     error;
     810             : 
     811             :         /* Free the old btree blocks if they're not in use. */
     812       39997 :         error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
     813             :                         &XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
     814       40083 :         if (error)
     815             :                 return error;
     816             : 
     817             :         /*
     818             :          * Now that we've zapped all the old allocbt blocks we can turn off
     819             :          * the alternate height mechanism.
     820             :          */
     821       40083 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
     822       40083 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
     823       40083 :         return 0;
     824             : }
     825             : 
     826             : /* Repair the freespace btrees for some AG. */
     827             : int
     828       54515 : xrep_allocbt(
     829             :         struct xfs_scrub        *sc)
     830             : {
     831       54515 :         struct xrep_abt         *ra;
     832       54515 :         struct xfs_mount        *mp = sc->mp;
     833       54515 :         char                    *descr;
     834       54515 :         int                     error;
     835             : 
     836             :         /* We require the rmapbt to rebuild anything. */
     837       54515 :         if (!xfs_has_rmapbt(mp))
     838             :                 return -EOPNOTSUPP;
     839             : 
     840       40202 :         ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
     841       40217 :         if (!ra)
     842             :                 return -ENOMEM;
     843       40217 :         ra->sc = sc;
     844             : 
     845             :         /* We rebuild both data structures. */
     846       40217 :         sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;
     847             : 
     848             :         /*
     849             :          * Make sure the busy extent list is clear because we can't put extents
     850             :          * on there twice.  In theory we cleared this before we started, but
     851             :          * let's not risk the filesystem.
     852             :          */
     853       40217 :         if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
     854           0 :                 error = -EDEADLOCK;
     855           0 :                 goto out_ra;
     856             :         }
     857             : 
     858             :         /* Set up enough storage to handle maximally fragmented free space. */
     859       40221 :         descr = xchk_xfile_ag_descr(sc, "free space records");
     860       39981 :         error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
     861             :                         sizeof(struct xfs_alloc_rec_incore),
     862             :                         &ra->free_records);
     863       39840 :         kfree(descr);
     864       40002 :         if (error)
     865           0 :                 goto out_ra;
     866             : 
     867             :         /* Collect the free space data and find the old btree blocks. */
     868       40002 :         xagb_bitmap_init(&ra->old_allocbt_blocks);
     869       39758 :         error = xrep_abt_find_freespace(ra);
     870       40144 :         if (error)
     871           0 :                 goto out_bitmap;
     872             : 
     873             :         /* Rebuild the free space information. */
     874       40144 :         error = xrep_abt_build_new_trees(ra);
     875       40127 :         if (error)
     876          16 :                 goto out_bitmap;
     877             : 
     878             :         /* Kill the old trees. */
     879       40111 :         error = xrep_abt_remove_old_trees(ra);
     880             : 
     881       40100 : out_bitmap:
     882       40100 :         xagb_bitmap_destroy(&ra->old_allocbt_blocks);
     883       40093 :         xfarray_destroy(ra->free_records);
     884       40137 : out_ra:
     885       40137 :         kfree(ra);
     886       40137 :         return error;
     887             : }
     888             : 
     889             : /* Make sure both btrees are ok after we've rebuilt them. */
     890             : int
     891       40071 : xrep_revalidate_allocbt(
     892             :         struct xfs_scrub        *sc)
     893             : {
     894       40071 :         __u32                   old_type = sc->sm->sm_type;
     895       40071 :         int                     error;
     896             : 
     897             :         /*
     898             :          * We must update sm_type temporarily so that the tree-to-tree cross
     899             :          * reference checks will work in the correct direction, and also so
     900             :          * that tracing will report correctly if there are more errors.
     901             :          */
     902       40071 :         sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
     903       40071 :         error = xchk_bnobt(sc);
     904       40180 :         if (error)
     905           0 :                 goto out;
     906             : 
     907       40180 :         sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
     908       40180 :         error = xchk_cntbt(sc);
     909       40219 : out:
     910       40219 :         sc->sm->sm_type = old_type;
     911       40219 :         return error;
     912             : }

Generated by: LCOV version 1.14