LCOV - code coverage report
Current view: top level - fs/xfs/scrub - alloc_repair.c (source / functions)
Test:         fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023
Date:         2023-07-31 20:08:34
                            Hit      Total    Coverage
Lines:                      307        335      91.6 %
Functions:                   20         20     100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_defer.h"
      13             : #include "xfs_btree.h"
      14             : #include "xfs_btree_staging.h"
      15             : #include "xfs_bit.h"
      16             : #include "xfs_log_format.h"
      17             : #include "xfs_trans.h"
      18             : #include "xfs_sb.h"
      19             : #include "xfs_alloc.h"
      20             : #include "xfs_alloc_btree.h"
      21             : #include "xfs_rmap.h"
      22             : #include "xfs_rmap_btree.h"
      23             : #include "xfs_inode.h"
      24             : #include "xfs_refcount.h"
      25             : #include "xfs_extent_busy.h"
      26             : #include "xfs_health.h"
      27             : #include "xfs_bmap.h"
      28             : #include "xfs_ialloc.h"
      29             : #include "xfs_ag.h"
      30             : #include "scrub/xfs_scrub.h"
      31             : #include "scrub/scrub.h"
      32             : #include "scrub/common.h"
      33             : #include "scrub/btree.h"
      34             : #include "scrub/trace.h"
      35             : #include "scrub/repair.h"
      36             : #include "scrub/bitmap.h"
      37             : #include "scrub/xfile.h"
      38             : #include "scrub/xfarray.h"
      39             : #include "scrub/newbt.h"
      40             : #include "scrub/reap.h"
      41             : 
      42             : /*
      43             :  * Free Space Btree Repair
      44             :  * =======================
      45             :  *
      46             :  * The reverse mappings are supposed to record all space usage for the entire
      47             :  * AG.  Therefore, we can recalculate the free extents in an AG by looking for
      48             :  * gaps in the physical extents recorded in the rmapbt.  On a reflink
      49             :  * filesystem this is a little more tricky in that we have to be aware that
      50             :  * the rmap records are allowed to overlap.
      51             :  *
      52             :  * We derive which blocks belonged to the old bnobt/cntbt by recording all the
      53             :  * OWN_AG extents and subtracting out the blocks owned by all other OWN_AG
      54             :  * metadata: the rmapbt blocks visited while iterating the reverse mappings
      55             :  * and the AGFL blocks.
      56             :  *
      57             :  * Once we have both of those pieces, we can reconstruct the bnobt and cntbt
      58             :  * by blowing out the free block state and freeing all the extents that we
      59             :  * found.  This adds the requirement that we can't have any busy extents in
      60             :  * the AG because the busy code cannot handle duplicate records.
      61             :  *
      62             :  * Note that we can only rebuild both free space btrees at the same time
      63             :  * because the regular extent freeing infrastructure loads both btrees at the
      64             :  * same time.
      65             :  *
      66             :  * We use the prefix 'xrep_abt' here because we regenerate both free space
      67             :  * allocation btrees at the same time.
      68             :  */
      69             : 
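A minimal standalone sketch of the gap-walking idea described above (editorial
illustration, not part of the covered source; the simplified types, the toy AG
size, and the emit_free_gaps() name are assumptions): with the rmap records
sorted by start block, every hole between a running "next expected block"
cursor and the next record's start is free space, and overlapping records only
ever push the cursor forward.

    #include <stdio.h>

    struct ext { unsigned int start; unsigned int len; };

    static void emit_free_gaps(const struct ext *rmaps, int nr,
                               unsigned int agblocks)
    {
            unsigned int next_agbno = 0;    /* next block we expect to see mapped */

            for (int i = 0; i < nr; i++) {
                    /* A record starting beyond next_agbno implies a free gap. */
                    if (rmaps[i].start > next_agbno)
                            printf("free extent: start %u len %u\n",
                                    next_agbno, rmaps[i].start - next_agbno);

                    /* Overlapping records only ever push the cursor forward. */
                    if (rmaps[i].start + rmaps[i].len > next_agbno)
                            next_agbno = rmaps[i].start + rmaps[i].len;
            }

            /* Space between the last mapping and the end of the AG is free too. */
            if (next_agbno < agblocks)
                    printf("free extent: start %u len %u\n",
                            next_agbno, agblocks - next_agbno);
    }

    int main(void)
    {
            /* Hypothetical records sorted by start; 20-27 and 25-29 overlap. */
            const struct ext rmaps[] = {
                    { 0, 10 }, { 12, 4 }, { 20, 8 }, { 25, 5 },
            };

            emit_free_gaps(rmaps, 4, 40);   /* toy AG of 40 blocks */
            return 0;
    }
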
      70             : struct xrep_abt {
      71             :         /* Blocks owned by the rmapbt or the agfl. */
      72             :         struct xagb_bitmap      not_allocbt_blocks;
      73             : 
      74             :         /* All OWN_AG blocks. */
      75             :         struct xagb_bitmap      old_allocbt_blocks;
      76             : 
      77             :         /*
      78             :          * New bnobt information.  All btree block reservations are added to
      79             :          * the reservation list in new_bnobt.
      80             :          */
      81             :         struct xrep_newbt       new_bnobt;
      82             : 
      83             :         /* new cntbt information */
      84             :         struct xrep_newbt       new_cntbt;
      85             : 
      86             :         /* Free space extents. */
      87             :         struct xfarray          *free_records;
      88             : 
      89             :         struct xfs_scrub        *sc;
      90             : 
      91             :         /* Number of non-null records in @free_records. */
      92             :         uint64_t                nr_real_records;
      93             : 
      94             :         /* get_records()'s position in the free space record array. */
      95             :         xfarray_idx_t           array_cur;
      96             : 
      97             :         /*
      98             :          * Next block we anticipate seeing in the rmap records.  If the next
       99             :          * rmap record starts after next_agbno, we have found unused space.
     100             :          */
     101             :         xfs_agblock_t           next_agbno;
     102             : 
     103             :         /* Number of free blocks in this AG. */
     104             :         xfs_agblock_t           nr_blocks;
     105             : 
     106             :         /* Longest free extent we found in the AG. */
     107             :         xfs_agblock_t           longest;
     108             : };
     109             : 
     110             : /* Set up to repair AG free space btrees. */
     111             : int
     112      143120 : xrep_setup_ag_allocbt(
     113             :         struct xfs_scrub        *sc)
     114             : {
     115      143120 :         unsigned int            busy_gen;
     116             : 
     117             :         /*
     118             :          * Make sure the busy extent list is clear because we can't put extents
     119             :          * on there twice.
     120             :          */
     121      143120 :         busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
     122      143120 :         if (xfs_extent_busy_list_empty(sc->sa.pag))
     123             :                 return 0;
     124             : 
     125       51955 :         return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
     126             : }
     127             : 
     128             : /* Check for any obvious conflicts in the free extent. */
     129             : STATIC int
     130    45034377 : xrep_abt_check_free_ext(
     131             :         struct xfs_scrub        *sc,
     132             :         const struct xfs_alloc_rec_incore *rec)
     133             : {
     134    45034377 :         enum xbtree_recpacking  outcome;
     135    45034377 :         int                     error;
     136             : 
     137    45034377 :         if (xfs_alloc_check_perag_irec(sc->sa.pag, rec) != NULL)
     138             :                 return -EFSCORRUPTED;
     139             : 
     140             :         /* Must not be an inode chunk. */
     141    45033716 :         error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
     142    45033716 :                         rec->ar_startblock, rec->ar_blockcount, &outcome);
     143    45035139 :         if (error)
     144             :                 return error;
     145    45035139 :         if (outcome != XBTREE_RECPACKING_EMPTY)
     146             :                 return -EFSCORRUPTED;
     147             : 
     148             :         /* Must not be shared or CoW staging. */
     149    45035139 :         if (sc->sa.refc_cur) {
     150    45034462 :                 error = xfs_refcount_has_records(sc->sa.refc_cur,
     151    45034462 :                                 XFS_REFC_DOMAIN_SHARED, rec->ar_startblock,
     152    45034462 :                                 rec->ar_blockcount, &outcome);
     153    45033843 :                 if (error)
     154             :                         return error;
     155    45033843 :                 if (outcome != XBTREE_RECPACKING_EMPTY)
     156             :                         return -EFSCORRUPTED;
     157             : 
     158    45033843 :                 error = xfs_refcount_has_records(sc->sa.refc_cur,
     159    45033843 :                                 XFS_REFC_DOMAIN_COW, rec->ar_startblock,
     160    45033843 :                                 rec->ar_blockcount, &outcome);
     161    45033854 :                 if (error)
     162             :                         return error;
     163    45033854 :                 if (outcome != XBTREE_RECPACKING_EMPTY)
     164           0 :                         return -EFSCORRUPTED;
     165             :         }
     166             : 
     167             :         return 0;
     168             : }
     169             : 
     170             : /*
     171             :  * Stash a free space record for all the space since the last bno we found
     172             :  * all the way up to @end.
     173             :  */
     174             : static int
     175    45034135 : xrep_abt_stash(
     176             :         struct xrep_abt         *ra,
     177             :         xfs_agblock_t           end)
     178             : {
     179    45034135 :         struct xfs_alloc_rec_incore arec = {
     180    45034135 :                 .ar_startblock  = ra->next_agbno,
     181    45034135 :                 .ar_blockcount  = end - ra->next_agbno,
     182             :         };
     183    45034135 :         struct xfs_scrub        *sc = ra->sc;
     184    45034135 :         int                     error = 0;
     185             : 
     186    45034135 :         if (xchk_should_terminate(sc, &error))
     187           0 :                 return error;
     188             : 
     189    45033959 :         error = xrep_abt_check_free_ext(ra->sc, &arec);
     190    45034615 :         if (error)
     191             :                 return error;
     192             : 
     193    45034622 :         trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);
     194             : 
     195    45034240 :         error = xfarray_append(ra->free_records, &arec);
     196    45033650 :         if (error)
     197             :                 return error;
     198             : 
     199    45033650 :         ra->nr_blocks += arec.ar_blockcount;
     200    45033650 :         return 0;
     201             : }
     202             : 
     203             : /* Record extents that aren't in use from gaps in the rmap records. */
     204             : STATIC int
     205   970328237 : xrep_abt_walk_rmap(
     206             :         struct xfs_btree_cur            *cur,
     207             :         const struct xfs_rmap_irec      *rec,
     208             :         void                            *priv)
     209             : {
     210   970328237 :         struct xrep_abt                 *ra = priv;
     211   970328237 :         int                             error;
     212             : 
     213             :         /* Record all the OWN_AG blocks... */
     214   970328237 :         if (rec->rm_owner == XFS_RMAP_OWN_AG) {
     215     6604519 :                 error = xagb_bitmap_set(&ra->old_allocbt_blocks,
     216     6604519 :                                 rec->rm_startblock, rec->rm_blockcount);
     217     6603578 :                 if (error)
     218             :                         return error;
     219             :         }
     220             : 
     221             :         /* ...and all the rmapbt blocks... */
     222   970327296 :         error = xagb_bitmap_set_btcur_path(&ra->not_allocbt_blocks, cur);
     223   970336606 :         if (error)
     224             :                 return error;
     225             : 
     226             :         /* ...and all the free space. */
     227   970336606 :         if (rec->rm_startblock > ra->next_agbno) {
     228    44892698 :                 error = xrep_abt_stash(ra, rec->rm_startblock);
     229    44892581 :                 if (error)
     230             :                         return error;
     231             :         }
     232             : 
     233             :         /*
     234             :          * rmap records can overlap on reflink filesystems, so project
     235             :          * next_agbno as far out into the AG space as we currently know about.
     236             :          */
     237   970336489 :         ra->next_agbno = max_t(xfs_agblock_t, ra->next_agbno,
     238             :                         rec->rm_startblock + rec->rm_blockcount);
     239   970336489 :         return 0;
     240             : }
     241             : 
     242             : /* Collect an AGFL block for the not-to-release list. */
     243             : static int
     244      997465 : xrep_abt_walk_agfl(
     245             :         struct xfs_mount        *mp,
     246             :         xfs_agblock_t           agbno,
     247             :         void                    *priv)
     248             : {
     249      997465 :         struct xrep_abt         *ra = priv;
     250             : 
     251      997465 :         return xagb_bitmap_set(&ra->not_allocbt_blocks, agbno, 1);
     252             : }
     253             : 
     254             : /*
     255             :  * Compare two free space extents by block number.  We want to sort in order of
     256             :  * increasing block number.
     257             :  */
     258             : static int
     259   606767470 : xrep_bnobt_extent_cmp(
     260             :         const void              *a,
     261             :         const void              *b)
     262             : {
     263  1490414635 :         const struct xfs_alloc_rec_incore *ap = a;
     264  1490414635 :         const struct xfs_alloc_rec_incore *bp = b;
     265             : 
     266   606767470 :         if (ap->ar_startblock > bp->ar_startblock)
     267             :                 return 1;
     268   809806925 :         else if (ap->ar_startblock < bp->ar_startblock)
     269   809805108 :                 return -1;
     270             :         return 0;
     271             : }
     272             : 
     273             : /*
      274             :  * Re-sort the free extents by block number so that we can put the records
     275             :  * into the bnobt in the correct order.  Make sure the records do not overlap
     276             :  * in physical space.
     277             :  */
     278             : STATIC int
     279      142281 : xrep_bnobt_sort_records(
     280             :         struct xrep_abt                 *ra)
     281             : {
     282      142281 :         struct xfs_alloc_rec_incore     arec;
     283      142281 :         xfarray_idx_t                   cur = XFARRAY_CURSOR_INIT;
     284      142281 :         xfs_agblock_t                   next_agbno = 0;
     285      142281 :         int                             error;
     286             : 
     287      142281 :         error = xfarray_sort(ra->free_records, xrep_bnobt_extent_cmp, 0);
     288      142248 :         if (error)
     289             :                 return error;
     290             : 
     291    45176759 :         while ((error = xfarray_iter(ra->free_records, &cur, &arec)) == 1) {
     292    45034511 :                 if (arec.ar_startblock < next_agbno)
     293             :                         return -EFSCORRUPTED;
     294             : 
     295    45034511 :                 next_agbno = arec.ar_startblock + arec.ar_blockcount;
     296             :         }
     297             : 
     298             :         return error;
     299             : }
     300             : 
     301             : /*
     302             :  * Compare two free space extents by length and then block number.  We want
     303             :  * to sort first in order of increasing length and then in order of increasing
     304             :  * block number.
     305             :  */
     306             : static int
     307  1195476368 : xrep_cntbt_extent_cmp(
     308             :         const void                      *a,
     309             :         const void                      *b)
     310             : {
     311  1195476368 :         const struct xfs_alloc_rec_incore *ap = a;
     312  1195476368 :         const struct xfs_alloc_rec_incore *bp = b;
     313             : 
     314  1195476368 :         if (ap->ar_blockcount > bp->ar_blockcount)
     315             :                 return 1;
     316  1039644627 :         else if (ap->ar_blockcount < bp->ar_blockcount)
     317             :                 return -1;
     318   883647165 :         return xrep_bnobt_extent_cmp(a, b);
     319             : }
     320             : 
     321             : /*
      322             :  * Sort the free extents by length so that we can put the records into the
     323             :  * cntbt in the correct order.  Don't let userspace kill us if we're resorting
     324             :  * after allocating btree blocks.
     325             :  */
     326             : STATIC int
     327      283819 : xrep_cntbt_sort_records(
     328             :         struct xrep_abt                 *ra,
     329             :         bool                            is_resort)
     330             : {
     331      568207 :         return xfarray_sort(ra->free_records, xrep_cntbt_extent_cmp,
     332      283819 :                         is_resort ? 0 : XFARRAY_SORT_KILLABLE);
     333             : }
     334             : 
     335             : /*
     336             :  * Iterate all reverse mappings to find (1) the gaps between rmap records (all
     337             :  * unowned space), (2) the OWN_AG extents (which encompass the free space
     338             :  * btrees, the rmapbt, and the agfl), (3) the rmapbt blocks, and (4) the AGFL
     339             :  * blocks.  The free space is (1) + (2) - (3) - (4).
     340             :  */
     341             : STATIC int
     342      142006 : xrep_abt_find_freespace(
     343             :         struct xrep_abt         *ra)
     344             : {
     345      142006 :         struct xfs_scrub        *sc = ra->sc;
     346      142006 :         struct xfs_mount        *mp = sc->mp;
     347      142006 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
     348      142006 :         struct xfs_buf          *agfl_bp;
     349      142006 :         xfs_agblock_t           agend;
     350      142006 :         int                     error;
     351             : 
     352      142006 :         xagb_bitmap_init(&ra->not_allocbt_blocks);
     353             : 
     354      141906 :         xrep_ag_btcur_init(sc, &sc->sa);
     355             : 
     356             :         /*
     357             :          * Iterate all the reverse mappings to find gaps in the physical
     358             :          * mappings, all the OWN_AG blocks, and all the rmapbt extents.
     359             :          */
     360      142216 :         error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_abt_walk_rmap, ra);
     361      142110 :         if (error)
     362           0 :                 goto err;
     363             : 
     364             :         /* Insert a record for space between the last rmap and EOAG. */
     365      142110 :         agend = be32_to_cpu(agf->agf_length);
     366      142110 :         if (ra->next_agbno < agend) {
     367      141010 :                 error = xrep_abt_stash(ra, agend);
     368      141188 :                 if (error)
     369           0 :                         goto err;
     370             :         }
     371             : 
     372             :         /* Collect all the AGFL blocks. */
     373      142288 :         error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
     374      142130 :         if (error)
     375           0 :                 goto err;
     376             : 
     377      142130 :         error = xfs_agfl_walk(mp, agf, agfl_bp, xrep_abt_walk_agfl, ra);
     378      142211 :         if (error)
     379           0 :                 goto err_agfl;
     380             : 
     381             :         /* Compute the old bnobt/cntbt blocks. */
     382      142211 :         error = xagb_bitmap_disunion(&ra->old_allocbt_blocks,
     383             :                         &ra->not_allocbt_blocks);
     384      141681 :         if (error)
     385           0 :                 goto err_agfl;
     386             : 
     387      141681 :         ra->nr_real_records = xfarray_length(ra->free_records);
     388      141575 : err_agfl:
     389      141575 :         xfs_trans_brelse(sc->tp, agfl_bp);
     390      142196 : err:
     391      142196 :         xchk_ag_btcur_free(&sc->sa);
     392      142280 :         xagb_bitmap_destroy(&ra->not_allocbt_blocks);
     393      142088 :         return error;
     394             : }
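
The disunion step above ("compute the old bnobt/cntbt blocks") is plain set
subtraction.  A tiny sketch under stated assumptions (a toy 64-block AG held in
a single unsigned long long, one bit per block; the real code uses
xagb_bitmap_disunion() on arbitrarily large bitmaps):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long own_ag        = 0xF0ULL; /* blocks 4-7 carry OWN_AG rmaps */
            unsigned long long rmapbt_blocks = 0x30ULL; /* blocks 4-5 hold the rmapbt */
            unsigned long long agfl_blocks   = 0x40ULL; /* block 6 sits on the AGFL */

            /* Whatever OWN_AG space is left must belong to the old allocbt. */
            unsigned long long old_allocbt =
                            own_ag & ~(rmapbt_blocks | agfl_blocks);

            printf("old allocbt blocks: 0x%llx\n", old_allocbt); /* 0x80 -> block 7 */
            return 0;
    }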
     395             : 
     396             : /*
     397             :  * We're going to use the observed free space records to reserve blocks for the
     398             :  * new free space btrees, so we play an iterative game where we try to converge
     399             :  * on the number of blocks we need:
     400             :  *
     401             :  * 1. Estimate how many blocks we'll need to store the records.
     402             :  * 2. If the first free record has more blocks than we need, we're done.
     403             :  *    We will have to re-sort the records prior to building the cntbt.
     404             :  * 3. If that record has exactly the number of blocks we need, null out the
     405             :  *    record.  We're done.
     406             :  * 4. Otherwise, we still need more blocks.  Null out the record, subtract its
     407             :  *    length from the number of blocks we need, and go back to step 1.
     408             :  *
     409             :  * Fortunately, we don't have to do any transaction work to play this game, so
     410             :  * we don't have to tear down the staging cursors.
     411             :  */
     412             : STATIC int
     413      142212 : xrep_abt_reserve_space(
     414             :         struct xrep_abt         *ra,
     415             :         struct xfs_btree_cur    *bno_cur,
     416             :         struct xfs_btree_cur    *cnt_cur,
     417             :         bool                    *needs_resort)
     418             : {
     419      142212 :         struct xfs_scrub        *sc = ra->sc;
     420      142212 :         xfarray_idx_t           record_nr;
     421      142212 :         unsigned int            allocated = 0;
     422      142212 :         int                     error = 0;
     423             : 
     424      142212 :         record_nr = xfarray_length(ra->free_records) - 1;
     425      142204 :         do {
     426      142168 :                 struct xfs_alloc_rec_incore arec;
     427      142168 :                 uint64_t                required;
     428      142168 :                 unsigned int            desired;
     429      142168 :                 unsigned int            len;
     430             : 
     431             :                 /* Compute how many blocks we'll need. */
     432      142168 :                 error = xfs_btree_bload_compute_geometry(cnt_cur,
     433             :                                 &ra->new_cntbt.bload, ra->nr_real_records);
     434      141814 :                 if (error)
     435             :                         break;
     436             : 
     437      141814 :                 error = xfs_btree_bload_compute_geometry(bno_cur,
     438             :                                 &ra->new_bnobt.bload, ra->nr_real_records);
     439      142190 :                 if (error)
     440             :                         break;
     441             : 
     442             :                 /* How many btree blocks do we need to store all records? */
     443      142190 :                 required = ra->new_bnobt.bload.nr_blocks +
     444      142190 :                            ra->new_cntbt.bload.nr_blocks;
     445      142190 :                 ASSERT(required < INT_MAX);
     446             : 
     447             :                 /* If we've reserved enough blocks, we're done. */
     448      142190 :                 if (allocated >= required)
     449             :                         break;
     450             : 
     451      142185 :                 desired = required - allocated;
     452             : 
     453             :                 /* We need space but there's none left; bye! */
     454      142185 :                 if (ra->nr_real_records == 0) {
     455             :                         error = -ENOSPC;
     456             :                         break;
     457             :                 }
     458             : 
     459             :                 /* Grab the first record from the list. */
     460      142155 :                 error = xfarray_load(ra->free_records, record_nr, &arec);
     461      141969 :                 if (error)
     462             :                         break;
     463             : 
     464      141969 :                 ASSERT(arec.ar_blockcount <= UINT_MAX);
     465      141969 :                 len = min_t(unsigned int, arec.ar_blockcount, desired);
     466             : 
     467      141969 :                 trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
     468             :                                 arec.ar_startblock, len, XFS_RMAP_OWN_AG);
     469             : 
     470      141686 :                 error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
     471             :                                 arec.ar_startblock, len);
     472      141747 :                 if (error)
     473             :                         break;
     474      141747 :                 allocated += len;
     475      141747 :                 ra->nr_blocks -= len;
     476             : 
     477      141747 :                 if (arec.ar_blockcount > desired) {
     478             :                         /*
     479             :                          * Record has more space than we need.  The number of
     480             :                          * free records doesn't change, so shrink the free
     481             :                          * record, inform the caller that the records are no
     482             :                          * longer sorted by length, and exit.
     483             :                          */
     484      141711 :                         arec.ar_startblock += desired;
     485      141711 :                         arec.ar_blockcount -= desired;
     486      141711 :                         error = xfarray_store(ra->free_records, record_nr,
     487             :                                         &arec);
     488      142162 :                         if (error)
     489             :                                 break;
     490             : 
     491      142163 :                         *needs_resort = true;
     492      142163 :                         return 0;
     493             :                 }
     494             : 
     495             :                 /*
     496             :                  * We're going to use up the entire record, so unset it and
     497             :                  * move on to the next one.  This changes the number of free
     498             :                  * records (but doesn't break the sorting order), so we must
     499             :                  * go around the loop once more to re-run _bload_init.
     500             :                  */
     501          36 :                 error = xfarray_unset(ra->free_records, record_nr);
     502          36 :                 if (error)
     503             :                         break;
     504          36 :                 ra->nr_real_records--;
     505          36 :                 record_nr--;
     506             :         } while (1);
     507             : 
     508          34 :         return error;
     509             : }
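
A compact userspace sketch of the convergence game described above (editorial
illustration; the toy estimate_blocks() sizing merely stands in for
xfs_btree_bload_compute_geometry(), and all names here are made up): consume
space from the longest free extent at the tail of the length-sorted array,
re-estimate, and stop once the reservation covers the estimate, flagging a
re-sort when the tail record was only shrunk rather than consumed.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct ext { uint32_t start; uint32_t len; };

    /* Toy sizing: each of the two btrees needs one block per 100 records
     * plus a root block. */
    static uint64_t estimate_blocks(uint64_t nr_records)
    {
            return 2 * ((nr_records + 99) / 100 + 1);
    }

    static int reserve_space(struct ext *recs, uint64_t *nr, bool *needs_resort)
    {
            uint64_t allocated = 0;

            for (;;) {
                    uint64_t required = estimate_blocks(*nr);

                    if (allocated >= required)
                            return 0;       /* converged */
                    if (*nr == 0)
                            return -1;      /* need space but none left */

                    uint64_t desired = required - allocated;
                    struct ext *rec = &recs[*nr - 1];       /* longest extent */
                    uint64_t len = rec->len < desired ? rec->len : desired;

                    allocated += len;
                    if (rec->len > desired) {
                            /* Shrink the record; lengths are no longer sorted. */
                            rec->start += desired;
                            rec->len -= desired;
                            *needs_resort = true;
                            return 0;
                    }

                    /* Consumed the whole record; drop it and re-estimate. */
                    (*nr)--;
            }
    }

    int main(void)
    {
            struct ext recs[] = { { 100, 3 }, { 200, 5 }, { 300, 400 } };
            uint64_t nr = 3;
            bool needs_resort = false;

            if (reserve_space(recs, &nr, &needs_resort) == 0)
                    printf("records left %llu, needs resort %d\n",
                                    (unsigned long long)nr, (int)needs_resort);
            return 0;
    }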
     510             : 
     511             : STATIC int
     512      142244 : xrep_abt_dispose_one(
     513             :         struct xrep_abt         *ra,
     514             :         struct xrep_newbt_resv  *resv)
     515             : {
     516      142244 :         struct xfs_scrub        *sc = ra->sc;
     517      142244 :         struct xfs_perag        *pag = sc->sa.pag;
     518      142244 :         xfs_agblock_t           free_agbno = resv->agbno + resv->used;
     519      142244 :         xfs_extlen_t            free_aglen = resv->len - resv->used;
     520      142244 :         int                     error;
     521             : 
     522      142244 :         ASSERT(pag == resv->pag);
     523             : 
     524             :         /* Add a deferred rmap for each extent we used. */
     525      142244 :         if (resv->used > 0) {
     526      142219 :                 xfs_fsblock_t   fsbno;
     527             : 
     528      142219 :                 fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, resv->agbno);
     529      142219 :                 xfs_rmap_alloc_extent(sc->tp, false, fsbno, resv->used,
     530             :                                 XFS_RMAP_OWN_AG);
     531             :         }
     532             : 
     533             :         /*
     534             :          * For each reserved btree block we didn't use, add it to the free
     535             :          * space btree.  We didn't touch fdblocks when we reserved them, so
     536             :          * we don't touch it now.
     537             :          */
     538      142285 :         if (free_aglen == 0)
     539             :                 return 0;
     540             : 
     541           0 :         trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
     542           0 :                         free_aglen, ra->new_bnobt.oinfo.oi_owner);
     543             : 
     544           0 :         error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
     545           0 :                         &ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
     546           0 :         if (error)
     547             :                 return error;
     548             : 
     549           0 :         return xrep_defer_finish(sc);
     550             : }
     551             : 
     552             : /*
     553             :  * Deal with all the space we reserved.  Blocks that were allocated for the
     554             :  * free space btrees need to have a (deferred) rmap added for the OWN_AG
     555             :  * allocation, and blocks that didn't get used can be freed via the usual
     556             :  * (deferred) means.
     557             :  */
     558             : STATIC void
     559      142252 : xrep_abt_dispose_reservations(
     560             :         struct xrep_abt         *ra,
     561             :         int                     error)
     562             : {
     563      142252 :         struct xrep_newbt_resv  *resv, *n;
     564             : 
     565      142252 :         if (error)
     566          30 :                 goto junkit;
     567             : 
     568      284427 :         for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
     569      142248 :                 error = xrep_abt_dispose_one(ra, resv);
     570      142205 :                 if (error)
     571           0 :                         goto junkit;
     572             :         }
     573             : 
     574      142179 : junkit:
     575      284415 :         for_each_xrep_newbt_reservation(&ra->new_bnobt, resv, n) {
     576      142173 :                 xfs_perag_put(resv->pag);
     577      142318 :                 list_del(&resv->list);
     578      142294 :                 kfree(resv);
     579             :         }
     580             : 
     581      142242 :         xrep_newbt_cancel(&ra->new_bnobt);
     582      142103 :         xrep_newbt_cancel(&ra->new_cntbt);
     583      142235 : }
     584             : 
     585             : /* Retrieve free space data for bulk load. */
     586             : STATIC int
     587      482369 : xrep_abt_get_records(
     588             :         struct xfs_btree_cur            *cur,
     589             :         unsigned int                    idx,
     590             :         struct xfs_btree_block          *block,
     591             :         unsigned int                    nr_wanted,
     592             :         void                            *priv)
     593             : {
     594      482369 :         struct xfs_alloc_rec_incore     *arec = &cur->bc_rec.a;
     595      482369 :         struct xrep_abt                 *ra = priv;
     596      482369 :         union xfs_btree_rec             *block_rec;
     597      482369 :         unsigned int                    loaded;
     598      482369 :         int                             error;
     599             : 
     600    90548729 :         for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
     601    90065480 :                 error = xfarray_load_next(ra->free_records, &ra->array_cur,
     602             :                                 arec);
     603    90069467 :                 if (error)
     604           0 :                         return error;
     605             : 
     606    90069467 :                 ra->longest = max(ra->longest, arec->ar_blockcount);
     607             : 
     608    90069467 :                 block_rec = xfs_btree_rec_addr(cur, idx, block);
     609    90066502 :                 cur->bc_ops->init_rec_from_cur(cur, block_rec);
     610             :         }
     611             : 
     612      483249 :         return loaded;
     613             : }
     614             : 
     615             : /* Feed one of the new btree blocks to the bulk loader. */
     616             : STATIC int
     617      505422 : xrep_abt_claim_block(
     618             :         struct xfs_btree_cur    *cur,
     619             :         union xfs_btree_ptr     *ptr,
     620             :         void                    *priv)
     621             : {
     622      505422 :         struct xrep_abt         *ra = priv;
     623             : 
     624      505422 :         return xrep_newbt_claim_block(cur, &ra->new_bnobt, ptr);
     625             : }
     626             : 
     627             : /*
     628             :  * Reset the AGF counters to reflect the free space btrees that we just
     629             :  * rebuilt, then reinitialize the per-AG data.
     630             :  */
     631             : STATIC int
     632      142292 : xrep_abt_reset_counters(
     633             :         struct xrep_abt         *ra)
     634             : {
     635      142292 :         struct xfs_scrub        *sc = ra->sc;
     636      142292 :         struct xfs_perag        *pag = sc->sa.pag;
     637      142292 :         struct xfs_agf          *agf = sc->sa.agf_bp->b_addr;
     638      142292 :         unsigned int            freesp_btreeblks = 0;
     639             : 
     640             :         /*
     641             :          * Compute the contribution to agf_btreeblks for the new free space
     642             :          * btrees.  This is the computed btree size minus anything we didn't
     643             :          * use.
     644             :          */
     645      142292 :         freesp_btreeblks += ra->new_bnobt.bload.nr_blocks - 1;
     646      142292 :         freesp_btreeblks += ra->new_cntbt.bload.nr_blocks - 1;
     647             : 
     648      142292 :         freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_bnobt);
     649      142311 :         freesp_btreeblks -= xrep_newbt_unused_blocks(&ra->new_cntbt);
     650             : 
     651             :         /*
     652             :          * The AGF header contains extra information related to the free space
     653             :          * btrees, so we must update those fields here.
     654             :          */
     655      142317 :         agf->agf_btreeblks = cpu_to_be32(freesp_btreeblks +
     656             :                                 (be32_to_cpu(agf->agf_rmap_blocks) - 1));
     657      142317 :         agf->agf_freeblks = cpu_to_be32(ra->nr_blocks);
     658      142317 :         agf->agf_longest = cpu_to_be32(ra->longest);
     659      142317 :         xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_BTREEBLKS |
     660             :                                                  XFS_AGF_LONGEST |
     661             :                                                  XFS_AGF_FREEBLKS);
     662             : 
     663             :         /*
     664             :          * After we commit the new btree to disk, it is possible that the
     665             :          * process to reap the old btree blocks will race with the AIL trying
     666             :          * to checkpoint the old btree blocks into the filesystem.  If the new
     667             :          * tree is shorter than the old one, the allocbt write verifier will
     668             :          * fail and the AIL will shut down the filesystem.
     669             :          *
     670             :          * To avoid this, save the old incore btree height values as the alt
     671             :          * height values before re-initializing the perag info from the updated
     672             :          * AGF to capture all the new values.
     673             :          */
     674      142327 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = pag->pagf_levels[XFS_BTNUM_BNOi];
     675      142327 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = pag->pagf_levels[XFS_BTNUM_CNTi];
     676             : 
     677             :         /* Reinitialize with the values we just logged. */
     678      142327 :         return xrep_reinit_pagf(sc);
     679             : }
     680             : 
     681             : /*
     682             :  * Use the collected free space information to stage new free space btrees.
     683             :  * If this is successful we'll return with the new btree root
     684             :  * information logged to the repair transaction but not yet committed.
     685             :  */
     686             : STATIC int
     687      141948 : xrep_abt_build_new_trees(
     688             :         struct xrep_abt         *ra)
     689             : {
     690      141948 :         struct xfs_scrub        *sc = ra->sc;
     691      141948 :         struct xfs_btree_cur    *bno_cur;
     692      141948 :         struct xfs_btree_cur    *cnt_cur;
     693      141948 :         struct xfs_perag        *pag = sc->sa.pag;
     694      141948 :         bool                    needs_resort = false;
     695      141948 :         int                     error;
     696             : 
     697             :         /*
     698             :          * Sort the free extents by length so that we can set up the free space
     699             :          * btrees in as few extents as possible.  This reduces the amount of
     700             :          * deferred rmap / free work we have to do at the end.
     701             :          */
     702      141948 :         error = xrep_cntbt_sort_records(ra, false);
     703      142152 :         if (error)
     704             :                 return error;
     705             : 
     706             :         /*
     707             :          * Prepare to construct the new btree by reserving disk space for the
     708             :          * new btree and setting up all the accounting information we'll need
     709             :          * to root the new btree while it's under construction and before we
     710             :          * attach it to the AG header.
     711             :          */
     712      142121 :         xrep_newbt_init_bare(&ra->new_bnobt, sc);
     713      141787 :         xrep_newbt_init_bare(&ra->new_cntbt, sc);
     714             : 
     715      142134 :         ra->new_bnobt.bload.get_records = xrep_abt_get_records;
     716      142134 :         ra->new_cntbt.bload.get_records = xrep_abt_get_records;
     717             : 
     718      142134 :         ra->new_bnobt.bload.claim_block = xrep_abt_claim_block;
     719      142134 :         ra->new_cntbt.bload.claim_block = xrep_abt_claim_block;
     720             : 
     721             :         /* Allocate cursors for the staged btrees. */
     722      142134 :         bno_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_bnobt.afake,
     723             :                         pag, XFS_BTNUM_BNO);
     724      142069 :         cnt_cur = xfs_allocbt_stage_cursor(sc->mp, &ra->new_cntbt.afake,
     725             :                         pag, XFS_BTNUM_CNT);
     726             : 
     727             :         /* Last chance to abort before we start committing fixes. */
     728      142259 :         if (xchk_should_terminate(sc, &error))
     729           0 :                 goto err_cur;
     730             : 
     731             :         /* Reserve the space we'll need for the new btrees. */
     732      142234 :         error = xrep_abt_reserve_space(ra, bno_cur, cnt_cur, &needs_resort);
     733      142187 :         if (error)
     734          30 :                 goto err_cur;
     735             : 
     736             :         /*
      737             :          * If we need to re-sort the free extents by length, do so, so that we
     738             :          * can put the records into the cntbt in the correct order.
     739             :          */
     740      142157 :         if (needs_resort) {
     741      142152 :                 error = xrep_cntbt_sort_records(ra, needs_resort);
     742      142173 :                 if (error)
     743           0 :                         goto err_cur;
     744             :         }
     745             : 
     746             :         /*
     747             :          * Due to btree slack factors, it's possible for a new btree to be one
     748             :          * level taller than the old btree.  Update the alternate incore btree
     749             :          * height so that we don't trip the verifiers when writing the new
     750             :          * btree blocks to disk.
     751             :          */
     752      142178 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] =
     753      142178 :                                         ra->new_bnobt.bload.btree_height;
     754      142178 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] =
     755      142178 :                                         ra->new_cntbt.bload.btree_height;
     756             : 
     757             :         /* Load the free space by length tree. */
     758      142178 :         ra->array_cur = XFARRAY_CURSOR_INIT;
     759      142178 :         ra->longest = 0;
     760      142178 :         error = xfs_btree_bload(cnt_cur, &ra->new_cntbt.bload, ra);
     761      142295 :         if (error)
     762           0 :                 goto err_levels;
     763             : 
     764      142295 :         error = xrep_bnobt_sort_records(ra);
     765      142297 :         if (error)
     766             :                 return error;
     767             : 
     768             :         /* Load the free space by block number tree. */
     769      142285 :         ra->array_cur = XFARRAY_CURSOR_INIT;
     770      142285 :         error = xfs_btree_bload(bno_cur, &ra->new_bnobt.bload, ra);
     771      142324 :         if (error)
     772           0 :                 goto err_levels;
     773             : 
     774             :         /*
     775             :          * Install the new btrees in the AG header.  After this point the old
     776             :          * btrees are no longer accessible and the new trees are live.
     777             :          */
     778      142324 :         xfs_allocbt_commit_staged_btree(bno_cur, sc->tp, sc->sa.agf_bp);
     779      142277 :         xfs_btree_del_cursor(bno_cur, 0);
     780      142281 :         xfs_allocbt_commit_staged_btree(cnt_cur, sc->tp, sc->sa.agf_bp);
     781      142282 :         xfs_btree_del_cursor(cnt_cur, 0);
     782             : 
     783             :         /* Reset the AGF counters now that we've changed the btree shape. */
     784      142325 :         error = xrep_abt_reset_counters(ra);
     785      142262 :         if (error)
     786           0 :                 goto err_newbt;
     787             : 
     788             :         /* Dispose of any unused blocks and the accounting information. */
     789      142262 :         xrep_abt_dispose_reservations(ra, error);
     790             : 
     791      142230 :         return xrep_roll_ag_trans(sc);
     792             : 
     793           0 : err_levels:
     794           0 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
     795           0 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
     796          30 : err_cur:
     797          30 :         xfs_btree_del_cursor(cnt_cur, error);
     798          30 :         xfs_btree_del_cursor(bno_cur, error);
     799          30 : err_newbt:
     800          30 :         xrep_abt_dispose_reservations(ra, error);
     801          30 :         return error;
     802             : }
     803             : 
     804             : /*
     805             :  * Now that we've logged the roots of the new btrees, invalidate all of the
     806             :  * old blocks and free them.
     807             :  */
     808             : STATIC int
     809      141943 : xrep_abt_remove_old_trees(
     810             :         struct xrep_abt         *ra)
     811             : {
     812      141943 :         struct xfs_perag        *pag = ra->sc->sa.pag;
     813      141943 :         int                     error;
     814             : 
     815             :         /* Free the old btree blocks if they're not in use. */
     816      141943 :         error = xrep_reap_agblocks(ra->sc, &ra->old_allocbt_blocks,
     817             :                         &XFS_RMAP_OINFO_AG, XFS_AG_RESV_IGNORE);
     818      142041 :         if (error)
     819             :                 return error;
     820             : 
     821             :         /*
     822             :          * Now that we've zapped all the old allocbt blocks we can turn off
     823             :          * the alternate height mechanism.
     824             :          */
     825      142041 :         pag->pagf_alt_levels[XFS_BTNUM_BNOi] = 0;
     826      142041 :         pag->pagf_alt_levels[XFS_BTNUM_CNTi] = 0;
     827      142041 :         return 0;
     828             : }
     829             : 
     830             : /* Repair the freespace btrees for some AG. */
     831             : int
     832      143229 : xrep_allocbt(
     833             :         struct xfs_scrub        *sc)
     834             : {
     835      143229 :         struct xrep_abt         *ra;
     836      143229 :         struct xfs_mount        *mp = sc->mp;
     837      143229 :         char                    *descr;
     838      143229 :         int                     error;
     839             : 
     840             :         /* We require the rmapbt to rebuild anything. */
     841      143229 :         if (!xfs_has_rmapbt(mp))
     842             :                 return -EOPNOTSUPP;
     843             : 
     844      142290 :         ra = kzalloc(sizeof(struct xrep_abt), XCHK_GFP_FLAGS);
     845      142284 :         if (!ra)
     846             :                 return -ENOMEM;
     847      142284 :         ra->sc = sc;
     848             : 
     849             :         /* We rebuild both data structures. */
     850      142284 :         sc->sick_mask = XFS_SICK_AG_BNOBT | XFS_SICK_AG_CNTBT;
     851             : 
     852             :         /*
     853             :          * Make sure the busy extent list is clear because we can't put extents
     854             :          * on there twice.  In theory we cleared this before we started, but
     855             :          * let's not risk the filesystem.
     856             :          */
     857      142284 :         if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
     858           0 :                 error = -EDEADLOCK;
     859           0 :                 goto out_ra;
     860             :         }
     861             : 
     862             :         /* Set up enough storage to handle maximally fragmented free space. */
     863      142216 :         descr = xchk_xfile_ag_descr(sc, "free space records");
     864      141991 :         error = xfarray_create(descr, mp->m_sb.sb_agblocks / 2,
     865             :                         sizeof(struct xfs_alloc_rec_incore),
     866             :                         &ra->free_records);
     867      142030 :         kfree(descr);
     868      142037 :         if (error)
     869           0 :                 goto out_ra;
     870             : 
     871             :         /* Collect the free space data and find the old btree blocks. */
     872      142037 :         xagb_bitmap_init(&ra->old_allocbt_blocks);
     873      141935 :         error = xrep_abt_find_freespace(ra);
     874      142083 :         if (error)
     875           0 :                 goto out_bitmap;
     876             : 
     877             :         /* Rebuild the free space information. */
     878      142083 :         error = xrep_abt_build_new_trees(ra);
     879      142131 :         if (error)
     880          30 :                 goto out_bitmap;
     881             : 
     882             :         /* Kill the old trees. */
     883      142101 :         error = xrep_abt_remove_old_trees(ra);
     884             : 
     885      141979 : out_bitmap:
     886      141979 :         xagb_bitmap_destroy(&ra->old_allocbt_blocks);
     887      141965 :         xfarray_destroy(ra->free_records);
     888      142044 : out_ra:
     889      142044 :         kfree(ra);
     890      142044 :         return error;
     891             : }
     892             : 
     893             : /* Make sure both btrees are ok after we've rebuilt them. */
     894             : int
     895      142148 : xrep_revalidate_allocbt(
     896             :         struct xfs_scrub        *sc)
     897             : {
     898      142148 :         __u32                   old_type = sc->sm->sm_type;
     899      142148 :         int                     error;
     900             : 
     901             :         /*
     902             :          * We must update sm_type temporarily so that the tree-to-tree cross
     903             :          * reference checks will work in the correct direction, and also so
     904             :          * that tracing will report correctly if there are more errors.
     905             :          */
     906      142148 :         sc->sm->sm_type = XFS_SCRUB_TYPE_BNOBT;
     907      142148 :         error = xchk_bnobt(sc);
     908      142256 :         if (error)
     909           0 :                 goto out;
     910             : 
     911      142256 :         sc->sm->sm_type = XFS_SCRUB_TYPE_CNTBT;
     912      142256 :         error = xchk_cntbt(sc);
     913      142298 : out:
     914      142298 :         sc->sm->sm_type = old_type;
     915      142298 :         return error;
     916             : }

Generated by: LCOV version 1.14