LCOV - code coverage report
Current view: top level - fs/xfs/scrub - newbt.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 231 278 83.1 %
Date: 2023-07-31 20:08:27 Functions: 21 22 95.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_btree_staging.h"
      14             : #include "xfs_log_format.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_log.h"
      17             : #include "xfs_sb.h"
      18             : #include "xfs_inode.h"
      19             : #include "xfs_alloc.h"
      20             : #include "xfs_rmap.h"
      21             : #include "xfs_ag.h"
      22             : #include "xfs_defer.h"
      23             : #include "xfs_extfree_item.h"
      24             : #include "xfs_imeta.h"
      25             : #include "xfs_quota.h"
      26             : #include "scrub/scrub.h"
      27             : #include "scrub/common.h"
      28             : #include "scrub/trace.h"
      29             : #include "scrub/repair.h"
      30             : #include "scrub/newbt.h"
      31             : 
      32             : /*
      33             :  * Estimate proper slack values for a btree that's being reloaded.
      34             :  *
      35             :  * Under most circumstances, we'll take whatever default loading value the
      36             :  * btree bulk loading code calculates for us.  However, there are some
      37             :  * exceptions to this rule:
      38             :  *
      39             :  * (1) If someone turned one of the debug knobs.
      40             :  * (2) If this is a per-AG btree and the AG has less than ~9% space free.
      41             :  * (3) If this is an inode btree and the FS has less than ~9% space free.
      42             :  *
      43             :  * Note that we actually use 3/32 for the comparison to avoid division.
      44             :  */
      45             : static void
      46      499641 : xrep_newbt_estimate_slack(
      47             :         struct xrep_newbt       *xnr)
      48             : {
      49      499641 :         struct xfs_scrub        *sc = xnr->sc;
      50      499641 :         struct xfs_btree_bload  *bload = &xnr->bload;
      51      499641 :         uint64_t                free;
      52      499641 :         uint64_t                sz;
      53             : 
      54             :         /*
      55             :          * The xfs_globals values are set to -1 (i.e. take the bload defaults)
      56             :          * unless someone has set them otherwise, so we just pull the values
      57             :          * here.
      58             :          */
      59      499641 :         bload->leaf_slack = xfs_globals.bload_leaf_slack;
      60      499641 :         bload->node_slack = xfs_globals.bload_node_slack;
      61             : 
      62      499641 :         if (sc->ops->type == ST_PERAG) {
      63      347841 :                 free = sc->sa.pag->pagf_freeblks;
      64      347841 :                 sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
      65             :         } else {
      66      151800 :                 free = percpu_counter_sum(&sc->mp->m_fdblocks);
      67      151802 :                 sz = sc->mp->m_sb.sb_dblocks;
      68             :         }
      69             : 
      70             :         /* No further changes if there's more than 3/32ths space left. */
      71      499647 :         if (free >= ((sz * 3) >> 5))
      72             :                 return;
      73             : 
      74             :         /* We're low on space; load the btrees as tightly as possible. */
      75           0 :         if (bload->leaf_slack < 0)
      76           0 :                 bload->leaf_slack = 0;
      77           0 :         if (bload->node_slack < 0)
      78           0 :                 bload->node_slack = 0;
      79             : }
      80             : 
      81             : /* Initialize accounting resources for staging a new AG btree. */
      82             : void
      83      499656 : xrep_newbt_init_ag(
      84             :         struct xrep_newbt               *xnr,
      85             :         struct xfs_scrub                *sc,
      86             :         const struct xfs_owner_info     *oinfo,
      87             :         xfs_fsblock_t                   alloc_hint,
      88             :         enum xfs_ag_resv_type           resv)
      89             : {
      90      499656 :         memset(xnr, 0, sizeof(struct xrep_newbt));
      91      499656 :         xnr->sc = sc;
      92      499656 :         xnr->oinfo = *oinfo; /* structure copy */
      93      499656 :         xnr->alloc_hint = alloc_hint;
      94      499656 :         xnr->resv = resv;
      95      499656 :         INIT_LIST_HEAD(&xnr->resv_list);
      96      499656 :         xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
      97      499656 :         xrep_newbt_estimate_slack(xnr);
      98      499650 : }
      99             : 
     100             : /* Initialize accounting resources for staging a new inode fork btree. */
     101             : int
     102      133368 : xrep_newbt_init_inode(
     103             :         struct xrep_newbt               *xnr,
     104             :         struct xfs_scrub                *sc,
     105             :         int                             whichfork,
     106             :         const struct xfs_owner_info     *oinfo)
     107             : {
     108      133368 :         struct xfs_ifork                *ifp;
     109             : 
     110      133368 :         ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
     111      133368 :         if (!ifp)
     112             :                 return -ENOMEM;
     113             : 
     114      133368 :         xrep_newbt_init_ag(xnr, sc, oinfo,
     115      133368 :                         XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
     116             :                         XFS_AG_RESV_NONE);
     117      133368 :         xnr->ifake.if_fork = ifp;
     118      133368 :         xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
     119      133368 :         xnr->ifake.if_whichfork = whichfork;
     120      133368 :         return 0;
     121             : }
     122             : 
/*
 * Initialize accounting resources for staging a new metadata inode btree.
 * If the inode has an imeta space reservation, the caller must adjust the
 * imeta reservation at btree commit.
 */
int
xrep_newbt_init_metadir_inode(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc)
{
	struct xfs_owner_info		oinfo;
	struct xfs_ifork		*ifp;

	ASSERT(xfs_is_metadir_inode(sc->ip));
	ASSERT(XFS_IS_DQDETACHED(sc->mp, sc->ip));

	/* New btree blocks are rmap-owned by this inode's data fork bmbt. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);

	/* In-memory fork that will stage the new btree. */
	ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
	if (!ifp)
		return -ENOMEM;

	/*
	 * Allocate new metadir btree blocks with XFS_AG_RESV_NONE because the
	 * inode metadata space reservations can only account allocated space
	 * to the i_nblocks.  We do not want to change the inode core fields
	 * until we're ready to commit the new tree, so we allocate the blocks
	 * as if they were regular file blocks.  This exposes us to a higher
	 * risk of the repair being cancelled due to ENOSPC.
	 */
	xrep_newbt_init_ag(xnr, sc, &oinfo,
			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
			XFS_AG_RESV_NONE);
	xnr->ifake.if_fork = ifp;
	xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, XFS_DATA_FORK);
	xnr->ifake.if_whichfork = XFS_DATA_FORK;
	return 0;
}
     161             : 
/*
 * Initialize accounting resources for staging a new btree.  Callers are
 * expected to add their own reservations (and clean them up) manually.
 */
void
xrep_newbt_init_bare(
	struct xrep_newbt		*xnr,
	struct xfs_scrub		*sc)
{
	/*
	 * No allocation hint (NULLFSBLOCK), no AG reservation, and the
	 * "any owner" rmap owner; the caller adds extents explicitly via
	 * xrep_newbt_add_extent.
	 */
	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
			XFS_AG_RESV_NONE);
}
     174             : 
/*
 * Set up automatic reaping of the blocks reserved for btree reconstruction in
 * case we crash by logging a deferred free item for each extent we allocate so
 * that we can get all of the space back if we crash before we can commit the
 * new btree.  This function returns a token that can be used to cancel
 * automatic reaping if repair is successful.
 */
static int
xrep_newbt_schedule_autoreap(
	struct xrep_newbt		*xnr,
	struct xrep_newbt_resv		*resv)
{
	/* Describe this reservation as a single free-extent work item. */
	struct xfs_extent_free_item	efi_item = {
		.xefi_blockcount	= resv->len,
		.xefi_owner		= xnr->oinfo.oi_owner,
		.xefi_flags		= XFS_EFI_SKIP_DISCARD,
		.xefi_pag		= resv->pag,
	};
	struct xfs_scrub		*sc = xnr->sc;
	struct xfs_log_item		*lip;
	LIST_HEAD(items);

	ASSERT(xnr->oinfo.oi_offset == 0);

	efi_item.xefi_startblock = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
			resv->agbno);
	/* Propagate the fork flags from the rmap owner info to the EFI. */
	if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_ATTR_FORK)
		efi_item.xefi_flags |= XFS_EFI_ATTR_FORK;
	if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
		efi_item.xefi_flags |= XFS_EFI_BMBT_BLOCK;

	INIT_LIST_HEAD(&efi_item.xefi_list);
	list_add(&efi_item.xefi_list, &items);

	/* Pin the AG against concurrent intent processing while EFI lives. */
	xfs_perag_intent_hold(resv->pag);
	lip = xfs_extent_free_defer_type.create_intent(sc->tp, &items, 1,
			false);
	/* create_intent failure is only checked in debug builds here. */
	ASSERT(lip != NULL && !IS_ERR(lip));

	/* Stash the EFI so the caller can later log an EFD or abort it. */
	resv->efi = lip;
	return 0;
}
     217             : 
/*
 * Earlier, we logged EFIs for the extents that we allocated to hold the new
 * btree so that we could automatically roll back those allocations if the
 * system crashed.  Now we log an EFD to cancel the EFI, either because the
 * repair succeeded and the new blocks are in use; or because the repair was
 * cancelled and we're about to free the extents directly.
 */
static inline void
xrep_newbt_finish_autoreap(
	struct xfs_scrub	*sc,
	struct xrep_newbt_resv	*resv)
{
	struct xfs_efd_log_item	*efdp;
	struct xfs_extent	*extp;
	struct xfs_log_item	*efd_lip;

	/* Log an EFD that cancels the single-extent EFI in resv->efi. */
	efd_lip = xfs_extent_free_defer_type.create_done(sc->tp, resv->efi, 1);
	efdp = container_of(efd_lip, struct xfs_efd_log_item, efd_item);

	/* Fill in the one extent the EFD covers by hand. */
	extp = efdp->efd_format.efd_extents;
	extp->ext_start = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
					 resv->agbno);
	extp->ext_len = resv->len;
	efdp->efd_next_extent++;
	set_bit(XFS_LI_DIRTY, &efd_lip->li_flags);

	/* Release the AG intent hold taken when the EFI was scheduled. */
	xfs_perag_intent_rele(resv->pag);
}
     244             : 
/* Abort an EFI logged for a new btree block reservation. */
static inline void
xrep_newbt_cancel_autoreap(
	struct xrep_newbt_resv	*resv)
{
	/* Drop the intent item, then the AG intent hold it was pinning. */
	xfs_extent_free_defer_type.abort_intent(resv->efi);
	xfs_perag_intent_rele(resv->pag);
}
     253             : 
/*
 * Relog the EFIs attached to a staging btree so that we don't pin the log
 * tail.  Same logic as xfs_defer_relog.  Returns 0 or a negative errno from
 * rolling the scrub transaction.
 */
int
xrep_newbt_relog_autoreap(
	struct xrep_newbt	*xnr)
{
	struct xrep_newbt_resv	*resv;
	unsigned int		efi_bytes = 0;	/* log space used by relogs */

	list_for_each_entry(resv, &xnr->resv_list, list) {
		/*
		 * If the log intent item for this deferred op is in a
		 * different checkpoint, relog it to keep the log tail moving
		 * forward.  We're ok with this being racy because an incorrect
		 * decision means we'll be a little slower at pushing the tail.
		 */
		if (!resv->efi || xfs_log_item_in_current_chkpt(resv->efi))
			continue;

		resv->efi = xfs_trans_item_relog(resv->efi, xnr->sc->tp);

		/*
		 * If free space is very fragmented, it's possible that the new
		 * btree will be allocated a large number of small extents.
		 * On an active system, it's possible that so many of those
		 * EFIs will need relogging here that doing them all in one
		 * transaction will overflow the reservation.
		 *
		 * Each allocation for the new btree (xrep_newbt_resv) points
		 * to a unique single-mapping EFI, so each relog operation logs
		 * a single-mapping EFD followed by a new EFI.  Each single
		 * mapping EF[ID] item consumes about 128 bytes, so we'll
		 * assume 256 bytes per relog.  Roll if we consume more than
		 * half of the transaction reservation.
		 */
		efi_bytes += 256;
		if (efi_bytes > xnr->sc->tp->t_log_res / 2) {
			int	error;

			error = xrep_roll_trans(xnr->sc);
			if (error)
				return error;

			efi_bytes = 0;
		}
	}

	/* Roll one last time if the loop above dirtied the transaction. */
	if (xnr->sc->tp->t_flags & XFS_TRANS_DIRTY)
		return xrep_roll_trans(xnr->sc);
	return 0;
}
     307             : 
     308             : /*
     309             :  * Designate specific blocks to be used to build our new btree.  @pag must be
     310             :  * a passive reference.
     311             :  */
     312             : STATIC int
     313      327073 : xrep_newbt_add_blocks(
     314             :         struct xrep_newbt               *xnr,
     315             :         struct xfs_perag                *pag,
     316             :         xfs_agblock_t                   agbno,
     317             :         xfs_extlen_t                    len,
     318             :         bool                            autoreap)
     319             : {
     320      327073 :         struct xrep_newbt_resv          *resv;
     321      327073 :         int                             error;
     322             : 
     323      327073 :         resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
     324      327067 :         if (!resv)
     325             :                 return -ENOMEM;
     326             : 
     327      327067 :         INIT_LIST_HEAD(&resv->list);
     328      327067 :         resv->agbno = agbno;
     329      327067 :         resv->len = len;
     330      327067 :         resv->used = 0;
     331      327067 :         resv->pag = xfs_perag_hold(pag);
     332             : 
     333      327064 :         if (autoreap) {
     334      239749 :                 error = xrep_newbt_schedule_autoreap(xnr, resv);
     335      239756 :                 if (error)
     336           0 :                         goto out_pag;
     337             :         }
     338             : 
     339      327071 :         list_add_tail(&resv->list, &xnr->resv_list);
     340      327071 :         return 0;
     341             : out_pag:
     342           0 :         xfs_perag_put(resv->pag);
     343           0 :         kfree(resv);
     344           0 :         return error;
     345             : }
     346             : 
/*
 * Add an extent to the new btree reservation pool.  Callers are required to
 * handle any automatic reaping if the repair is cancelled.  @pag must be a
 * passive reference.
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt		*xnr,
	struct xfs_perag		*pag,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	/* autoreap=false: no crash-recovery EFI is logged for this extent. */
	return xrep_newbt_add_blocks(xnr, pag, agbno, len, false);
}
     361             : 
     362             : /* Don't let our allocation hint take us beyond this AG */
     363             : static inline void
     364      183147 : xrep_newbt_validate_ag_alloc_hint(
     365             :         struct xrep_newbt       *xnr)
     366             : {
     367      183147 :         struct xfs_scrub        *sc = xnr->sc;
     368      183147 :         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);
     369             : 
     370      366283 :         if (agno == sc->sa.pag->pag_agno &&
     371      183148 :             xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
     372             :                 return;
     373             : 
     374           0 :         xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
     375             :                                          XFS_AGFL_BLOCK(sc->mp) + 1);
     376             : }
     377             : 
/*
 * Allocate disk space for a new per-AG btree.  Returns 0, -ENOSPC if the AG
 * runs out of blocks, or another negative errno.
 */
STATIC int
xrep_newbt_alloc_ag_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	int			error = 0;

	ASSERT(sc->sa.pag != NULL);
	ASSERT(xnr->resv != XFS_AG_RESV_IMETA);

	/* Keep allocating (possibly fragmented) extents until we have them all. */
	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= sc->mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};

		/* Clamp the hint so it stays inside this AG. */
		xrep_newbt_validate_ag_alloc_hint(xnr);

		/* Prefer the caller-supplied allocator if one was set. */
		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_near_bno(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		trace_xrep_newbt_alloc_ag_blocks(sc->mp, args.agno, args.agbno,
				args.len, xnr->oinfo.oi_owner);

		/* autoreap=true: log an EFI so a crash frees these blocks. */
		error = xrep_newbt_add_blocks(xnr, sc->sa.pag, args.agbno,
				args.len, true);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		/* Finish deferred work before allocating the next extent. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}
     431             : 
     432             : /* Don't let our allocation hint take us beyond EOFS */
     433             : static inline void
     434       56588 : xrep_newbt_validate_file_alloc_hint(
     435             :         struct xrep_newbt       *xnr)
     436             : {
     437       56588 :         struct xfs_scrub        *sc = xnr->sc;
     438             : 
     439       56588 :         if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
     440             :                 return;
     441             : 
     442           0 :         xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
     443             : }
     444             : 
/*
 * Allocate disk space for our new file-based btree.  Unlike the per-AG case,
 * extents may come from any AG.  Returns 0, -ENOSPC, -EFSCORRUPTED if the
 * allocated AG cannot be found, or another negative errno.
 */
STATIC int
xrep_newbt_alloc_file_blocks(
	struct xrep_newbt	*xnr,
	uint64_t		nr_blocks)
{
	struct xfs_scrub	*sc = xnr->sc;
	int			error = 0;

	ASSERT(xnr->resv != XFS_AG_RESV_IMETA);

	/* Keep allocating (possibly fragmented) extents until we have them all. */
	while (nr_blocks > 0) {
		struct xfs_alloc_arg	args = {
			.tp		= sc->tp,
			.mp		= sc->mp,
			.oinfo		= xnr->oinfo,
			.minlen		= 1,
			.maxlen		= nr_blocks,
			.prod		= 1,
			.resv		= xnr->resv,
		};
		struct xfs_perag	*pag;

		/* Clamp the hint so it stays inside the filesystem. */
		xrep_newbt_validate_file_alloc_hint(xnr);

		/* Prefer the caller-supplied allocator if one was set. */
		if (xnr->alloc_vextent)
			error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
		else
			error = xfs_alloc_vextent_start_ag(&args,
					xnr->alloc_hint);
		if (error)
			return error;
		if (args.fsbno == NULLFSBLOCK)
			return -ENOSPC;

		trace_xrep_newbt_alloc_file_blocks(sc->mp, args.agno,
				args.agbno, args.len, xnr->oinfo.oi_owner);

		/* Look up the AG that the allocator actually used. */
		pag = xfs_perag_get(sc->mp, args.agno);
		if (!pag) {
			ASSERT(0);
			return -EFSCORRUPTED;
		}

		/* autoreap=true: log an EFI so a crash frees these blocks. */
		error = xrep_newbt_add_blocks(xnr, pag, args.agbno, args.len,
				true);
		xfs_perag_put(pag);
		if (error)
			return error;

		nr_blocks -= args.len;
		xnr->alloc_hint = args.fsbno + args.len;

		/* Finish deferred work before allocating the next extent. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}
     505             : 
     506             : /* Allocate disk space for our new btree. */
     507             : int
     508      226896 : xrep_newbt_alloc_blocks(
     509             :         struct xrep_newbt       *xnr,
     510             :         uint64_t                nr_blocks)
     511             : {
     512      226896 :         if (xnr->sc->ip)
     513       53486 :                 return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
     514      173410 :         return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
     515             : }
     516             : 
/*
 * How many extent freeing items can we attach to a transaction before we want
 * to finish the chain so that unreserving new btree blocks doesn't overrun
 * the transaction reservation?
 */
#define XREP_REAP_MAX_NEWBT_EFIS	(128)

/*
 * Free the unused part of an extent.  Returns the number of EFIs logged or
 * a negative errno.
 */
STATIC int
xrep_newbt_free_extent(
	struct xrep_newbt	*xnr,
	struct xrep_newbt_resv	*resv,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	xfs_agblock_t		free_agbno = resv->agbno;
	xfs_extlen_t		free_aglen = resv->len;
	xfs_fsblock_t		fsbno;
	int			error;

	/*
	 * If we used space and committed the btree, remove those blocks from
	 * the extent before we act on it.
	 */
	if (btree_committed) {
		free_agbno += resv->used;
		free_aglen -= resv->used;
	}

	/* Log the EFD that cancels this reservation's crash-recovery EFI. */
	xrep_newbt_finish_autoreap(sc, resv);

	/* The whole extent went into the new btree; nothing left to free. */
	if (free_aglen == 0)
		return 0;

	trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
			free_aglen, xnr->oinfo.oi_owner);

	ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
	ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);

	/*
	 * Use EFIs to free the reservations.  This reduces the chance
	 * that we leak blocks if the system goes down.
	 */
	fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
	error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
			xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
	if (error)
		return error;

	/* One deferred-free (EFI) was queued above. */
	return 1;
}
     572             : 
     573             : /* Free all the accounting info and disk space we reserved for a new btree. */
/*
 * Free all the accounting info and disk space we reserved for a new btree.
 *
 * @btree_committed indicates whether the new btree was committed, in which
 * case only the unused tail of each reservation is freed.  Returns 0 or a
 * negative errno; incore state is torn down on all paths.
 */
STATIC int
xrep_newbt_free(
        struct xrep_newbt       *xnr,
        bool                    btree_committed)
{
        struct xfs_scrub        *sc = xnr->sc;
        struct xrep_newbt_resv  *resv, *n;
        unsigned int            freed = 0;      /* EFIs logged since last defer finish */
        int                     error = 0;

        /*
         * If the filesystem already went down, we can't free the blocks.  Skip
         * ahead to freeing the incore metadata because we can't fix anything.
         */
        if (xfs_is_shutdown(sc->mp))
                goto junkit;

        list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
                int             ret;

                /* Returns the number of EFIs logged (0 or 1) or -errno. */
                ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
                /* Release the incore reservation even if freeing failed. */
                list_del(&resv->list);
                xfs_perag_put(resv->pag);
                kfree(resv);
                if (ret < 0) {
                        error = ret;
                        goto junkit;
                }

                freed += ret;
                /*
                 * Finish the deferred-op chain periodically so the EFIs don't
                 * overrun the transaction reservation.
                 */
                if (freed >= XREP_REAP_MAX_NEWBT_EFIS) {
                        error = xrep_defer_finish(sc);
                        if (error)
                                goto junkit;
                        freed = 0;
                }
        }

        /* Flush any EFIs left over from the final partial batch. */
        if (freed)
                error = xrep_defer_finish(sc);

junkit:
        /*
         * If we still have reservations attached to @newbt, cleanup must have
         * failed and the filesystem is about to go down.  Clean up the incore
         * reservations.
         */
        list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
                xrep_newbt_cancel_autoreap(resv);
                list_del(&resv->list);
                xfs_perag_put(resv->pag);
                kfree(resv);
        }

        /* Inode btree repairs staged a fake fork; give it back to the cache. */
        if (sc->ip) {
                kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
                xnr->ifake.if_fork = NULL;
        }

        return error;
}
     635             : 
     636             : /*
     637             :  * Free all the accounting info and unused disk space allocations after
     638             :  * committing a new btree.
     639             :  */
     640             : int
     641      325030 : xrep_newbt_commit(
     642             :         struct xrep_newbt       *xnr)
     643             : {
     644      325030 :         return xrep_newbt_free(xnr, true);
     645             : }
     646             : 
     647             : /*
     648             :  * Free all the accounting info and all of the disk space we reserved for a new
     649             :  * btree that we're not going to commit.  We want to try to roll things back
     650             :  * cleanly for things like ENOSPC midway through allocation.
     651             :  */
     652             : void
     653      174629 : xrep_newbt_cancel(
     654             :         struct xrep_newbt       *xnr)
     655             : {
     656      174629 :         xrep_newbt_free(xnr, false);
     657      174630 : }
     658             : 
     659             : /* Feed one of the reserved btree blocks to the bulk loader. */
     660             : int
     661     1187495 : xrep_newbt_claim_block(
     662             :         struct xfs_btree_cur    *cur,
     663             :         struct xrep_newbt       *xnr,
     664             :         union xfs_btree_ptr     *ptr)
     665             : {
     666     1187495 :         struct xrep_newbt_resv  *resv;
     667     1187495 :         struct xfs_mount        *mp = cur->bc_mp;
     668     1187495 :         xfs_agblock_t           agbno;
     669             : 
     670             :         /*
     671             :          * The first item in the list should always have a free block unless
     672             :          * we're completely out.
     673             :          */
     674     1187495 :         resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
     675     1187495 :         if (resv->used == resv->len)
     676             :                 return -ENOSPC;
     677             : 
     678             :         /*
     679             :          * Peel off a block from the start of the reservation.  We allocate
     680             :          * blocks in order to place blocks on disk in increasing record or key
     681             :          * order.  The block reservations tend to end up on the list in
     682             :          * decreasing order, which hopefully results in leaf blocks ending up
     683             :          * together.
     684             :          */
     685     1187495 :         agbno = resv->agbno + resv->used;
     686     1187495 :         resv->used++;
     687             : 
     688             :         /* If we used all the blocks in this reservation, move it to the end. */
     689     1187495 :         if (resv->used == resv->len)
     690      327056 :                 list_move_tail(&resv->list, &xnr->resv_list);
     691             : 
     692     1187503 :         trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
     693     1187503 :                         xnr->oinfo.oi_owner);
     694             : 
     695     1187487 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
     696      413505 :                 ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
     697             :                                                                 agbno));
     698             :         else
     699     1547964 :                 ptr->s = cpu_to_be32(agbno);
     700             :         return 0;
     701             : }
     702             : 
     703             : /* How many reserved blocks are unused? */
     704             : unsigned int
     705      174630 : xrep_newbt_unused_blocks(
     706             :         struct xrep_newbt       *xnr)
     707             : {
     708      174630 :         struct xrep_newbt_resv  *resv;
     709      174630 :         unsigned int            unused = 0;
     710             : 
     711      261945 :         list_for_each_entry(resv, &xnr->resv_list, list)
     712       87315 :                 unused += resv->len - resv->used;
     713      174630 :         return unused;
     714             : }

Generated by: LCOV version 1.14