LCOV - code coverage report
Current view: top level - fs/xfs/scrub - newbt.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 247 278 88.8 %
Date: 2023-07-31 20:08:34 Functions: 21 22 95.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <djwong@kernel.org>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_trans_resv.h"
      11             : #include "xfs_mount.h"
      12             : #include "xfs_btree.h"
      13             : #include "xfs_btree_staging.h"
      14             : #include "xfs_log_format.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_log.h"
      17             : #include "xfs_sb.h"
      18             : #include "xfs_inode.h"
      19             : #include "xfs_alloc.h"
      20             : #include "xfs_rmap.h"
      21             : #include "xfs_ag.h"
      22             : #include "xfs_defer.h"
      23             : #include "xfs_extfree_item.h"
      24             : #include "xfs_imeta.h"
      25             : #include "xfs_quota.h"
      26             : #include "scrub/scrub.h"
      27             : #include "scrub/common.h"
      28             : #include "scrub/trace.h"
      29             : #include "scrub/repair.h"
      30             : #include "scrub/newbt.h"
      31             : 
      32             : /*
      33             :  * Estimate proper slack values for a btree that's being reloaded.
      34             :  *
      35             :  * Under most circumstances, we'll take whatever default loading value the
      36             :  * btree bulk loading code calculates for us.  However, there are some
      37             :  * exceptions to this rule:
      38             :  *
      39             :  * (1) If someone turned one of the debug knobs.
      40             :  * (2) If this is a per-AG btree and the AG has less than ~9% space free.
      41             :  * (3) If this is an inode btree and the FS has less than ~9% space free.
      42             :  *
      43             :  * Note that we actually use 3/32 for the comparison to avoid division.
      44             :  */
       45             : static void
       46     4076337 : xrep_newbt_estimate_slack(
       47             :         struct xrep_newbt       *xnr)
       48             : {
       49     4076337 :         struct xfs_scrub        *sc = xnr->sc;
       50     4076337 :         struct xfs_btree_bload  *bload = &xnr->bload;
       51     4076337 :         uint64_t                free;
       52     4076337 :         uint64_t                sz;
       53             : 
       54             :         /*
       55             :          * The xfs_globals values are set to -1 (i.e. take the bload defaults)
       56             :          * unless someone has set them otherwise, so we just pull the values
       57             :          * here.
       58             :          */
       59     4076337 :         bload->leaf_slack = xfs_globals.bload_leaf_slack;
       60     4076337 :         bload->node_slack = xfs_globals.bload_node_slack;
       61             : 
                      :         /*
                      :          * Pick the free/size pair for the space pool that the new btree's
                      :          * blocks will come from: this AG for per-AG btrees, the whole
                      :          * filesystem for everything else.
                      :          */
       62     4076337 :         if (sc->ops->type == ST_PERAG) {
       63      696496 :                 free = sc->sa.pag->pagf_freeblks;
       64      696496 :                 sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
       65             :         } else {
       66     3379841 :                 free = percpu_counter_sum(&sc->mp->m_fdblocks);
       67     3379871 :                 sz = sc->mp->m_sb.sb_dblocks;
       68             :         }
       69             : 
       70             :         /* No further changes if there's more than 3/32ths space left. */
       71     4076091 :         if (free >= ((sz * 3) >> 5))
       72             :                 return;
       73             : 
       74             :         /* We're low on space; load the btrees as tightly as possible. */
       75       40015 :         if (bload->leaf_slack < 0)
       76       40017 :                 bload->leaf_slack = 0;
       77       40015 :         if (bload->node_slack < 0)
       78       40015 :                 bload->node_slack = 0;
       79             : }
      80             : 
       81             : /*
                      :  * Initialize accounting resources for staging a new AG btree.  @oinfo is
                      :  * copied into @xnr, @alloc_hint seeds the block allocator, and @resv
                      :  * selects the per-AG space reservation type for the new blocks.
                      :  */
       82             : void
       83     4076456 : xrep_newbt_init_ag(
       84             :         struct xrep_newbt               *xnr,
       85             :         struct xfs_scrub                *sc,
       86             :         const struct xfs_owner_info     *oinfo,
       87             :         xfs_fsblock_t                   alloc_hint,
       88             :         enum xfs_ag_resv_type           resv)
       89             : {
       90     4076456 :         memset(xnr, 0, sizeof(struct xrep_newbt));
       91     4076456 :         xnr->sc = sc;
       92     4076456 :         xnr->oinfo = *oinfo; /* structure copy */
       93     4076456 :         xnr->alloc_hint = alloc_hint;
       94     4076456 :         xnr->resv = resv;
       95     4076456 :         INIT_LIST_HEAD(&xnr->resv_list);
                      :         /* Cap on dirty staging buffers during bulk load, in fs blocks. */
       96     4076456 :         xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
       97     4076456 :         xrep_newbt_estimate_slack(xnr);
       98     4076023 : }
      99             : 
      100             : /*
                      :  * Initialize accounting resources for staging a new inode fork btree.
                      :  * Allocates a scratch incore fork (xnr->ifake) so that the inode's real
                      :  * fork is not touched until the new btree is committed.  Returns 0 or
                      :  * -ENOMEM.  NOTE(review): the ifake fork is presumably freed by the
                      :  * commit/cancel paths — confirm in the teardown code.
                      :  */
      101             : int
      102     3340419 : xrep_newbt_init_inode(
      103             :         struct xrep_newbt               *xnr,
      104             :         struct xfs_scrub                *sc,
      105             :         int                             whichfork,
      106             :         const struct xfs_owner_info     *oinfo)
      107             : {
      108     3340419 :         struct xfs_ifork                *ifp;
      109             : 
      110     3340419 :         ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
      111     3340419 :         if (!ifp)
      112             :                 return -ENOMEM;
      113             : 
                      :         /* Hint allocations at the inode's own location. */
      114    16702095 :         xrep_newbt_init_ag(xnr, sc, oinfo,
      115     3340419 :                         XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
      116             :                         XFS_AG_RESV_NONE);
      117     3340419 :         xnr->ifake.if_fork = ifp;
      118     3340419 :         xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, whichfork);
      119     3340419 :         xnr->ifake.if_whichfork = whichfork;
      120     3340419 :         return 0;
      121             : }
     122             : 
      123             : /*
      124             :  * Initialize accounting resources for staging a new metadata inode btree.
      125             :  * If the inode has an imeta space reservation, the caller must adjust the
      126             :  * imeta reservation at btree commit.
      127             :  */
      128             : int
      129       39426 : xrep_newbt_init_metadir_inode(
      130             :         struct xrep_newbt               *xnr,
      131             :         struct xfs_scrub                *sc)
      132             : {
      133       39426 :         struct xfs_owner_info           oinfo;
      134       39426 :         struct xfs_ifork                *ifp;
      135             : 
                      :         /* Metadir inodes must never have dquots attached. */
      136       39426 :         ASSERT(xfs_is_metadir_inode(sc->ip));
      137       39426 :         ASSERT(XFS_IS_DQDETACHED(sc->mp, sc->ip));
      138             : 
                      :         /* New blocks are rmapped as bmbt blocks of this inode's data fork. */
      139       39426 :         xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
      140             : 
      141       39426 :         ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
      142       39442 :         if (!ifp)
      143             :                 return -ENOMEM;
      144             : 
      145             :         /*
      146             :          * Allocate new metadir btree blocks with XFS_AG_RESV_NONE because the
      147             :          * inode metadata space reservations can only account allocated space
      148             :          * to the i_nblocks.  We do not want to change the inode core fields
      149             :          * until we're ready to commit the new tree, so we allocate the blocks
      150             :          * as if they were regular file blocks.  This exposes us to a higher
      151             :         * risk of the repair being cancelled due to ENOSPC.
      152             :          */
      153      197210 :         xrep_newbt_init_ag(xnr, sc, &oinfo,
      154       39442 :                         XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
      155             :                         XFS_AG_RESV_NONE);
      156       39452 :         xnr->ifake.if_fork = ifp;
      157       39452 :         xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, XFS_DATA_FORK);
      158             :         xnr->ifake.if_whichfork = XFS_DATA_FORK;
      159       39452 :         return 0;
      160             : }
     161             : 
      162             : /*
      163             :  * Initialize accounting resources for staging a new btree.  Callers are
      164             :  * expected to add their own reservations (and clean them up) manually.
      165             :  */
      166             : void
      167      283692 : xrep_newbt_init_bare(
      168             :         struct xrep_newbt               *xnr,
      169             :         struct xfs_scrub                *sc)
      170             : {
                      :         /* Any-owner rmap info, no allocation hint, no AG reservation. */
      171      283692 :         xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
      172             :                         XFS_AG_RESV_NONE);
      173      283803 : }
     174             : 
     175             : /*
     176             :  * Set up automatic reaping of the blocks reserved for btree reconstruction in
     177             :  * case we crash by logging a deferred free item for each extent we allocate so
     178             :  * that we can get all of the space back if we crash before we can commit the
     179             :  * new btree.  This function returns a token that can be used to cancel
     180             :  * automatic reaping if repair is successful.
     181             :  */
     182             : static int
     183      586873 : xrep_newbt_schedule_autoreap(
     184             :         struct xrep_newbt               *xnr,
     185             :         struct xrep_newbt_resv          *resv)
     186             : {
     187      586873 :         struct xfs_extent_free_item     efi_item = {
     188      586873 :                 .xefi_blockcount        = resv->len,
     189      586873 :                 .xefi_owner             = xnr->oinfo.oi_owner,
     190             :                 .xefi_flags             = XFS_EFI_SKIP_DISCARD,
     191      586873 :                 .xefi_pag               = resv->pag,
     192             :         };
     193      586873 :         struct xfs_scrub                *sc = xnr->sc;
     194      586873 :         struct xfs_log_item             *lip;
     195      586873 :         LIST_HEAD(items);
     196             : 
     197      586873 :         ASSERT(xnr->oinfo.oi_offset == 0);
     198             : 
     199      586873 :         efi_item.xefi_startblock = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
     200             :                         resv->agbno);
     201      586873 :         if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_ATTR_FORK)
     202           6 :                 efi_item.xefi_flags |= XFS_EFI_ATTR_FORK;
     203      586873 :         if (xnr->oinfo.oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
     204      153003 :                 efi_item.xefi_flags |= XFS_EFI_BMBT_BLOCK;
     205             : 
     206      586873 :         INIT_LIST_HEAD(&efi_item.xefi_list);
     207      586873 :         list_add(&efi_item.xefi_list, &items);
     208             : 
     209      586426 :         xfs_perag_intent_hold(resv->pag);
     210      586970 :         lip = xfs_extent_free_defer_type.create_intent(sc->tp, &items, 1,
     211             :                         false);
     212      586895 :         ASSERT(lip != NULL && !IS_ERR(lip));
     213             : 
     214      586895 :         resv->efi = lip;
     215      586895 :         return 0;
     216             : }
     217             : 
      218             : /*
      219             :  * Earlier, we logged EFIs for the extents that we allocated to hold the new
      220             :  * btree so that we could automatically roll back those allocations if the
      221             :  * system crashed.  Now we log an EFD to cancel the EFI, either because the
      222             :  * repair succeeded and the new blocks are in use; or because the repair was
      223             :  * cancelled and we're about to free the extents directly.
      224             :  */
      225             : static inline void
      226      587023 : xrep_newbt_finish_autoreap(
      227             :         struct xfs_scrub        *sc,
      228             :         struct xrep_newbt_resv  *resv)
      229             : {
      230      587023 :         struct xfs_efd_log_item *efdp;
      231      587023 :         struct xfs_extent       *extp;
      232      587023 :         struct xfs_log_item     *efd_lip;
      233             : 
                      :         /*
                      :          * Fill in the EFD's single extent by hand (we are not going through
                      :          * the normal defer machinery) and mark the item dirty so it is
                      :          * committed with the transaction.
                      :          */
      234      587023 :         efd_lip = xfs_extent_free_defer_type.create_done(sc->tp, resv->efi, 1);
      235      587200 :         efdp = container_of(efd_lip, struct xfs_efd_log_item, efd_item);
      236      587200 :         extp = efdp->efd_format.efd_extents;
      237      587200 :         extp->ext_start = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno,
      238             :                                          resv->agbno);
      239      587200 :         extp->ext_len = resv->len;
      240      587200 :         efdp->efd_next_extent++;
      241      587200 :         set_bit(XFS_LI_DIRTY, &efd_lip->li_flags);
                      :         /* Drop the perag hold taken in xrep_newbt_schedule_autoreap. */
      242      587411 :         xfs_perag_intent_rele(resv->pag);
      243      587339 : }
     244             : 
      245             : /* Abort an EFI logged for a new btree block reservation. */
      246             : static inline void
      247           0 : xrep_newbt_cancel_autoreap(
      248             :         struct xrep_newbt_resv  *resv)
      249             : {
                      :         /* Abort the intent and drop the perag hold from schedule_autoreap. */
      250           0 :         xfs_extent_free_defer_type.abort_intent(resv->efi);
      251           0 :         xfs_perag_intent_rele(resv->pag);
      252           0 : }
     253             : 
      254             : /*
      255             :  * Relog the EFIs attached to a staging btree so that we don't pin the log
      256             :  * tail.  Same logic as xfs_defer_relog.  Returns 0 or a negative errno
      257             :  * from rolling the scrub transaction.
      258             :  */
      259             : int
      260     1595540 : xrep_newbt_relog_autoreap(
      261             :         struct xrep_newbt       *xnr)
      262             : {
      263     1595540 :         struct xrep_newbt_resv  *resv;
      264     1595540 :         unsigned int            efi_bytes = 0;
      265             : 
      266     4245663 :         list_for_each_entry(resv, &xnr->resv_list, list) {
      267             :                 /*
      268             :                  * If the log intent item for this deferred op is in a
      269             :                  * different checkpoint, relog it to keep the log tail moving
      270             :                  * forward.  We're ok with this being racy because an incorrect
      271             :                  * decision means we'll be a little slower at pushing the tail.
      272             :                  */
      273     2649706 :                 if (!resv->efi || xfs_log_item_in_current_chkpt(resv->efi))
      274     2645216 :                         continue;
      275             : 
      276        4903 :                 resv->efi = xfs_trans_item_relog(resv->efi, xnr->sc->tp);
      277             : 
      278             :                 /*
      279             :                  * If free space is very fragmented, it's possible that the new
      280             :                  * btree will be allocated a large number of small extents.
      281             :                  * On an active system, it's possible that so many of those
      282             :                  * EFIs will need relogging here that doing them all in one
      283             :                  * transaction will overflow the reservation.
      284             :                  *
      285             :                  * Each allocation for the new btree (xrep_newbt_resv) points
      286             :                  * to a unique single-mapping EFI, so each relog operation logs
      287             :                  * a single-mapping EFD followed by a new EFI.  Each single
      288             :                  * mapping EF[ID] item consumes about 128 bytes, so we'll
      289             :                  * assume 256 bytes per relog.  Roll if we consume more than
      290             :                  * half of the transaction reservation.
      291             :                  */
      292        4907 :                 efi_bytes += 256;
      293        4907 :                 if (efi_bytes > xnr->sc->tp->t_log_res / 2) {
      294           0 :                         int     error;
      295             : 
      296           0 :                         error = xrep_roll_trans(xnr->sc);
      297           0 :                         if (error)
      298           0 :                                 return error;
      299             : 
      300             :                         efi_bytes = 0;
      301             :                 }
      302             :         }
      303             : 
                      :         /* Roll once more to commit any relogged items still pending. */
      304     1595957 :         if (xnr->sc->tp->t_flags & XFS_TRANS_DIRTY)
      305        4734 :                 return xrep_roll_trans(xnr->sc);
      306             :         return 0;
      307             : }
     307             : 
      308             : /*
      309             :  * Designate specific blocks to be used to build our new btree.  @pag must be
      310             :  * a passive reference.  If @autoreap is set, an EFI is logged so that the
      311             :  * blocks are reclaimed if we crash before committing the new btree.
      312             :  * Returns 0, -ENOMEM, or an error from scheduling the autoreap.
      313             :  */
      314             : STATIC int
      315      728111 : xrep_newbt_add_blocks(
      316             :         struct xrep_newbt               *xnr,
      317             :         struct xfs_perag                *pag,
      318             :         xfs_agblock_t                   agbno,
      319             :         xfs_extlen_t                    len,
      320             :         bool                            autoreap)
      321             : {
      322             :         struct xrep_newbt_resv          *resv;
      323             :         int                             error;
      324             : 
      325      728111 :         resv = kmalloc(sizeof(struct xrep_newbt_resv), XCHK_GFP_FLAGS);
      326      728334 :         if (!resv)
      327             :                 return -ENOMEM;
      328             : 
      329      728334 :         INIT_LIST_HEAD(&resv->list);
      330      728334 :         resv->agbno = agbno;
      331      728334 :         resv->len = len;
      332      728334 :         resv->used = 0;
                      :         /* Take our own perag reference for the reservation's lifetime. */
      333      728334 :         resv->pag = xfs_perag_hold(pag);
      334             : 
      335      729018 :         if (autoreap) {
      336      586864 :                 error = xrep_newbt_schedule_autoreap(xnr, resv);
      337      586889 :                 if (error)
      338           0 :                         goto out_pag;
      339             :         }
      340             : 
      341      729043 :         list_add_tail(&resv->list, &xnr->resv_list);
      342      729043 :         return 0;
      343             : out_pag:
      344           0 :         xfs_perag_put(resv->pag);
      345           0 :         kfree(resv);
      346           0 :         return error;
      347             : }
     346             : 
      347             : /*
      348             :  * Add an extent to the new btree reservation pool.  Callers are required to
      349             :  * handle any automatic reaping if the repair is cancelled.  @pag must be a
      350             :  * passive reference.  Thin wrapper around xrep_newbt_add_blocks with
      351             :  * autoreap disabled.
      352             :  */
      353             : int
      354      141792 : xrep_newbt_add_extent(
      355             :         struct xrep_newbt               *xnr,
      356             :         struct xfs_perag                *pag,
      357             :         xfs_agblock_t                   agbno,
      358             :         xfs_extlen_t                    len)
      359             : {
      360      141792 :         return xrep_newbt_add_blocks(xnr, pag, agbno, len, false);
      361             : }
     361             : 
      362             : /* Don't let our allocation hint take us beyond this AG */
      363             : static inline void
      364      433731 : xrep_newbt_validate_ag_alloc_hint(
      365             :         struct xrep_newbt       *xnr)
      366             : {
      367      433731 :         struct xfs_scrub        *sc = xnr->sc;
      368      433731 :         xfs_agnumber_t          agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);
      369             : 
                      :         /* Hint is fine if it's a valid fsblock inside the AG under repair. */
      370      866504 :         if (agno == sc->sa.pag->pag_agno &&
      371      433591 :             xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
      372             :                 return;
      373             : 
                      :         /* Otherwise fall back to the first block past this AG's AGFL. */
      374           2 :         xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
      375             :                                          XFS_AGFL_BLOCK(sc->mp) + 1);
      376             : }
     377             : 
      378             : /*
                      :  * Allocate @nr_blocks of disk space for a new per-AG btree, in one or
                      :  * more extents, all within the AG under repair.  Returns 0, -ENOSPC, or
                      :  * another negative errno.
                      :  */
      379             : STATIC int
      380      413179 : xrep_newbt_alloc_ag_blocks(
      381             :         struct xrep_newbt       *xnr,
      382             :         uint64_t                nr_blocks)
      383             : {
      384      413179 :         struct xfs_scrub        *sc = xnr->sc;
      385      413179 :         int                     error = 0;
      386             : 
      387      413179 :         ASSERT(sc->sa.pag != NULL);
      388      413179 :         ASSERT(xnr->resv != XFS_AG_RESV_IMETA);
      389             : 
      390      846706 :         while (nr_blocks > 0) {
      391      433379 :                 struct xfs_alloc_arg    args = {
      392      433379 :                         .tp             = sc->tp,
      393      433379 :                         .mp             = sc->mp,
      394             :                         .oinfo          = xnr->oinfo,
      395             :                         .minlen         = 1,
      396             :                         .maxlen         = nr_blocks,
      397             :                         .prod           = 1,
      398      433379 :                         .resv           = xnr->resv,
      399             :                 };
      400             : 
      401      433379 :                 xrep_newbt_validate_ag_alloc_hint(xnr);
      402             : 
                      :                 /* Let the caller override the allocator, if one was set. */
      403      432708 :                 if (xnr->alloc_vextent)
      404       37829 :                         error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
      405             :                 else
      406      394879 :                         error = xfs_alloc_vextent_near_bno(&args,
      407             :                                         xnr->alloc_hint);
      408      433815 :                 if (error)
      409         150 :                         return error;
                      :                 /* NULLFSBLOCK means the allocator found no space. */
      410      433815 :                 if (args.fsbno == NULLFSBLOCK)
      411             :                         return -ENOSPC;
      412             : 
      413      433665 :                 trace_xrep_newbt_alloc_ag_blocks(sc->mp, args.agno, args.agbno,
      414      433665 :                                 args.len, xnr->oinfo.oi_owner);
      415             : 
                      :                 /* Reserve the extent with autoreap so a crash frees it. */
      416      433250 :                 error = xrep_newbt_add_blocks(xnr, sc->sa.pag, args.agbno,
      417             :                                 args.len, true);
      418      433853 :                 if (error)
      419           0 :                         return error;
      420             : 
      421      433853 :                 nr_blocks -= args.len;
      422      433853 :                 xnr->alloc_hint = args.fsbno + args.len;
      423             : 
                      :                 /* Finish deferred work before allocating the next extent. */
      424      433853 :                 error = xrep_defer_finish(sc);
      425      433527 :                 if (error)
      426           0 :                         return error;
      427             :         }
      428             : 
      429             :         return 0;
      430             : }
     431             : 
      432             : /* Don't let our allocation hint take us beyond EOFS */
      433             : static inline void
      434      153002 : xrep_newbt_validate_file_alloc_hint(
      435             :         struct xrep_newbt       *xnr)
      436             : {
      437      153002 :         struct xfs_scrub        *sc = xnr->sc;
      438             : 
      439      153002 :         if (xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
      440             :                 return;
      441             : 
                      :         /* Invalid hint; restart from just past AG 0's AGFL. */
      442           1 :         xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, 0, XFS_AGFL_BLOCK(sc->mp) + 1);
      443             : }
     444             : 
      445             : /*
                      :  * Allocate @nr_blocks of disk space for our new file-based btree, in one
                      :  * or more extents from any AG.  Returns 0, -ENOSPC, -EFSCORRUPTED, or
                      :  * another negative errno.
                      :  */
      446             : STATIC int
      447      138466 : xrep_newbt_alloc_file_blocks(
      448             :         struct xrep_newbt       *xnr,
      449             :         uint64_t                nr_blocks)
      450             : {
      451      138466 :         struct xfs_scrub        *sc = xnr->sc;
      452      138466 :         int                     error = 0;
      453             : 
      454      138466 :         ASSERT(xnr->resv != XFS_AG_RESV_IMETA);
      455             : 
      456      291469 :         while (nr_blocks > 0) {
      457      153004 :                 struct xfs_alloc_arg    args = {
      458      153004 :                         .tp             = sc->tp,
      459      153004 :                         .mp             = sc->mp,
      460             :                         .oinfo          = xnr->oinfo,
      461             :                         .minlen         = 1,
      462             :                         .maxlen         = nr_blocks,
      463             :                         .prod           = 1,
      464      153004 :                         .resv           = xnr->resv,
      465             :                 };
      466             :                 struct xfs_perag        *pag;
      467             : 
      468      153004 :                 xrep_newbt_validate_file_alloc_hint(xnr);
      469             : 
                      :                 /* Let the caller override the allocator, if one was set. */
      470      153003 :                 if (xnr->alloc_vextent)
      471           0 :                         error = xnr->alloc_vextent(sc, &args, xnr->alloc_hint);
      472             :                 else
      473      153003 :                         error = xfs_alloc_vextent_start_ag(&args,
      474             :                                         xnr->alloc_hint);
      475      153003 :                 if (error)
      476           0 :                         return error;
                      :                 /* NULLFSBLOCK means the allocator found no space. */
      477      153003 :                 if (args.fsbno == NULLFSBLOCK)
      478             :                         return -ENOSPC;
      479             : 
      480      153003 :                 trace_xrep_newbt_alloc_file_blocks(sc->mp, args.agno,
      481      153003 :                                 args.agbno, args.len, xnr->oinfo.oi_owner);
      482             : 
                      :                 /*
                      :                  * A missing perag for an AG we just allocated from means the
                      :                  * fs metadata is inconsistent.
                      :                  */
      483      153002 :                 pag = xfs_perag_get(sc->mp, args.agno);
      484      153003 :                 if (!pag) {
      485           0 :                         ASSERT(0);
      486           0 :                         return -EFSCORRUPTED;
      487             :                 }
      488             : 
                      :                 /* Reserve the extent with autoreap so a crash frees it. */
      489      153003 :                 error = xrep_newbt_add_blocks(xnr, pag, args.agbno, args.len,
      490             :                                 true);
      491      153003 :                 xfs_perag_put(pag);
      492      153003 :                 if (error)
      493           0 :                         return error;
      494             : 
      495      153003 :                 nr_blocks -= args.len;
      496      153003 :                 xnr->alloc_hint = args.fsbno + args.len;
      497             : 
                      :                 /* Finish deferred work before allocating the next extent. */
      498      153003 :                 error = xrep_defer_finish(sc);
      499      153003 :                 if (error)
      500           0 :                         return error;
      501             :         }
      502             : 
      503             :         return 0;
      504             : }
     505             : 
      506             : /*
                      :  * Allocate disk space for our new btree.  Inode-fork btrees may draw
                      :  * blocks from any AG; per-AG btrees allocate only from the AG being
                      :  * repaired.
                      :  */
      507             : int
      508      551595 : xrep_newbt_alloc_blocks(
      509             :         struct xrep_newbt       *xnr,
      510             :         uint64_t                nr_blocks)
      511             : {
      512      551595 :         if (xnr->sc->ip)
      513      138463 :                 return xrep_newbt_alloc_file_blocks(xnr, nr_blocks);
      514      413132 :         return xrep_newbt_alloc_ag_blocks(xnr, nr_blocks);
      515             : }
     516             : 
     517             : /*
     518             :  * How many extent freeing items can we attach to a transaction before we want
     519             :  * to finish the chain so that unreserving new btree blocks doesn't overrun
     520             :  * the transaction reservation?
     521             :  */
     522             : #define XREP_REAP_MAX_NEWBT_EFIS        (128)
     523             : 
      524             : /*
      525             :  * Free the unused part of an extent.  Returns the number of EFIs logged
      526             :  * (0 or 1) or a negative errno.  If @btree_committed, only the blocks
      527             :  * beyond resv->used are freed; otherwise the whole reservation is.
      528             :  */
      529             : STATIC int
      530      587054 : xrep_newbt_free_extent(
      531             :         struct xrep_newbt       *xnr,
      532             :         struct xrep_newbt_resv  *resv,
      533             :         bool                    btree_committed)
      534             : {
      535      587054 :         struct xfs_scrub        *sc = xnr->sc;
      536      587054 :         xfs_agblock_t           free_agbno = resv->agbno;
      537      587054 :         xfs_extlen_t            free_aglen = resv->len;
      538      587054 :         xfs_fsblock_t           fsbno;
      539      587054 :         int                     error;
      540             : 
      541             :         /*
      542             :          * If we used space and committed the btree, remove those blocks from
      543             :          * the extent before we act on it.
      544             :          */
      545      587054 :         if (btree_committed) {
      546      586563 :                 free_agbno += resv->used;
      547      586563 :                 free_aglen -= resv->used;
      548             :         }
      549             : 
                      :         /* Log the EFD cancelling this reservation's crash-recovery EFI. */
      550      587054 :         xrep_newbt_finish_autoreap(sc, resv);
      551             : 
                      :         /* Everything in the reservation was used; nothing left to free. */
      552      587337 :         if (free_aglen == 0)
      553             :                 return 0;
      554             : 
      555         447 :         trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
      556         447 :                         free_aglen, xnr->oinfo.oi_owner);
      557             : 
      558         447 :         ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
      559         447 :         ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);
      560             : 
      561             :         /*
      562             :          * Use EFIs to free the reservations.  This reduces the chance
      563             :          * that we leak blocks if the system goes down.
      564             :          */
      565         447 :         fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
      566         447 :         error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
      567             :                         xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
      568         447 :         if (error)
      569           0 :                 return error;
      570             : 
      571             :         return 1;
      572             : }
     572             : 
/*
 * Free all the accounting info and disk space we reserved for a new btree.
 *
 * Walks the reservation list, logging EFIs for the unused parts of each
 * reservation (all of it, if @btree_committed is false) and releasing the
 * incore bookkeeping.  EFIs are flushed in batches of at most
 * XREP_REAP_MAX_NEWBT_EFIS so the chain does not overrun the transaction
 * reservation.  Returns 0 or a negative errno.
 */
STATIC int
xrep_newbt_free(
	struct xrep_newbt	*xnr,
	bool			btree_committed)
{
	struct xfs_scrub	*sc = xnr->sc;
	struct xrep_newbt_resv	*resv, *n;
	unsigned int		freed = 0;
	int			error = 0;

	/*
	 * If the filesystem already went down, we can't free the blocks.  Skip
	 * ahead to freeing the incore metadata because we can't fix anything.
	 */
	if (xfs_is_shutdown(sc->mp))
		goto junkit;

	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		int		ret;

		ret = xrep_newbt_free_extent(xnr, resv, btree_committed);
		/* Release the incore reservation even if the free failed. */
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
		if (ret < 0) {
			error = ret;
			goto junkit;
		}

		/*
		 * ret is the number of EFIs logged; once enough accumulate,
		 * finish the deferred work so we don't overrun the
		 * transaction reservation.
		 */
		freed += ret;
		if (freed >= XREP_REAP_MAX_NEWBT_EFIS) {
			error = xrep_defer_finish(sc);
			if (error)
				goto junkit;
			freed = 0;
		}
	}

	/* Flush any EFIs left over from a partial batch. */
	if (freed)
		error = xrep_defer_finish(sc);

junkit:
	/*
	 * If we still have reservations attached to @newbt, cleanup must have
	 * failed and the filesystem is about to go down.  Clean up the incore
	 * reservations.
	 */
	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
		xrep_newbt_cancel_autoreap(resv);
		list_del(&resv->list);
		xfs_perag_put(resv->pag);
		kfree(resv);
	}

	/* Inode btrees carry a fake fork that we allocated; release it. */
	if (sc->ip) {
		kmem_cache_free(xfs_ifork_cache, xnr->ifake.if_fork);
		xnr->ifake.if_fork = NULL;
	}

	return error;
}
     635             : 
     636             : /*
     637             :  * Free all the accounting info and unused disk space allocations after
     638             :  * committing a new btree.
     639             :  */
     640             : int
     641     3793012 : xrep_newbt_commit(
     642             :         struct xrep_newbt       *xnr)
     643             : {
     644     3793012 :         return xrep_newbt_free(xnr, true);
     645             : }
     646             : 
     647             : /*
     648             :  * Free all the accounting info and all of the disk space we reserved for a new
     649             :  * btree that we're not going to commit.  We want to try to roll things back
     650             :  * cleanly for things like ENOSPC midway through allocation.
     651             :  */
     652             : void
     653      284545 : xrep_newbt_cancel(
     654             :         struct xrep_newbt       *xnr)
     655             : {
     656      284545 :         xrep_newbt_free(xnr, false);
     657      284539 : }
     658             : 
     659             : /* Feed one of the reserved btree blocks to the bulk loader. */
     660             : int
     661     2519578 : xrep_newbt_claim_block(
     662             :         struct xfs_btree_cur    *cur,
     663             :         struct xrep_newbt       *xnr,
     664             :         union xfs_btree_ptr     *ptr)
     665             : {
     666     2519578 :         struct xrep_newbt_resv  *resv;
     667     2519578 :         struct xfs_mount        *mp = cur->bc_mp;
     668     2519578 :         xfs_agblock_t           agbno;
     669             : 
     670             :         /*
     671             :          * The first item in the list should always have a free block unless
     672             :          * we're completely out.
     673             :          */
     674     2519578 :         resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
     675     2519578 :         if (resv->used == resv->len)
     676             :                 return -ENOSPC;
     677             : 
     678             :         /*
     679             :          * Peel off a block from the start of the reservation.  We allocate
     680             :          * blocks in order to place blocks on disk in increasing record or key
     681             :          * order.  The block reservations tend to end up on the list in
     682             :          * decreasing order, which hopefully results in leaf blocks ending up
     683             :          * together.
     684             :          */
     685     2519578 :         agbno = resv->agbno + resv->used;
     686     2519578 :         resv->used++;
     687             : 
     688             :         /* If we used all the blocks in this reservation, move it to the end. */
     689     2519578 :         if (resv->used == resv->len)
     690      728249 :                 list_move_tail(&resv->list, &xnr->resv_list);
     691             : 
     692     2519684 :         trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
     693     2519684 :                         xnr->oinfo.oi_owner);
     694             : 
     695     2519682 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
     696      930010 :                 ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
     697             :                                                                 agbno));
     698             :         else
     699     1589672 :                 ptr->s = cpu_to_be32(agbno);
     700             :         return 0;
     701             : }
     702             : 
     703             : /* How many reserved blocks are unused? */
     704             : unsigned int
     705      284588 : xrep_newbt_unused_blocks(
     706             :         struct xrep_newbt       *xnr)
     707             : {
     708      284588 :         struct xrep_newbt_resv  *resv;
     709      284588 :         unsigned int            unused = 0;
     710             : 
     711      426886 :         list_for_each_entry(resv, &xnr->resv_list, list)
     712      142298 :                 unused += resv->len - resv->used;
     713      284588 :         return unused;
     714             : }

Generated by: LCOV version 1.14