LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_ag_resv.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 133 164 81.1 %
Date: 2023-07-31 20:08:34 Functions: 8 8 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * Copyright (C) 2016 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <darrick.wong@oracle.com>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_alloc.h"
      14             : #include "xfs_errortag.h"
      15             : #include "xfs_error.h"
      16             : #include "xfs_trace.h"
      17             : #include "xfs_trans.h"
      18             : #include "xfs_rmap_btree.h"
      19             : #include "xfs_btree.h"
      20             : #include "xfs_refcount_btree.h"
      21             : #include "xfs_ialloc_btree.h"
      22             : #include "xfs_ag.h"
      23             : #include "xfs_ag_resv.h"
      24             : 
      25             : /*
      26             :  * Per-AG Block Reservations
      27             :  *
      28             :  * For some kinds of allocation group metadata structures, it is advantageous
      29             :  * to reserve a small number of blocks in each AG so that future expansions of
      30             :  * that data structure do not encounter ENOSPC because errors during a btree
      31             :  * split cause the filesystem to go offline.
      32             :  *
      33             :  * Prior to the introduction of reflink, this wasn't an issue because the free
      34             :  * space btrees maintain a reserve of space (the AGFL) to handle any expansion
      35             :  * that may be necessary; and allocations of other metadata (inodes, BMBT,
      36             :  * dir/attr) aren't restricted to a single AG.  However, with reflink it is
      37             :  * possible to allocate all the space in an AG, have subsequent reflink/CoW
      38             :  * activity expand the refcount btree, and discover that there's no space left
      39             :  * to handle that expansion.  Since we can calculate the maximum size of the
      40             :  * refcount btree, we can reserve space for it and avoid ENOSPC.
      41             :  *
      42             :  * Handling per-AG reservations consists of four changes to the allocator's
      43             :  * behavior:  First, because these reservations are always needed, we decrease
      44             :  * the ag_max_usable counter to reflect the size of the AG after the reserved
      45             :  * blocks are taken.  Second, the reservations must be reflected in the
      46             :  * fdblocks count to maintain proper accounting.  Third, each AG must maintain
      47             :  * its own reserved block counter so that we can calculate the amount of space
      48             :  * that must remain free to maintain the reservations.  Fourth, the "remaining
      49             :  * reserved blocks" count must be used when calculating the length of the
      50             :  * longest free extent in an AG and to clamp maxlen in the per-AG allocation
      51             :  * functions.  In other words, we maintain a virtual allocation via in-core
      52             :  * accounting tricks so that we don't have to clean up after a crash. :)
      53             :  *
      54             :  * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
      55             :  * values via struct xfs_alloc_arg or directly to the xfs_free_extent
      56             :  * function.  It might seem a little funny to maintain a reservoir of blocks
      57             :  * to feed another reservoir, but the AGFL only holds enough blocks to get
      58             :  * through the next transaction.  The per-AG reservation is to ensure (we
      59             :  * hope) that each AG never runs out of blocks.  Each data structure wanting
      60             :  * to use the reservation system should update ask/used in xfs_ag_resv_init.
      61             :  */
      62             : 
      63             : /*
      64             :  * Are we critically low on blocks?  For now we'll define that as the number
      65             :  * of blocks we can get our hands on being less than 10% of what we reserved
      66             :  * or less than some arbitrary number (maximum btree height).
      67             :  */
      68             : bool
      69   152044548 : xfs_ag_resv_critical(
      70             :         struct xfs_perag                *pag,
      71             :         enum xfs_ag_resv_type           type)
      72             : {
      73   152044548 :         xfs_extlen_t                    avail;
      74   152044548 :         xfs_extlen_t                    orig;
      75             : 
      76             :         /*
      77             :          * Pretend we're critically low on reservations in this AG to scare
      78             :          * everyone else away.
      79             :          */
      80   304089096 :         if (xfs_perag_prohibits_alloc(pag))
      81             :                 return true;
      82             : 
      83   152044548 :         switch (type) {
      84    76021624 :         case XFS_AG_RESV_METADATA:
      85    76021624 :                 avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
      86    76021624 :                 orig = pag->pag_meta_resv.ar_asked;
      87    76021624 :                 break;
      88    76022924 :         case XFS_AG_RESV_RMAPBT:
      89    76022924 :                 avail = pag->pagf_freeblks + pag->pagf_flcount -
      90    76022924 :                         pag->pag_meta_resv.ar_reserved;
      91    76022924 :                 orig = pag->pag_rmapbt_resv.ar_asked;
      92    76022924 :                 break;
      93           0 :         default:
      94           0 :                 ASSERT(0);
      95           0 :                 return false;
      96             :         }
      97             : 
      98   152044548 :         trace_xfs_ag_resv_critical(pag, type, avail);
      99             : 
     100             :         /* Critically low if less than 10% or max btree height remains. */
     101   152044461 :         return XFS_TEST_ERROR(avail < orig / 10 ||
     102             :                               avail < pag->pag_mount->m_agbtree_maxlevels,
     103             :                         pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
     104             : }
     105             : 
     106             : /*
     107             :  * How many blocks are reserved but not used, and therefore must not be
     108             :  * allocated away?
     109             :  */
     110             : xfs_extlen_t
     111   480650324 : xfs_ag_resv_needed(
     112             :         struct xfs_perag                *pag,
     113             :         enum xfs_ag_resv_type           type)
     114             : {
     115   480650324 :         xfs_extlen_t                    len;
     116             : 
     117   480650324 :         len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
     118   480650324 :         switch (type) {
     119             :         case XFS_AG_RESV_METADATA:
     120             :         case XFS_AG_RESV_RMAPBT:
     121      808694 :                 len -= xfs_perag_resv(pag, type)->ar_reserved;
     122      808694 :                 break;
     123             :         case XFS_AG_RESV_IMETA:
     124             :         case XFS_AG_RESV_NONE:
     125             :                 /*
     126             :                  * In noalloc mode, we pretend that all the free blocks in this
     127             :                  * AG have been allocated.  Make this AG look full.
     128             :                  */
     129   959683260 :                 if (xfs_perag_prohibits_alloc(pag))
     130           0 :                         len += xfs_ag_fdblocks(pag);
     131             :                 break;
     132           0 :         default:
     133           0 :                 ASSERT(0);
     134             :         }
     135             : 
     136   480650324 :         trace_xfs_ag_resv_needed(pag, type, len);
     137             : 
     138   480625872 :         return len;
     139             : }
     140             : 
     141             : /* Clean out a reservation */
     142             : static void
     143     2571454 : __xfs_ag_resv_free(
     144             :         struct xfs_perag                *pag,
     145             :         enum xfs_ag_resv_type           type)
     146             : {
     147     2571454 :         struct xfs_ag_resv              *resv;
     148     2571454 :         xfs_extlen_t                    oldresv;
     149             : 
     150     2571454 :         trace_xfs_ag_resv_free(pag, type, 0);
     151             : 
     152     2571001 :         resv = xfs_perag_resv(pag, type);
     153     2571001 :         if (pag->pag_agno == 0)
     154      373054 :                 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
     155             :         /*
     156             :          * RMAPBT blocks come from the AGFL and AGFL blocks are always
     157             :          * considered "free", so whatever was reserved at mount time must be
     158             :          * given back at umount.
     159             :          */
     160     2571001 :         if (type == XFS_AG_RESV_RMAPBT)
     161     1284615 :                 oldresv = resv->ar_orig_reserved;
     162             :         else
     163     1286386 :                 oldresv = resv->ar_reserved;
     164     2571001 :         xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
     165     2572808 :         resv->ar_reserved = 0;
     166     2572808 :         resv->ar_asked = 0;
     167     2572808 :         resv->ar_orig_reserved = 0;
     168     2572808 : }
     169             : 
     170             : /* Free a per-AG reservation. */
     171             : void
     172     1285187 : xfs_ag_resv_free(
     173             :         struct xfs_perag                *pag)
     174             : {
     175     1285187 :         __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
     176     1286491 :         __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
     177     1286445 : }
     178             : 
     179             : static int
     180     2552452 : __xfs_ag_resv_init(
     181             :         struct xfs_perag                *pag,
     182             :         enum xfs_ag_resv_type           type,
     183             :         xfs_extlen_t                    ask,
     184             :         xfs_extlen_t                    used)
     185             : {
     186     2552452 :         struct xfs_mount                *mp = pag->pag_mount;
     187     2552452 :         struct xfs_ag_resv              *resv;
     188     2552452 :         int                             error;
     189     2552452 :         xfs_extlen_t                    hidden_space;
     190             : 
     191     2552452 :         if (used > ask)
     192             :                 ask = used;
     193             : 
     194     2552452 :         switch (type) {
     195             :         case XFS_AG_RESV_RMAPBT:
     196             :                 /*
     197             :                  * Space taken by the rmapbt is not subtracted from fdblocks
     198             :                  * because the rmapbt lives in the free space.  Here we must
     199             :                  * subtract the entire reservation from fdblocks so that we
     200             :                  * always have blocks available for rmapbt expansion.
     201             :                  */
     202             :                 hidden_space = ask;
     203             :                 break;
     204     1276304 :         case XFS_AG_RESV_METADATA:
     205             :                 /*
     206             :                  * Space taken by all other metadata btrees is accounted
     207             :                  * on-disk as used space.  We therefore only hide the space
     208             :                  * that is reserved but not used by the trees.
     209             :                  */
     210     1276304 :                 hidden_space = ask - used;
     211     1276304 :                 break;
     212           0 :         default:
     213           0 :                 ASSERT(0);
     214           0 :                 return -EINVAL;
     215             :         }
     216             : 
     217     2552452 :         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
     218             :                 error = -ENOSPC;
     219             :         else
     220     2551954 :                 error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
     221     2553397 :         if (error) {
     222           0 :                 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
     223           0 :                                 error, _RET_IP_);
     224           0 :                 xfs_warn(mp,
     225             : "Per-AG reservation for AG %u failed.  Filesystem may run out of space.",
     226             :                                 pag->pag_agno);
     227           0 :                 return error;
     228             :         }
     229             : 
     230             :         /*
     231             :          * Reduce the maximum per-AG allocation length by however much we're
     232             :          * trying to reserve for an AG.  Since this is a filesystem-wide
     233             :          * counter, we only make the adjustment for AG 0.  This assumes that
     234             :          * there aren't any AGs hungrier for per-AG reservation than AG 0.
     235             :          */
     236     2553397 :         if (pag->pag_agno == 0)
     237      368474 :                 mp->m_ag_max_usable -= ask;
     238             : 
     239     2553397 :         resv = xfs_perag_resv(pag, type);
     240     2553397 :         resv->ar_asked = ask;
     241     2553397 :         resv->ar_orig_reserved = hidden_space;
     242     2553397 :         resv->ar_reserved = ask - used;
     243             : 
     244     2553397 :         trace_xfs_ag_resv_init(pag, type, ask);
     245     2553397 :         return 0;
     246             : }
     247             : 
     248             : /* Create a per-AG block reservation. */
     249             : int
     250     1279683 : xfs_ag_resv_init(
     251             :         struct xfs_perag                *pag,
     252             :         struct xfs_trans                *tp)
     253             : {
     254     1279683 :         struct xfs_mount                *mp = pag->pag_mount;
     255     1279683 :         xfs_extlen_t                    ask;
     256     1279683 :         xfs_extlen_t                    used;
     257     1279683 :         int                             error = 0, error2;
     258     1279683 :         bool                            has_resv = false;
     259             : 
     260             :         /* Create the metadata reservation. */
     261     1279683 :         if (pag->pag_meta_resv.ar_asked == 0) {
     262     1277503 :                 ask = used = 0;
     263             : 
     264     1277503 :                 error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask, &used);
     265     1276548 :                 if (error)
     266         436 :                         goto out;
     267             : 
     268     1276112 :                 error = xfs_finobt_calc_reserves(pag, tp, &ask, &used);
     269     1276384 :                 if (error)
     270           4 :                         goto out;
     271             : 
     272     1276380 :                 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
     273             :                                 ask, used);
     274     1276097 :                 if (error) {
     275             :                         /*
     276             :                          * Because we didn't have per-AG reservations when the
     277             :                          * finobt feature was added we might not be able to
     278             :                          * reserve all needed blocks.  Warn and fall back to the
     279             :                          * old and potentially buggy code in that case, but
     280             :                          * ensure we do have the reservation for the refcountbt.
     281             :                          */
     282           0 :                         ask = used = 0;
     283             : 
     284           0 :                         mp->m_finobt_nores = true;
     285             : 
     286           0 :                         error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask,
     287             :                                         &used);
     288           0 :                         if (error)
     289           0 :                                 goto out;
     290             : 
     291           0 :                         error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
     292             :                                         ask, used);
     293           0 :                         if (error)
     294           0 :                                 goto out;
     295             :                 }
     296     1276097 :                 if (ask)
     297     1260458 :                         has_resv = true;
     298             :         }
     299             : 
     300             :         /* Create the RMAPBT metadata reservation */
     301     1278277 :         if (pag->pag_rmapbt_resv.ar_asked == 0) {
     302     1275902 :                 ask = used = 0;
     303             : 
     304     1275902 :                 error = xfs_rmapbt_calc_reserves(mp, tp, pag, &ask, &used);
     305     1276612 :                 if (error)
     306           0 :                         goto out;
     307             : 
     308     1276612 :                 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
     309     1277076 :                 if (error)
     310           0 :                         goto out;
     311     1277076 :                 if (ask)
     312             :                         has_resv = true;
     313             :         }
     314             : 
     315       58134 : out:
     316             :         /*
     317             :          * Initialize the pagf if we have at least one active reservation on the
     318             :          * AG. This may have occurred already via reservation calculation, but
     319             :          * fall back to an explicit init to ensure the in-core allocbt usage
     320             :          * counters are initialized as soon as possible. This is important
     321             :          * because filesystems with large perag reservations are susceptible to
     322             :          * free space reservation problems that the allocbt counter is used to
     323             :          * address.
     324             :          */
     325       58574 :         if (has_resv) {
     326     1261289 :                 error2 = xfs_alloc_read_agf(pag, tp, 0, NULL);
     327     1261069 :                 if (error2)
     328             :                         return error2;
     329             : 
     330             :                 /*
     331             :                  * If there isn't enough space in the AG to satisfy the
     332             :                  * reservation, let the caller know that there wasn't enough
     333             :                  * space.  Callers are responsible for deciding what to do
     334             :                  * next, since (in theory) we can stumble along with
     335             :                  * insufficient reservation if data blocks are being freed to
     336             :                  * replenish the AG's free space.
     337             :                  */
     338     1261065 :                 if (!error &&
     339     1261123 :                     xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
     340     1261123 :                     xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
     341     1261123 :                     pag->pagf_freeblks + pag->pagf_flcount)
     342           0 :                         error = -ENOSPC;
     343             :         }
     344             : 
     345             :         return error;
     346             : }
     347             : 
     348             : /* Allocate a block from the reservation. */
     349             : void
     350    98526897 : xfs_ag_resv_alloc_extent(
     351             :         struct xfs_perag                *pag,
     352             :         enum xfs_ag_resv_type           type,
     353             :         struct xfs_alloc_arg            *args)
     354             : {
     355    98526897 :         struct xfs_ag_resv              *resv;
     356    98526897 :         xfs_extlen_t                    len;
     357    98526897 :         uint                            field;
     358             : 
     359   195104164 :         ASSERT(type != XFS_AG_RESV_NONE || !xfs_perag_prohibits_alloc(pag));
     360             : 
     361    98526897 :         trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
     362             : 
     363    98503652 :         switch (type) {
     364             :         case XFS_AG_RESV_AGFL:
     365             :         case XFS_AG_RESV_IMETA:
     366             :                 return;
     367             :         case XFS_AG_RESV_METADATA:
     368             :         case XFS_AG_RESV_RMAPBT:
     369     1208906 :                 resv = xfs_perag_resv(pag, type);
     370     1208906 :                 break;
     371           0 :         default:
     372           0 :                 ASSERT(0);
     373    96568450 :                 fallthrough;
     374    96568450 :         case XFS_AG_RESV_NONE:
     375    96568450 :                 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
     376             :                                        XFS_TRANS_SB_FDBLOCKS;
     377    96568450 :                 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
     378    96568450 :                 return;
     379             :         }
     380             : 
     381     1208906 :         len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
     382     1208906 :         resv->ar_reserved -= len;
     383     1208906 :         if (type == XFS_AG_RESV_RMAPBT)
     384             :                 return;
     385             :         /* Allocations of reserved blocks only need on-disk sb updates... */
     386      365643 :         xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
     387             :         /* ...but non-reserved blocks need in-core and on-disk updates. */
     388      365468 :         if (args->len > len)
     389           0 :                 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
     390           0 :                                 -((int64_t)args->len - len));
     391             : }
     392             : 
     393             : /* Free a block to the reservation. */
     394             : void
     395   106094964 : xfs_ag_resv_free_extent(
     396             :         struct xfs_perag                *pag,
     397             :         enum xfs_ag_resv_type           type,
     398             :         struct xfs_trans                *tp,
     399             :         xfs_extlen_t                    len)
     400             : {
     401   106094964 :         xfs_extlen_t                    leftover;
     402   106094964 :         struct xfs_ag_resv              *resv;
     403             : 
     404   106094964 :         trace_xfs_ag_resv_free_extent(pag, type, len);
     405             : 
     406   106082857 :         switch (type) {
     407             :         case XFS_AG_RESV_AGFL:
     408             :         case XFS_AG_RESV_IMETA:
     409             :                 return;
     410             :         case XFS_AG_RESV_METADATA:
     411             :         case XFS_AG_RESV_RMAPBT:
     412      509494 :                 resv = xfs_perag_resv(pag, type);
     413      509494 :                 break;
     414           0 :         default:
     415           0 :                 ASSERT(0);
     416   105000272 :                 fallthrough;
     417   105000272 :         case XFS_AG_RESV_NONE:
     418             :                 /*
     419             :                  * Normally we put freed blocks back into fdblocks.  In noalloc
     420             :                  * mode, however, we pretend that there are no fdblocks in the
     421             :                  * AG, so don't put them back.
     422             :                  */
     423   210000544 :                 if (!xfs_perag_prohibits_alloc(pag))
     424   104999764 :                         xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS,
     425             :                                         (int64_t)len);
     426             :                 fallthrough;
     427             :         case XFS_AG_RESV_IGNORE:
     428             :                 return;
     429             :         }
     430             : 
     431      509494 :         leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
     432      509494 :         resv->ar_reserved += leftover;
     433      509494 :         if (type == XFS_AG_RESV_RMAPBT)
     434             :                 return;
     435             :         /* Freeing into the reserved pool only requires on-disk update... */
     436      203641 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
     437             :         /* ...but freeing beyond that requires in-core and on-disk update. */
     438      203553 :         if (len > leftover && !xfs_perag_prohibits_alloc(pag))
     439           0 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
     440             : }

Generated by: LCOV version 1.14