LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_ag_resv.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 134 167 80.2 %
Date: 2023-07-31 20:08:12 Functions: 8 8 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * Copyright (C) 2016 Oracle.  All Rights Reserved.
       4             :  * Author: Darrick J. Wong <darrick.wong@oracle.com>
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_alloc.h"
      14             : #include "xfs_errortag.h"
      15             : #include "xfs_error.h"
      16             : #include "xfs_trace.h"
      17             : #include "xfs_trans.h"
      18             : #include "xfs_rmap_btree.h"
      19             : #include "xfs_btree.h"
      20             : #include "xfs_refcount_btree.h"
      21             : #include "xfs_ialloc_btree.h"
      22             : #include "xfs_ag.h"
      23             : #include "xfs_ag_resv.h"
      24             : 
      25             : /*
      26             :  * Per-AG Block Reservations
      27             :  *
      28             :  * For some kinds of allocation group metadata structures, it is advantageous
      29             :  * to reserve a small number of blocks in each AG so that future expansions of
      30             :  * that data structure do not encounter ENOSPC because errors during a btree
      31             :  * split cause the filesystem to go offline.
      32             :  *
      33             :  * Prior to the introduction of reflink, this wasn't an issue because the free
      34             :  * space btrees maintain a reserve of space (the AGFL) to handle any expansion
      35             :  * that may be necessary; and allocations of other metadata (inodes, BMBT,
      36             :  * dir/attr) aren't restricted to a single AG.  However, with reflink it is
      37             :  * possible to allocate all the space in an AG, have subsequent reflink/CoW
      38             :  * activity expand the refcount btree, and discover that there's no space left
      39             :  * to handle that expansion.  Since we can calculate the maximum size of the
      40             :  * refcount btree, we can reserve space for it and avoid ENOSPC.
      41             :  *
      42             :  * Handling per-AG reservations consists of four changes to the allocator's
      43             :  * behavior:  First, because these reservations are always needed, we decrease
      44             :  * the ag_max_usable counter to reflect the size of the AG after the reserved
      45             :  * blocks are taken.  Second, the reservations must be reflected in the
      46             :  * fdblocks count to maintain proper accounting.  Third, each AG must maintain
      47             :  * its own reserved block counter so that we can calculate the amount of space
      48             :  * that must remain free to maintain the reservations.  Fourth, the "remaining
      49             :  * reserved blocks" count must be used when calculating the length of the
      50             :  * longest free extent in an AG and to clamp maxlen in the per-AG allocation
      51             :  * functions.  In other words, we maintain a virtual allocation via in-core
      52             :  * accounting tricks so that we don't have to clean up after a crash. :)
      53             :  *
      54             :  * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
      55             :  * values via struct xfs_alloc_arg or directly to the xfs_free_extent
      56             :  * function.  It might seem a little funny to maintain a reservoir of blocks
      57             :  * to feed another reservoir, but the AGFL only holds enough blocks to get
      58             :  * through the next transaction.  The per-AG reservation is to ensure (we
      59             :  * hope) that each AG never runs out of blocks.  Each data structure wanting
      60             :  * to use the reservation system should update ask/used in xfs_ag_resv_init.
      61             :  */
      62             : 
      63             : /*
      64             :  * Are we critically low on blocks?  For now we'll define that as the number
      65             :  * of blocks we can get our hands on being less than 10% of what we reserved
      66             :  * or less than some arbitrary number (maximum btree height).
      67             :  */
      68             : bool
      69   156088871 : xfs_ag_resv_critical(
      70             :         struct xfs_perag                *pag,
      71             :         enum xfs_ag_resv_type           type)
      72             : {
      73   156088871 :         xfs_extlen_t                    avail;
      74   156088871 :         xfs_extlen_t                    orig;
      75             : 
      76   156088871 :         switch (type) {
      77    78043780 :         case XFS_AG_RESV_METADATA:
      78    78043780 :                 avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved;
      79    78043780 :                 orig = pag->pag_meta_resv.ar_asked;
      80    78043780 :                 break;
      81    78045091 :         case XFS_AG_RESV_RMAPBT:
      82    78045091 :                 avail = pag->pagf_freeblks + pag->pagf_flcount -
      83    78045091 :                         pag->pag_meta_resv.ar_reserved;
      84    78045091 :                 orig = pag->pag_rmapbt_resv.ar_asked;
      85    78045091 :                 break;
      86           0 :         default:
      87           0 :                 ASSERT(0);
      88           0 :                 return false;
      89             :         }
      90             : 
      91   156088871 :         trace_xfs_ag_resv_critical(pag, type, avail);
      92             : 
      93             :         /* Critically low if less than 10% or max btree height remains. */
      94   156088780 :         return XFS_TEST_ERROR(avail < orig / 10 ||
      95             :                               avail < pag->pag_mount->m_agbtree_maxlevels,
      96             :                         pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
      97             : }
      98             : 
      99             : /*
     100             :  * How many blocks are reserved but not used, and therefore must not be
     101             :  * allocated away?
     102             :  */
     103             : xfs_extlen_t
     104   443255457 : xfs_ag_resv_needed(
     105             :         struct xfs_perag                *pag,
     106             :         enum xfs_ag_resv_type           type)
     107             : {
     108   443255457 :         xfs_extlen_t                    len;
     109             : 
     110   443255457 :         len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved;
     111   443255457 :         switch (type) {
     112             :         case XFS_AG_RESV_METADATA:
     113             :         case XFS_AG_RESV_RMAPBT:
     114      506372 :                 len -= xfs_perag_resv(pag, type)->ar_reserved;
     115      506372 :                 break;
     116             :         case XFS_AG_RESV_NONE:
     117             :                 /* empty */
     118             :                 break;
     119           0 :         default:
     120           0 :                 ASSERT(0);
     121             :         }
     122             : 
     123   443255457 :         trace_xfs_ag_resv_needed(pag, type, len);
     124             : 
     125   443234709 :         return len;
     126             : }
     127             : 
     128             : /* Clean out a reservation */
     129             : static int
     130     1990167 : __xfs_ag_resv_free(
     131             :         struct xfs_perag                *pag,
     132             :         enum xfs_ag_resv_type           type)
     133             : {
     134     1990167 :         struct xfs_ag_resv              *resv;
     135     1990167 :         xfs_extlen_t                    oldresv;
     136     1990167 :         int                             error;
     137             : 
     138     1990167 :         trace_xfs_ag_resv_free(pag, type, 0);
     139             : 
     140     1989999 :         resv = xfs_perag_resv(pag, type);
     141     1989999 :         if (pag->pag_agno == 0)
     142      286420 :                 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
     143             :         /*
     144             :          * RMAPBT blocks come from the AGFL and AGFL blocks are always
     145             :          * considered "free", so whatever was reserved at mount time must be
     146             :          * given back at umount.
     147             :          */
     148     1989999 :         if (type == XFS_AG_RESV_RMAPBT)
     149      994687 :                 oldresv = resv->ar_orig_reserved;
     150             :         else
     151      995312 :                 oldresv = resv->ar_reserved;
     152     1989999 :         error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
     153     1990601 :         resv->ar_reserved = 0;
     154     1990601 :         resv->ar_asked = 0;
     155     1990601 :         resv->ar_orig_reserved = 0;
     156             : 
     157     1990601 :         if (error)
     158           0 :                 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
     159           0 :                                 error, _RET_IP_);
     160     1990601 :         return error;
     161             : }
     162             : 
     163             : /* Free a per-AG reservation. */
     164             : int
     165      994923 : xfs_ag_resv_free(
     166             :         struct xfs_perag                *pag)
     167             : {
     168      994923 :         int                             error;
     169      994923 :         int                             err2;
     170             : 
     171      994923 :         error = __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT);
     172      995346 :         err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
     173      995311 :         if (err2 && !error)
     174           0 :                 error = err2;
     175      995311 :         return error;
     176             : }
     177             : 
     178             : static int
     179     1970575 : __xfs_ag_resv_init(
     180             :         struct xfs_perag                *pag,
     181             :         enum xfs_ag_resv_type           type,
     182             :         xfs_extlen_t                    ask,
     183             :         xfs_extlen_t                    used)
     184             : {
     185     1970575 :         struct xfs_mount                *mp = pag->pag_mount;
     186     1970575 :         struct xfs_ag_resv              *resv;
     187     1970575 :         int                             error;
     188     1970575 :         xfs_extlen_t                    hidden_space;
     189             : 
     190     1970575 :         if (used > ask)
     191             :                 ask = used;
     192             : 
     193     1970575 :         switch (type) {
     194             :         case XFS_AG_RESV_RMAPBT:
     195             :                 /*
     196             :                  * Space taken by the rmapbt is not subtracted from fdblocks
     197             :                  * because the rmapbt lives in the free space.  Here we must
     198             :                  * subtract the entire reservation from fdblocks so that we
     199             :                  * always have blocks available for rmapbt expansion.
     200             :                  */
     201             :                 hidden_space = ask;
     202             :                 break;
     203      984942 :         case XFS_AG_RESV_METADATA:
     204             :                 /*
     205             :                  * Space taken by all other metadata btrees is accounted
     206             :                  * on-disk as used space.  We therefore only hide the space
     207             :                  * that is reserved but not used by the trees.
     208             :                  */
     209      984942 :                 hidden_space = ask - used;
     210      984942 :                 break;
     211           0 :         default:
     212           0 :                 ASSERT(0);
     213           0 :                 return -EINVAL;
     214             :         }
     215             : 
     216     1970575 :         if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_AG_RESV_FAIL))
     217             :                 error = -ENOSPC;
     218             :         else
     219     1970085 :                 error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true);
     220     1971245 :         if (error) {
     221           0 :                 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
     222           0 :                                 error, _RET_IP_);
     223           0 :                 xfs_warn(mp,
     224             : "Per-AG reservation for AG %u failed.  Filesystem may run out of space.",
     225             :                                 pag->pag_agno);
     226           0 :                 return error;
     227             :         }
     228             : 
     229             :         /*
     230             :          * Reduce the maximum per-AG allocation length by however much we're
     231             :          * trying to reserve for an AG.  Since this is a filesystem-wide
     232             :          * counter, we only make the adjustment for AG 0.  This assumes that
     233             :          * there aren't any AGs hungrier for per-AG reservation than AG 0.
     234             :          */
     235     1971245 :         if (pag->pag_agno == 0)
     236      281587 :                 mp->m_ag_max_usable -= ask;
     237             : 
     238     1971245 :         resv = xfs_perag_resv(pag, type);
     239     1971245 :         resv->ar_asked = ask;
     240     1971245 :         resv->ar_orig_reserved = hidden_space;
     241     1971245 :         resv->ar_reserved = ask - used;
     242             : 
     243     1971245 :         trace_xfs_ag_resv_init(pag, type, ask);
     244     1971245 :         return 0;
     245             : }
     246             : 
     247             : /* Create a per-AG block reservation. */
     248             : int
     249      988499 : xfs_ag_resv_init(
     250             :         struct xfs_perag                *pag,
     251             :         struct xfs_trans                *tp)
     252             : {
     253      988499 :         struct xfs_mount                *mp = pag->pag_mount;
     254      988499 :         xfs_extlen_t                    ask;
     255      988499 :         xfs_extlen_t                    used;
     256      988499 :         int                             error = 0, error2;
     257      988499 :         bool                            has_resv = false;
     258             : 
     259             :         /* Create the metadata reservation. */
     260      988499 :         if (pag->pag_meta_resv.ar_asked == 0) {
     261      985800 :                 ask = used = 0;
     262             : 
     263      985800 :                 error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask, &used);
     264      985506 :                 if (error)
     265         372 :                         goto out;
     266             : 
     267      985134 :                 error = xfs_finobt_calc_reserves(pag, tp, &ask, &used);
     268      985111 :                 if (error)
     269          20 :                         goto out;
     270             : 
     271      985091 :                 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
     272             :                                 ask, used);
     273      985174 :                 if (error) {
     274             :                         /*
     275             :                          * Because we didn't have per-AG reservations when the
     276             :                          * finobt feature was added we might not be able to
     277             :                          * reserve all needed blocks.  Warn and fall back to the
     278             :                          * old and potentially buggy code in that case, but
     279             :                          * ensure we do have the reservation for the refcountbt.
     280             :                          */
     281           0 :                         ask = used = 0;
     282             : 
     283           0 :                         mp->m_finobt_nores = true;
     284             : 
     285           0 :                         error = xfs_refcountbt_calc_reserves(mp, tp, pag, &ask,
     286             :                                         &used);
     287           0 :                         if (error)
     288           0 :                                 goto out;
     289             : 
     290           0 :                         error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
     291             :                                         ask, used);
     292           0 :                         if (error)
     293           0 :                                 goto out;
     294             :                 }
     295      985174 :                 if (ask)
     296      969711 :                         has_resv = true;
     297             :         }
     298             : 
     299             :         /* Create the RMAPBT metadata reservation */
     300      987873 :         if (pag->pag_rmapbt_resv.ar_asked == 0) {
     301      985554 :                 ask = used = 0;
     302             : 
     303      985554 :                 error = xfs_rmapbt_calc_reserves(mp, tp, pag, &ask, &used);
     304      985838 :                 if (error)
     305           0 :                         goto out;
     306             : 
     307      985838 :                 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
     308      985937 :                 if (error)
     309           0 :                         goto out;
     310      985937 :                 if (ask)
     311             :                         has_resv = true;
     312             :         }
     313             : 
     314      160468 : out:
     315             :         /*
     316             :          * Initialize the pagf if we have at least one active reservation on the
     317             :          * AG. This may have occurred already via reservation calculation, but
     318             :          * fall back to an explicit init to ensure the in-core allocbt usage
     319             :          * counters are initialized as soon as possible. This is important
     320             :          * because filesystems with large perag reservations are susceptible to
     321             :          * free space reservation problems that the allocbt counter is used to
     322             :          * address.
     323             :          */
     324      160860 :         if (has_resv) {
     325      969838 :                 error2 = xfs_alloc_read_agf(pag, tp, 0, NULL);
     326      969868 :                 if (error2)
     327             :                         return error2;
     328             : 
     329             :                 /*
     330             :                  * If there isn't enough space in the AG to satisfy the
     331             :                  * reservation, let the caller know that there wasn't enough
     332             :                  * space.  Callers are responsible for deciding what to do
     333             :                  * next, since (in theory) we can stumble along with
     334             :                  * insufficient reservation if data blocks are being freed to
     335             :                  * replenish the AG's free space.
     336             :                  */
     337      969848 :                 if (!error &&
     338      969865 :                     xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
     339      969865 :                     xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
     340      969865 :                     pag->pagf_freeblks + pag->pagf_flcount)
     341           0 :                         error = -ENOSPC;
     342             :         }
     343             : 
     344             :         return error;
     345             : }
     346             : 
     347             : /* Allocate a block from the reservation. */
     348             : void
     349    85888327 : xfs_ag_resv_alloc_extent(
     350             :         struct xfs_perag                *pag,
     351             :         enum xfs_ag_resv_type           type,
     352             :         struct xfs_alloc_arg            *args)
     353             : {
     354    85888327 :         struct xfs_ag_resv              *resv;
     355    85888327 :         xfs_extlen_t                    len;
     356    85888327 :         uint                            field;
     357             : 
     358    85888327 :         trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
     359             : 
     360    85873036 :         switch (type) {
     361             :         case XFS_AG_RESV_AGFL:
     362             :                 return;
     363             :         case XFS_AG_RESV_METADATA:
     364             :         case XFS_AG_RESV_RMAPBT:
     365      968043 :                 resv = xfs_perag_resv(pag, type);
     366      968043 :                 break;
     367           0 :         default:
     368           0 :                 ASSERT(0);
     369    84904993 :                 fallthrough;
     370    84904993 :         case XFS_AG_RESV_NONE:
     371    84904993 :                 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
     372             :                                        XFS_TRANS_SB_FDBLOCKS;
     373    84904993 :                 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
     374    84904993 :                 return;
     375             :         }
     376             : 
     377      968043 :         len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
     378      968043 :         resv->ar_reserved -= len;
     379      968043 :         if (type == XFS_AG_RESV_RMAPBT)
     380             :                 return;
     381             :         /* Allocations of reserved blocks only need on-disk sb updates... */
     382      234056 :         xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
     383             :         /* ...but non-reserved blocks need in-core and on-disk updates. */
     384      234015 :         if (args->len > len)
     385           0 :                 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
     386           0 :                                 -((int64_t)args->len - len));
     387             : }
     388             : 
     389             : /* Free a block to the reservation. */
     390             : void
     391    99003282 : xfs_ag_resv_free_extent(
     392             :         struct xfs_perag                *pag,
     393             :         enum xfs_ag_resv_type           type,
     394             :         struct xfs_trans                *tp,
     395             :         xfs_extlen_t                    len)
     396             : {
     397    99003282 :         xfs_extlen_t                    leftover;
     398    99003282 :         struct xfs_ag_resv              *resv;
     399             : 
     400    99003282 :         trace_xfs_ag_resv_free_extent(pag, type, len);
     401             : 
     402    98991972 :         switch (type) {
     403             :         case XFS_AG_RESV_AGFL:
     404             :                 return;
     405             :         case XFS_AG_RESV_METADATA:
     406             :         case XFS_AG_RESV_RMAPBT:
     407      421986 :                 resv = xfs_perag_resv(pag, type);
     408      421986 :                 break;
     409           0 :         default:
     410           0 :                 ASSERT(0);
     411    98249133 :                 fallthrough;
     412    98249133 :         case XFS_AG_RESV_NONE:
     413    98249133 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
     414             :                 fallthrough;
     415             :         case XFS_AG_RESV_IGNORE:
     416             :                 return;
     417             :         }
     418             : 
     419      421986 :         leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
     420      421986 :         resv->ar_reserved += leftover;
     421      421986 :         if (type == XFS_AG_RESV_RMAPBT)
     422             :                 return;
     423             :         /* Freeing into the reserved pool only requires on-disk update... */
     424      145804 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
     425             :         /* ...but freeing beyond that requires in-core and on-disk update. */
     426      145771 :         if (len > leftover)
     427           0 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
     428             : }

Generated by: LCOV version 1.14