LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_alloc.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 1507 1810 83.3 %
Date: 2023-07-31 20:08:12 Functions: 73 75 97.3 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_format.h"
       9             : #include "xfs_log_format.h"
      10             : #include "xfs_shared.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_defer.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_rmap.h"
      17             : #include "xfs_alloc_btree.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_extent_busy.h"
      20             : #include "xfs_errortag.h"
      21             : #include "xfs_error.h"
      22             : #include "xfs_trace.h"
      23             : #include "xfs_trans.h"
      24             : #include "xfs_buf_item.h"
      25             : #include "xfs_log.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_bmap.h"
      29             : #include "xfs_health.h"
      30             : 
      31             : struct kmem_cache       *xfs_extfree_item_cache;
      32             : 
      33             : struct workqueue_struct *xfs_alloc_wq;
      34             : 
      35             : #define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
      36             : 
      37             : #define XFSA_FIXUP_BNO_OK       1
      38             : #define XFSA_FIXUP_CNT_OK       2
      39             : 
      40             : /*
      41             :  * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
      42             :  * the beginning of the block for a proper header with the location information
      43             :  * and CRC.
      44             :  */
      45             : unsigned int
      46    29618198 : xfs_agfl_size(
      47             :         struct xfs_mount        *mp)
      48             : {
      49  1429695340 :         unsigned int            size = mp->m_sb.sb_sectsize;
      50             : 
      51    29618198 :         if (xfs_has_crc(mp))
      52  1416570064 :                 size -= sizeof(struct xfs_agfl);
      53             : 
      54  1429695340 :         return size / sizeof(xfs_agblock_t);
      55             : }
      56             : 
      57             : unsigned int
      58      276305 : xfs_refc_block(
      59             :         struct xfs_mount        *mp)
      60             : {
      61      276305 :         if (xfs_has_rmapbt(mp))
      62      262562 :                 return XFS_RMAP_BLOCK(mp) + 1;
      63       13743 :         if (xfs_has_finobt(mp))
      64         146 :                 return XFS_FIBT_BLOCK(mp) + 1;
      65       13597 :         return XFS_IBT_BLOCK(mp) + 1;
      66             : }
      67             : 
      68             : xfs_extlen_t
      69       75030 : xfs_prealloc_blocks(
      70             :         struct xfs_mount        *mp)
      71             : {
      72       75030 :         if (xfs_has_reflink(mp))
      73       56764 :                 return xfs_refc_block(mp) + 1;
      74       18266 :         if (xfs_has_rmapbt(mp))
      75          26 :                 return XFS_RMAP_BLOCK(mp) + 1;
      76       18240 :         if (xfs_has_finobt(mp))
      77       18002 :                 return XFS_FIBT_BLOCK(mp) + 1;
      78         238 :         return XFS_IBT_BLOCK(mp) + 1;
      79             : }
      80             : 
      81             : /*
      82             :  * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
      83             :  * guarantee that we can refill the AGFL prior to allocating space in a nearly
      84             :  * full AG.  Although the space described by the free space btrees, the
      85             :  * blocks used by the freesp btrees themselves, and the blocks owned by the
      86             :  * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
      87             :  * free space in the AG drop so low that the free space btrees cannot refill an
      88             :  * empty AGFL up to the minimum level.  Rather than grind through empty AGs
      89             :  * until the fs goes down, we subtract this many AG blocks from the incore
      90             :  * fdblocks to ensure user allocation does not overcommit the space the
      91             :  * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
      92             :  * withhold space from xfs_mod_fdblocks, so we do not account for that here.
      93             :  */
      94             : #define XFS_ALLOCBT_AGFL_RESERVE        4
      95             : 
      96             : /*
      97             :  * Compute the number of blocks that we set aside to guarantee the ability to
      98             :  * refill the AGFL and handle a full bmap btree split.
      99             :  *
     100             :  * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
     101             :  * AGF buffer (PV 947395), we place constraints on the relationship among
     102             :  * actual allocations for data blocks, freelist blocks, and potential file data
     103             :  * bmap btree blocks. However, these restrictions may result in no actual space
     104             :  * allocated for a delayed extent, for example, a data block in a certain AG is
     105             :  * allocated but there is no additional block for the additional bmap btree
     106             :  * block due to a split of the bmap btree of the file. The result of this may
     107             :  * lead to an infinite loop when the file gets flushed to disk and all delayed
     108             :  * extents need to be actually allocated. To get around this, we explicitly set
     109             :  * aside a few blocks which will not be reserved in delayed allocation.
     110             :  *
     111             :  * For each AG, we need to reserve enough blocks to replenish a totally empty
     112             :  * AGFL and 4 more to handle a potential split of the file's bmap btree.
     113             :  */
     114             : unsigned int
     115       75456 : xfs_alloc_set_aside(
     116             :         struct xfs_mount        *mp)
     117             : {
     118       75456 :         return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
     119             : }
     120             : 
     121             : /*
     122             :  * When deciding how much space to allocate out of an AG, we limit the
     123             :  * allocation maximum size to the size the AG. However, we cannot use all the
     124             :  * blocks in the AG - some are permanently used by metadata. These
     125             :  * blocks are generally:
     126             :  *      - the AG superblock, AGF, AGI and AGFL
     127             :  *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
     128             :  *        the AGI free inode and rmap btree root blocks.
     129             :  *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
     130             :  *      - the rmapbt root block
     131             :  *
     132             :  * The AG headers are sector sized, so the amount of space they take up is
     133             :  * dependent on filesystem geometry. The others are all single blocks.
     134             :  */
     135             : unsigned int
     136       60890 : xfs_alloc_ag_max_usable(
     137             :         struct xfs_mount        *mp)
     138             : {
     139       60890 :         unsigned int            blocks;
     140             : 
     141       60890 :         blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
     142       60890 :         blocks += XFS_ALLOCBT_AGFL_RESERVE;
     143       60890 :         blocks += 3;                    /* AGF, AGI btree root blocks */
     144       60890 :         if (xfs_has_finobt(mp))
     145       60651 :                 blocks++;               /* finobt root block */
     146       60890 :         if (xfs_has_rmapbt(mp))
     147       44233 :                 blocks++;               /* rmap root block */
     148       60890 :         if (xfs_has_reflink(mp))
     149       44207 :                 blocks++;               /* refcount root block */
     150             : 
     151       60890 :         return mp->m_sb.sb_agblocks - blocks;
     152             : }
     153             : 
     154             : /*
     155             :  * Lookup the record equal to [bno, len] in the btree given by cur.
     156             :  */
     157             : STATIC int                              /* error */
     158   394037577 : xfs_alloc_lookup_eq(
     159             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     160             :         xfs_agblock_t           bno,    /* starting block of extent */
     161             :         xfs_extlen_t            len,    /* length of extent */
     162             :         int                     *stat)  /* success/failure */
     163             : {
     164   394037577 :         int                     error;
     165             : 
     166   394037577 :         cur->bc_rec.a.ar_startblock = bno;
     167   394037577 :         cur->bc_rec.a.ar_blockcount = len;
     168   394037577 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
     169   394066281 :         cur->bc_ag.abt.active = (*stat == 1);
     170   394066281 :         return error;
     171             : }
     172             : 
     173             : /*
     174             :  * Lookup the first record greater than or equal to [bno, len]
     175             :  * in the btree given by cur.
     176             :  */
     177             : int                             /* error */
     178       18525 : xfs_alloc_lookup_ge(
     179             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     180             :         xfs_agblock_t           bno,    /* starting block of extent */
     181             :         xfs_extlen_t            len,    /* length of extent */
     182             :         int                     *stat)  /* success/failure */
     183             : {
     184   376224711 :         int                     error;
     185             : 
     186   376224711 :         cur->bc_rec.a.ar_startblock = bno;
     187   376224711 :         cur->bc_rec.a.ar_blockcount = len;
     188       18525 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
     189   376251066 :         cur->bc_ag.abt.active = (*stat == 1);
     190   376251066 :         return error;
     191             : }
     192             : 
     193             : /*
     194             :  * Lookup the first record less than or equal to [bno, len]
     195             :  * in the btree given by cur.
     196             :  */
     197             : int                                     /* error */
     198   445719885 : xfs_alloc_lookup_le(
     199             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     200             :         xfs_agblock_t           bno,    /* starting block of extent */
     201             :         xfs_extlen_t            len,    /* length of extent */
     202             :         int                     *stat)  /* success/failure */
     203             : {
     204   593122276 :         int                     error;
     205   593122276 :         cur->bc_rec.a.ar_startblock = bno;
     206   593122276 :         cur->bc_rec.a.ar_blockcount = len;
     207   445719885 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
     208   593137220 :         cur->bc_ag.abt.active = (*stat == 1);
     209   593137220 :         return error;
     210             : }
     211             : 
     212             : static inline bool
     213 11187183782 : xfs_alloc_cur_active(
     214             :         struct xfs_btree_cur    *cur)
     215             : {
     216 11187183782 :         return cur && cur->bc_ag.abt.active;
     217             : }
     218             : 
     219             : /*
     220             :  * Update the record referred to by cur to the value given
     221             :  * by [bno, len].
     222             :  * This either works (return 0) or gets an EFSCORRUPTED error.
     223             :  */
     224             : STATIC int                              /* error */
     225   105251599 : xfs_alloc_update(
     226             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     227             :         xfs_agblock_t           bno,    /* starting block of extent */
     228             :         xfs_extlen_t            len)    /* length of extent */
     229             : {
     230   105251599 :         union xfs_btree_rec     rec;
     231             : 
     232   105251599 :         rec.alloc.ar_startblock = cpu_to_be32(bno);
     233   105251599 :         rec.alloc.ar_blockcount = cpu_to_be32(len);
     234   105251599 :         return xfs_btree_update(cur, &rec);
     235             : }
     236             : 
     237             : /* Convert the ondisk btree record to its incore representation. */
     238             : void
     239   444884001 : xfs_alloc_btrec_to_irec(
     240             :         const union xfs_btree_rec       *rec,
     241             :         struct xfs_alloc_rec_incore     *irec)
     242             : {
     243  9870935423 :         irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
     244  9870935423 :         irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
     245   444884001 : }
     246             : 
     247             : inline xfs_failaddr_t
     248  9915404526 : xfs_alloc_check_perag_irec(
     249             :         struct xfs_perag                *pag,
     250             :         const struct xfs_alloc_rec_incore *irec)
     251             : {
     252  9915404526 :         if (irec->ar_blockcount == 0)
     253           0 :                 return __this_address;
     254             : 
     255             :         /* check for valid extent range, including overflow */
     256  9915404526 :         if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
     257           0 :                 return __this_address;
     258             : 
     259             :         return NULL;
     260             : }
     261             : 
     262             : /* Simple checks for free space records. */
     263             : xfs_failaddr_t
     264   444874784 : xfs_alloc_check_irec(
     265             :         struct xfs_btree_cur            *cur,
     266             :         const struct xfs_alloc_rec_incore *irec)
     267             : {
     268   444874784 :         return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
     269             : }
     270             : 
     271             : static inline int
     272           0 : xfs_alloc_complain_bad_rec(
     273             :         struct xfs_btree_cur            *cur,
     274             :         xfs_failaddr_t                  fa,
     275             :         const struct xfs_alloc_rec_incore *irec)
     276             : {
     277           0 :         struct xfs_mount                *mp = cur->bc_mp;
     278             : 
     279           0 :         xfs_warn(mp,
     280             :                 "%s Freespace BTree record corruption in AG %d detected at %pS!",
     281             :                 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
     282             :                 cur->bc_ag.pag->pag_agno, fa);
     283           0 :         xfs_warn(mp,
     284             :                 "start block 0x%x block count 0x%x", irec->ar_startblock,
     285             :                 irec->ar_blockcount);
     286           0 :         xfs_btree_mark_sick(cur);
     287           0 :         return -EFSCORRUPTED;
     288             : }
     289             : 
     290             : /*
     291             :  * Get the data from the pointed-to record.
     292             :  */
     293             : int                                     /* error */
     294  7172878875 : xfs_alloc_get_rec(
     295             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     296             :         xfs_agblock_t           *bno,   /* output: starting block of extent */
     297             :         xfs_extlen_t            *len,   /* output: length of extent */
     298             :         int                     *stat)  /* output: success/failure */
     299             : {
     300  7172878875 :         struct xfs_alloc_rec_incore irec;
     301  7172878875 :         union xfs_btree_rec     *rec;
     302  7172878875 :         xfs_failaddr_t          fa;
     303  7172878875 :         int                     error;
     304             : 
     305  7172878875 :         error = xfs_btree_get_rec(cur, &rec, stat);
     306  7160844992 :         if (error || !(*stat))
     307             :                 return error;
     308             : 
     309  7161651219 :         xfs_alloc_btrec_to_irec(rec, &irec);
     310  7161651219 :         fa = xfs_alloc_check_irec(cur, &irec);
     311  7158011172 :         if (fa)
     312           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
     313             : 
     314  7158011172 :         *bno = irec.ar_startblock;
     315  7158011172 :         *len = irec.ar_blockcount;
     316  7158011172 :         return 0;
     317             : }
     318             : 
     319             : /*
     320             :  * Compute aligned version of the found extent.
     321             :  * Takes alignment and min length into account.
     322             :  */
     323             : STATIC bool
     324  5667603957 : xfs_alloc_compute_aligned(
     325             :         xfs_alloc_arg_t *args,          /* allocation argument structure */
     326             :         xfs_agblock_t   foundbno,       /* starting block in found extent */
     327             :         xfs_extlen_t    foundlen,       /* length in found extent */
     328             :         xfs_agblock_t   *resbno,        /* result block number */
     329             :         xfs_extlen_t    *reslen,        /* result length */
     330             :         unsigned        *busy_gen)
     331             : {
     332  5667603957 :         xfs_agblock_t   bno = foundbno;
     333  5667603957 :         xfs_extlen_t    len = foundlen;
     334  5667603957 :         xfs_extlen_t    diff;
     335  5667603957 :         bool            busy;
     336             : 
     337             :         /* Trim busy sections out of found extent */
     338  5667603957 :         busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
     339             : 
     340             :         /*
     341             :          * If we have a largish extent that happens to start before min_agbno,
     342             :          * see if we can shift it into range...
     343             :          */
     344  5677102395 :         if (bno < args->min_agbno && bno + len > args->min_agbno) {
     345        2581 :                 diff = args->min_agbno - bno;
     346        2581 :                 if (len > diff) {
     347        2581 :                         bno += diff;
     348        2581 :                         len -= diff;
     349             :                 }
     350             :         }
     351             : 
     352  5677102395 :         if (args->alignment > 1 && len >= args->minlen) {
     353    16695473 :                 xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
     354             : 
     355    16695473 :                 diff = aligned_bno - bno;
     356             : 
     357    16695473 :                 *resbno = aligned_bno;
     358    16695473 :                 *reslen = diff >= len ? 0 : len - diff;
     359             :         } else {
     360  5660406922 :                 *resbno = bno;
     361  5660406922 :                 *reslen = len;
     362             :         }
     363             : 
     364  5677102395 :         return busy;
     365             : }
     366             : 
     367             : /*
     368             :  * Compute best start block and diff for "near" allocations.
     369             :  * freelen >= wantlen already checked by caller.
     370             :  */
     371             : STATIC xfs_extlen_t                     /* difference value (absolute) */
     372  2273983459 : xfs_alloc_compute_diff(
     373             :         xfs_agblock_t   wantbno,        /* target starting block */
     374             :         xfs_extlen_t    wantlen,        /* target length */
     375             :         xfs_extlen_t    alignment,      /* target alignment */
     376             :         int             datatype,       /* are we allocating data? */
     377             :         xfs_agblock_t   freebno,        /* freespace's starting block */
     378             :         xfs_extlen_t    freelen,        /* freespace's length */
     379             :         xfs_agblock_t   *newbnop)       /* result: best start block from free */
     380             : {
     381  2273983459 :         xfs_agblock_t   freeend;        /* end of freespace extent */
     382  2273983459 :         xfs_agblock_t   newbno1;        /* return block number */
     383  2273983459 :         xfs_agblock_t   newbno2;        /* other new block number */
     384  2273983459 :         xfs_extlen_t    newlen1=0;      /* length with newbno1 */
     385  2273983459 :         xfs_extlen_t    newlen2=0;      /* length with newbno2 */
     386  2273983459 :         xfs_agblock_t   wantend;        /* end of target extent */
     387  2273983459 :         bool            userdata = datatype & XFS_ALLOC_USERDATA;
     388             : 
     389  2273983459 :         ASSERT(freelen >= wantlen);
     390  2273983459 :         freeend = freebno + freelen;
     391  2273983459 :         wantend = wantbno + wantlen;
     392             :         /*
     393             :          * We want to allocate from the start of a free extent if it is past
     394             :          * the desired block or if we are allocating user data and the free
     395             :          * extent is before desired block. The second case is there to allow
     396             :          * for contiguous allocation from the remaining free space if the file
     397             :          * grows in the short term.
     398             :          */
     399  2273983459 :         if (freebno >= wantbno || (userdata && freeend < wantend)) {
     400  1680514212 :                 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
     401           0 :                         newbno1 = NULLAGBLOCK;
     402   593469247 :         } else if (freeend >= wantend && alignment > 1) {
     403          29 :                 newbno1 = roundup(wantbno, alignment);
     404          29 :                 newbno2 = newbno1 - alignment;
     405          29 :                 if (newbno1 >= freeend)
     406             :                         newbno1 = NULLAGBLOCK;
     407             :                 else
     408          29 :                         newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
     409          29 :                 if (newbno2 < freebno)
     410             :                         newbno2 = NULLAGBLOCK;
     411             :                 else
     412          29 :                         newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
     413          29 :                 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
     414          29 :                         if (newlen1 < newlen2 ||
     415          28 :                             (newlen1 == newlen2 &&
     416          28 :                              XFS_ABSDIFF(newbno1, wantbno) >
     417          28 :                              XFS_ABSDIFF(newbno2, wantbno)))
     418          16 :                                 newbno1 = newbno2;
     419           0 :                 } else if (newbno2 != NULLAGBLOCK)
     420           0 :                         newbno1 = newbno2;
     421   593469218 :         } else if (freeend >= wantend) {
     422             :                 newbno1 = wantbno;
     423   591435711 :         } else if (alignment > 1) {
     424       25940 :                 newbno1 = roundup(freeend - wantlen, alignment);
     425       25940 :                 if (newbno1 > freeend - wantlen &&
     426       16012 :                     newbno1 - alignment >= freebno)
     427             :                         newbno1 -= alignment;
     428        9928 :                 else if (newbno1 >= freeend)
     429           0 :                         newbno1 = NULLAGBLOCK;
     430             :         } else
     431   591409771 :                 newbno1 = freeend - wantlen;
     432  2273983459 :         *newbnop = newbno1;
     433  2273983459 :         return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
     434             : }
     435             : 
     436             : /*
     437             :  * Fix up the length, based on mod and prod.
     438             :  * len should be k * prod + mod for some k.
     439             :  * If len is too small it is returned unchanged.
     440             :  * If len hits maxlen it is left alone.
     441             :  */
     442             : STATIC void
     443  2281730070 : xfs_alloc_fix_len(
     444             :         xfs_alloc_arg_t *args)          /* allocation argument structure */
     445             : {
     446  2281730070 :         xfs_extlen_t    k;
     447  2281730070 :         xfs_extlen_t    rlen;
     448             : 
     449  2281730070 :         ASSERT(args->mod < args->prod);
     450  2281730070 :         rlen = args->len;
     451  2281730070 :         ASSERT(rlen >= args->minlen);
     452  2281730070 :         ASSERT(rlen <= args->maxlen);
     453  2281730070 :         if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
     454      220748 :             (args->mod == 0 && rlen < args->prod))
     455             :                 return;
     456       23117 :         k = rlen % args->prod;
     457       23117 :         if (k == args->mod)
     458             :                 return;
     459       19669 :         if (k > args->mod)
     460       19309 :                 rlen = rlen - (k - args->mod);
     461             :         else
     462         360 :                 rlen = rlen - args->prod + (args->mod - k);
     463             :         /* casts to (int) catch length underflows */
     464       19669 :         if ((int)rlen < (int)args->minlen)
     465             :                 return;
     466        1390 :         ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
     467        1390 :         ASSERT(rlen % args->prod == args->mod);
     468        1390 :         ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
     469             :                 rlen + args->minleft);
     470        1390 :         args->len = rlen;
     471             : }
     472             : 
     473             : /*
     474             :  * Update the two btrees, logically removing from freespace the extent
     475             :  * starting at rbno, rlen blocks.  The extent is contained within the
     476             :  * actual (current) free extent fbno for flen blocks.
     477             :  * Flags are passed in indicating whether the cursors are set to the
     478             :  * relevant records.
     479             :  */
     480             : STATIC int                              /* error code */
     481    85969767 : xfs_alloc_fixup_trees(
     482             :         struct xfs_btree_cur *cnt_cur,  /* cursor for by-size btree */
     483             :         struct xfs_btree_cur *bno_cur,  /* cursor for by-block btree */
     484             :         xfs_agblock_t   fbno,           /* starting block of free extent */
     485             :         xfs_extlen_t    flen,           /* length of free extent */
     486             :         xfs_agblock_t   rbno,           /* starting block of returned extent */
     487             :         xfs_extlen_t    rlen,           /* length of returned extent */
     488             :         int             flags)          /* flags, XFSA_FIXUP_... */
     489             : {
     490    85969767 :         int             error;          /* error code */
     491    85969767 :         int             i;              /* operation results */
     492    85969767 :         xfs_agblock_t   nfbno1;         /* first new free startblock */
     493    85969767 :         xfs_agblock_t   nfbno2;         /* second new free startblock */
     494    85969767 :         xfs_extlen_t    nflen1=0;       /* first new free length */
     495    85969767 :         xfs_extlen_t    nflen2=0;       /* second new free length */
     496    85969767 :         struct xfs_mount *mp;
     497             : 
     498    85969767 :         mp = cnt_cur->bc_mp;
     499             : 
     500             :         /*
     501             :          * Look up the record in the by-size tree if necessary.
     502             :          */
     503    85969767 :         if (flags & XFSA_FIXUP_CNT_OK) {
     504             : #ifdef DEBUG
     505     6599460 :                 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
     506             :                         return error;
     507     6599375 :                 if (XFS_IS_CORRUPT(mp,
     508             :                                    i != 1 ||
     509             :                                    nfbno1 != fbno ||
     510             :                                    nflen1 != flen)) {
     511           0 :                         xfs_btree_mark_sick(cnt_cur);
     512           0 :                         return -EFSCORRUPTED;
     513             :                 }
     514             : #endif
     515             :         } else {
     516    79370307 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
     517             :                         return error;
     518    79375555 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     519           0 :                         xfs_btree_mark_sick(cnt_cur);
     520           0 :                         return -EFSCORRUPTED;
     521             :                 }
     522             :         }
     523             :         /*
     524             :          * Look up the record in the by-block tree if necessary.
     525             :          */
     526    85974930 :         if (flags & XFSA_FIXUP_BNO_OK) {
     527             : #ifdef DEBUG
     528      428017 :                 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
     529             :                         return error;
     530      428044 :                 if (XFS_IS_CORRUPT(mp,
     531             :                                    i != 1 ||
     532             :                                    nfbno1 != fbno ||
     533             :                                    nflen1 != flen)) {
     534           0 :                         xfs_btree_mark_sick(bno_cur);
     535           0 :                         return -EFSCORRUPTED;
     536             :                 }
     537             : #endif
     538             :         } else {
     539    85546913 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
     540             :                         return error;
     541    85545656 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     542           0 :                         xfs_btree_mark_sick(bno_cur);
     543           0 :                         return -EFSCORRUPTED;
     544             :                 }
     545             :         }
     546             : 
     547             : #ifdef DEBUG
     548    85973700 :         if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
     549    53893147 :                 struct xfs_btree_block  *bnoblock;
     550    53893147 :                 struct xfs_btree_block  *cntblock;
     551             : 
     552    53893147 :                 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
     553    53893147 :                 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
     554             : 
     555    53893147 :                 if (XFS_IS_CORRUPT(mp,
     556             :                                    bnoblock->bb_numrecs !=
     557             :                                    cntblock->bb_numrecs)) {
     558           0 :                         xfs_btree_mark_sick(bno_cur);
     559           0 :                         return -EFSCORRUPTED;
     560             :                 }
     561             :         }
     562             : #endif
     563             : 
     564             :         /*
     565             :          * Deal with all four cases: the allocated record is contained
     566             :          * within the freespace record, so we can have new freespace
     567             :          * at either (or both) end, or no freespace remaining.
     568             :          */
     569    85973700 :         if (rbno == fbno && rlen == flen)
     570    24060982 :                 nfbno1 = nfbno2 = NULLAGBLOCK;
     571    61912718 :         else if (rbno == fbno) {
     572    52326428 :                 nfbno1 = rbno + rlen;
     573    52326428 :                 nflen1 = flen - rlen;
     574    52326428 :                 nfbno2 = NULLAGBLOCK;
     575     9586290 :         } else if (rbno + rlen == fbno + flen) {
     576     4283179 :                 nfbno1 = fbno;
     577     4283179 :                 nflen1 = flen - rlen;
     578     4283179 :                 nfbno2 = NULLAGBLOCK;
     579             :         } else {
     580     5303111 :                 nfbno1 = fbno;
     581     5303111 :                 nflen1 = rbno - fbno;
     582     5303111 :                 nfbno2 = rbno + rlen;
     583     5303111 :                 nflen2 = (fbno + flen) - nfbno2;
     584             :         }
     585             :         /*
     586             :          * Delete the entry from the by-size btree.
     587             :          */
     588    85973700 :         if ((error = xfs_btree_delete(cnt_cur, &i)))
     589             :                 return error;
     590    85963717 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
     591           0 :                 xfs_btree_mark_sick(cnt_cur);
     592           0 :                 return -EFSCORRUPTED;
     593             :         }
     594             :         /*
     595             :          * Add new by-size btree entry(s).
     596             :          */
     597    85963717 :         if (nfbno1 != NULLAGBLOCK) {
     598    61906289 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
     599             :                         return error;
     600    61912613 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     601           0 :                         xfs_btree_mark_sick(cnt_cur);
     602           0 :                         return -EFSCORRUPTED;
     603             :                 }
     604    61912613 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     605             :                         return error;
     606    61897814 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     607           0 :                         xfs_btree_mark_sick(cnt_cur);
     608           0 :                         return -EFSCORRUPTED;
     609             :                 }
     610             :         }
     611    85955242 :         if (nfbno2 != NULLAGBLOCK) {
     612     5302961 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
     613             :                         return error;
     614     5303109 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     615           0 :                         xfs_btree_mark_sick(cnt_cur);
     616           0 :                         return -EFSCORRUPTED;
     617             :                 }
     618     5303109 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     619             :                         return error;
     620     5303134 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     621           0 :                         xfs_btree_mark_sick(cnt_cur);
     622           0 :                         return -EFSCORRUPTED;
     623             :                 }
     624             :         }
     625             :         /*
     626             :          * Fix up the by-block btree entry(s).
     627             :          */
     628    85955415 :         if (nfbno1 == NULLAGBLOCK) {
     629             :                 /*
     630             :                  * No remaining freespace, just delete the by-block tree entry.
     631             :                  */
     632    24056591 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
     633             :                         return error;
     634    24060928 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     635           0 :                         xfs_btree_mark_sick(bno_cur);
     636           0 :                         return -EFSCORRUPTED;
     637             :                 }
     638             :         } else {
     639             :                 /*
     640             :                  * Update the by-block entry to start later|be shorter.
     641             :                  */
     642    61898824 :                 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
     643             :                         return error;
     644             :         }
     645    85968413 :         if (nfbno2 != NULLAGBLOCK) {
     646             :                 /*
     647             :                  * 2 resulting free entries, need to add one.
     648             :                  */
     649     5303065 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
     650             :                         return error;
     651     5303103 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     652           0 :                         xfs_btree_mark_sick(bno_cur);
     653           0 :                         return -EFSCORRUPTED;
     654             :                 }
     655     5303103 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
     656             :                         return error;
     657     5303120 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     658           0 :                         xfs_btree_mark_sick(bno_cur);
     659           0 :                         return -EFSCORRUPTED;
     660             :                 }
     661             :         }
     662             :         return 0;
     663             : }
     664             : 
     665             : /*
     666             :  * We do not verify the AGFL contents against AGF-based index counters here,
     667             :  * even though we may have access to the perag that contains shadow copies. We
     668             :  * don't know if the AGF based counters have been checked, and if they have they
     669             :  * still may be inconsistent because they haven't yet been reset on the first
     670             :  * allocation after the AGF has been read in.
     671             :  *
     672             :  * This means we can only check that all agfl entries contain valid or null
     673             :  * values because we can't reliably determine the active range to exclude
     674             :  * NULLAGBNO as a valid value.
     675             :  *
     676             :  * However, we can't even do that for v4 format filesystems because there are
     677             :  * old versions of mkfs out there that does not initialise the AGFL to known,
     678             :  * verifiable values. HEnce we can't tell the difference between a AGFL block
     679             :  * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
     680             :  *
     681             :  * As a result, we can only fully validate AGFL block numbers when we pull them
     682             :  * from the freelist in xfs_alloc_get_freelist().
     683             :  */
     684             : static xfs_failaddr_t
     685      898541 : xfs_agfl_verify(
     686             :         struct xfs_buf  *bp)
     687             : {
     688      898541 :         struct xfs_mount *mp = bp->b_mount;
     689      898541 :         struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
     690      898541 :         __be32          *agfl_bno = xfs_buf_to_agfl_bno(bp);
     691      898541 :         int             i;
     692             : 
     693      898541 :         if (!xfs_has_crc(mp))
     694             :                 return NULL;
     695             : 
     696      898519 :         if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
     697           0 :                 return __this_address;
     698      898487 :         if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
     699           0 :                 return __this_address;
     700             :         /*
     701             :          * during growfs operations, the perag is not fully initialised,
     702             :          * so we can't use it for any useful checking. growfs ensures we can't
     703             :          * use it by using uncached buffers that don't have the perag attached
     704             :          * so we can detect and avoid this problem.
     705             :          */
     706      898656 :         if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
     707           0 :                 return __this_address;
     708             : 
     709  1758033377 :         for (i = 0; i < xfs_agfl_size(mp); i++) {
     710   878089620 :                 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
     711   139918834 :                     be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
     712           0 :                         return __this_address;
     713             :         }
     714             : 
     715      898656 :         if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
     716           0 :                 return __this_address;
     717             :         return NULL;
     718             : }
     719             : 
     720             : static void
     721      183049 : xfs_agfl_read_verify(
     722             :         struct xfs_buf  *bp)
     723             : {
     724      183049 :         struct xfs_mount *mp = bp->b_mount;
     725      183049 :         xfs_failaddr_t  fa;
     726             : 
     727             :         /*
     728             :          * There is no verification of non-crc AGFLs because mkfs does not
     729             :          * initialise the AGFL to zero or NULL. Hence the only valid part of the
     730             :          * AGFL is what the AGF says is active. We can't get to the AGF, so we
     731             :          * can't verify just those entries are valid.
     732             :          */
     733      183049 :         if (!xfs_has_crc(mp))
     734             :                 return;
     735             : 
     736      182972 :         if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
     737         640 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
     738             :         else {
     739      182332 :                 fa = xfs_agfl_verify(bp);
     740      182332 :                 if (fa)
     741           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     742             :         }
     743             : }
     744             : 
     745             : static void
     746      108287 : xfs_agfl_write_verify(
     747             :         struct xfs_buf  *bp)
     748             : {
     749      108287 :         struct xfs_mount        *mp = bp->b_mount;
     750      108287 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     751      108287 :         xfs_failaddr_t          fa;
     752             : 
     753             :         /* no verification of non-crc AGFLs */
     754      108287 :         if (!xfs_has_crc(mp))
     755             :                 return;
     756             : 
     757       94690 :         fa = xfs_agfl_verify(bp);
     758       94690 :         if (fa) {
     759           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     760           0 :                 return;
     761             :         }
     762             : 
     763       94690 :         if (bip)
     764       76682 :                 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     765             : 
     766       94690 :         xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
     767             : }
     768             : 
     769             : const struct xfs_buf_ops xfs_agfl_buf_ops = {
     770             :         .name = "xfs_agfl",
     771             :         .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
     772             :         .verify_read = xfs_agfl_read_verify,
     773             :         .verify_write = xfs_agfl_write_verify,
     774             :         .verify_struct = xfs_agfl_verify,
     775             : };
     776             : 
     777             : /*
     778             :  * Read in the allocation group free block array.
     779             :  */
     780             : int
     781   593404781 : xfs_alloc_read_agfl(
     782             :         struct xfs_perag        *pag,
     783             :         struct xfs_trans        *tp,
     784             :         struct xfs_buf          **bpp)
     785             : {
     786   593404781 :         struct xfs_mount        *mp = pag->pag_mount;
     787   593404781 :         struct xfs_buf          *bp;
     788   593404781 :         int                     error;
     789             : 
     790  1780214343 :         error = xfs_trans_read_buf(
     791   593404781 :                         mp, tp, mp->m_ddev_targp,
     792   593404781 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
     793   593404781 :                         XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
     794   593495449 :         if (xfs_metadata_is_sick(error))
     795         640 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
     796   593495449 :         if (error)
     797             :                 return error;
     798   593494748 :         xfs_buf_set_ref(bp, XFS_AGFL_REF);
     799   593467885 :         *bpp = bp;
     800   593467885 :         return 0;
     801             : }
     802             : 
     803             : STATIC int
     804   184761337 : xfs_alloc_update_counters(
     805             :         struct xfs_trans        *tp,
     806             :         struct xfs_buf          *agbp,
     807             :         long                    len)
     808             : {
     809   184761337 :         struct xfs_agf          *agf = agbp->b_addr;
     810             : 
     811   184761337 :         agbp->b_pag->pagf_freeblks += len;
     812   184761337 :         be32_add_cpu(&agf->agf_freeblks, len);
     813             : 
     814   184761337 :         if (unlikely(be32_to_cpu(agf->agf_freeblks) >
     815             :                      be32_to_cpu(agf->agf_length))) {
     816           0 :                 xfs_buf_mark_corrupt(agbp);
     817           0 :                 xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
     818           0 :                 return -EFSCORRUPTED;
     819             :         }
     820             : 
     821   184761337 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
     822   184761337 :         return 0;
     823             : }
     824             : 
     825             : /*
     826             :  * Block allocation algorithm and data structures.
     827             :  */
     828             : struct xfs_alloc_cur {
     829             :         struct xfs_btree_cur            *cnt;   /* btree cursors */
     830             :         struct xfs_btree_cur            *bnolt;
     831             :         struct xfs_btree_cur            *bnogt;
     832             :         xfs_extlen_t                    cur_len;/* current search length */
     833             :         xfs_agblock_t                   rec_bno;/* extent startblock */
     834             :         xfs_extlen_t                    rec_len;/* extent length */
     835             :         xfs_agblock_t                   bno;    /* alloc bno */
     836             :         xfs_extlen_t                    len;    /* alloc len */
     837             :         xfs_extlen_t                    diff;   /* diff from search bno */
     838             :         unsigned int                    busy_gen;/* busy state */
     839             :         bool                            busy;
     840             : };
     841             : 
     842             : /*
     843             :  * Set up cursors, etc. in the extent allocation cursor. This function can be
     844             :  * called multiple times to reset an initialized structure without having to
     845             :  * reallocate cursors.
     846             :  */
     847             : static int
     848    78995079 : xfs_alloc_cur_setup(
     849             :         struct xfs_alloc_arg    *args,
     850             :         struct xfs_alloc_cur    *acur)
     851             : {
     852    78995079 :         int                     error;
     853    78995079 :         int                     i;
     854             : 
     855    78995079 :         acur->cur_len = args->maxlen;
     856    78995079 :         acur->rec_bno = 0;
     857    78995079 :         acur->rec_len = 0;
     858    78995079 :         acur->bno = 0;
     859    78995079 :         acur->len = 0;
     860    78995079 :         acur->diff = -1;
     861    78995079 :         acur->busy = false;
     862    78995079 :         acur->busy_gen = 0;
     863             : 
     864             :         /*
     865             :          * Perform an initial cntbt lookup to check for availability of maxlen
     866             :          * extents. If this fails, we'll return -ENOSPC to signal the caller to
     867             :          * attempt a small allocation.
     868             :          */
     869    78995079 :         if (!acur->cnt)
     870    78847726 :                 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
     871             :                                         args->agbp, args->pag, XFS_BTNUM_CNT);
     872    79076220 :         error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
     873    79048596 :         if (error)
     874             :                 return error;
     875             : 
     876             :         /*
     877             :          * Allocate the bnobt left and right search cursors.
     878             :          */
     879    79047006 :         if (!acur->bnolt)
     880    78932282 :                 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
     881             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     882    79056795 :         if (!acur->bnogt)
     883    78938570 :                 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
     884             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     885    79064587 :         return i == 1 ? 0 : -ENOSPC;
     886             : }
     887             : 
     888             : static void
     889    78934083 : xfs_alloc_cur_close(
     890             :         struct xfs_alloc_cur    *acur,
     891             :         bool                    error)
     892             : {
     893    78934083 :         int                     cur_error = XFS_BTREE_NOERROR;
     894             : 
     895    78934083 :         if (error)
     896        3013 :                 cur_error = XFS_BTREE_ERROR;
     897             : 
     898    78934083 :         if (acur->cnt)
     899    78934083 :                 xfs_btree_del_cursor(acur->cnt, cur_error);
     900    78951306 :         if (acur->bnolt)
     901    78949716 :                 xfs_btree_del_cursor(acur->bnolt, cur_error);
     902    78954897 :         if (acur->bnogt)
     903    78953307 :                 xfs_btree_del_cursor(acur->bnogt, cur_error);
     904    78953581 :         acur->cnt = acur->bnolt = acur->bnogt = NULL;
     905    78953581 : }
     906             : 
     907             : /*
     908             :  * Check an extent for allocation and track the best available candidate in the
     909             :  * allocation structure. The cursor is deactivated if it has entered an out of
     910             :  * range state based on allocation arguments. Optionally return the extent
     911             :  * extent geometry and allocation status if requested by the caller.
     912             :  */
     913             : static int
     914  5287111793 : xfs_alloc_cur_check(
     915             :         struct xfs_alloc_arg    *args,
     916             :         struct xfs_alloc_cur    *acur,
     917             :         struct xfs_btree_cur    *cur,
     918             :         int                     *new)
     919             : {
     920  5287111793 :         int                     error, i;
     921  5287111793 :         xfs_agblock_t           bno, bnoa, bnew;
     922  5287111793 :         xfs_extlen_t            len, lena, diff = -1;
     923  5287111793 :         bool                    busy;
     924  5287111793 :         unsigned                busy_gen = 0;
     925  5287111793 :         bool                    deactivate = false;
     926  5287111793 :         bool                    isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
     927             : 
     928  5287111793 :         *new = 0;
     929             : 
     930  5287111793 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
     931  5284368828 :         if (error)
     932             :                 return error;
     933  5284368828 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
     934           0 :                 xfs_btree_mark_sick(cur);
     935           0 :                 return -EFSCORRUPTED;
     936             :         }
     937             : 
     938             :         /*
     939             :          * Check minlen and deactivate a cntbt cursor if out of acceptable size
     940             :          * range (i.e., walking backwards looking for a minlen extent).
     941             :          */
     942  5284368828 :         if (len < args->minlen) {
     943   339184922 :                 deactivate = !isbnobt;
     944   339184922 :                 goto out;
     945             :         }
     946             : 
     947  4945183906 :         busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
     948             :                                          &busy_gen);
     949  4958858241 :         acur->busy |= busy;
     950  4958858241 :         if (busy)
     951  2831955146 :                 acur->busy_gen = busy_gen;
     952             :         /* deactivate a bnobt cursor outside of locality range */
     953  4958858241 :         if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
     954       22260 :                 deactivate = isbnobt;
     955       22260 :                 goto out;
     956             :         }
     957  4958835981 :         if (lena < args->minlen)
     958  2683502712 :                 goto out;
     959             : 
     960  2275333269 :         args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
     961  2275333269 :         xfs_alloc_fix_len(args);
     962  2274565054 :         ASSERT(args->len >= args->minlen);
     963  2274565054 :         if (args->len < acur->len)
     964     2264142 :                 goto out;
     965             : 
     966             :         /*
     967             :          * We have an aligned record that satisfies minlen and beats or matches
     968             :          * the candidate extent size. Compare locality for near allocation mode.
     969             :          */
     970  2272300912 :         diff = xfs_alloc_compute_diff(args->agbno, args->len,
     971             :                                       args->alignment, args->datatype,
     972             :                                       bnoa, lena, &bnew);
     973  2272771188 :         if (bnew == NULLAGBLOCK)
     974           0 :                 goto out;
     975             : 
     976             :         /*
     977             :          * Deactivate a bnobt cursor with worse locality than the current best.
     978             :          */
     979  2272771188 :         if (diff > acur->diff) {
     980  1723036091 :                 deactivate = isbnobt;
     981  1723036091 :                 goto out;
     982             :         }
     983             : 
     984   549735097 :         ASSERT(args->len > acur->len ||
     985             :                (args->len == acur->len && diff <= acur->diff));
     986   549735097 :         acur->rec_bno = bno;
     987   549735097 :         acur->rec_len = len;
     988   549735097 :         acur->bno = bnew;
     989   549735097 :         acur->len = args->len;
     990   549735097 :         acur->diff = diff;
     991   549735097 :         *new = 1;
     992             : 
     993             :         /*
     994             :          * We're done if we found a perfect allocation. This only deactivates
     995             :          * the current cursor, but this is just an optimization to terminate a
     996             :          * cntbt search that otherwise runs to the edge of the tree.
     997             :          */
     998   549735097 :         if (acur->diff == 0 && acur->len == args->maxlen)
     999             :                 deactivate = true;
    1000   528012175 : out:
    1001  4748010127 :         if (deactivate)
    1002    33144954 :                 cur->bc_ag.abt.active = false;
    1003  5297745224 :         trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
    1004  5297745224 :                                   *new);
    1005  5297745224 :         return 0;
    1006             : }
    1007             : 
    1008             : /*
    1009             :  * Complete an allocation of a candidate extent. Remove the extent from both
    1010             :  * trees and update the args structure.
    1011             :  */
    1012             : STATIC int
    1013    78943825 : xfs_alloc_cur_finish(
    1014             :         struct xfs_alloc_arg    *args,
    1015             :         struct xfs_alloc_cur    *acur)
    1016             : {
    1017    78943825 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1018    78943825 :         int                     error;
    1019             : 
    1020    78943825 :         ASSERT(acur->cnt && acur->bnolt);
    1021    78943825 :         ASSERT(acur->bno >= acur->rec_bno);
    1022    78943825 :         ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
    1023    78943825 :         ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
    1024             : 
    1025    78943825 :         error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
    1026             :                                       acur->rec_len, acur->bno, acur->len, 0);
    1027    78939022 :         if (error)
    1028             :                 return error;
    1029             : 
    1030    78935799 :         args->agbno = acur->bno;
    1031    78935799 :         args->len = acur->len;
    1032    78935799 :         args->wasfromfl = 0;
    1033             : 
    1034    78935799 :         trace_xfs_alloc_cur(args);
    1035    78935799 :         return 0;
    1036             : }
    1037             : 
    1038             : /*
    1039             :  * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
    1040             :  * bno optimized lookup to search for extents with ideal size and locality.
    1041             :  */
    1042             : STATIC int
    1043   192796432 : xfs_alloc_cntbt_iter(
    1044             :         struct xfs_alloc_arg            *args,
    1045             :         struct xfs_alloc_cur            *acur)
    1046             : {
    1047   192796432 :         struct xfs_btree_cur    *cur = acur->cnt;
    1048   192796432 :         xfs_agblock_t           bno;
    1049   192796432 :         xfs_extlen_t            len, cur_len;
    1050   192796432 :         int                     error;
    1051   192796432 :         int                     i;
    1052             : 
    1053   192796432 :         if (!xfs_alloc_cur_active(cur))
    1054             :                 return 0;
    1055             : 
    1056             :         /* locality optimized lookup */
    1057   192502109 :         cur_len = acur->cur_len;
    1058   192502109 :         error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
    1059   192543883 :         if (error)
    1060             :                 return error;
    1061   192543834 :         if (i == 0)
    1062             :                 return 0;
    1063   176965391 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1064   176938196 :         if (error)
    1065             :                 return error;
    1066             : 
    1067             :         /* check the current record and update search length from it */
    1068   176940157 :         error = xfs_alloc_cur_check(args, acur, cur, &i);
    1069   177010510 :         if (error)
    1070             :                 return error;
    1071   177010510 :         ASSERT(len >= acur->cur_len);
    1072   177010510 :         acur->cur_len = len;
    1073             : 
    1074             :         /*
    1075             :          * We looked up the first record >= [agbno, len] above. The agbno is a
    1076             :          * secondary key and so the current record may lie just before or after
    1077             :          * agbno. If it is past agbno, check the previous record too so long as
    1078             :          * the length matches as it may be closer. Don't check a smaller record
    1079             :          * because that could deactivate our cursor.
    1080             :          */
    1081   177010510 :         if (bno > args->agbno) {
    1082   172849585 :                 error = xfs_btree_decrement(cur, 0, &i);
    1083   172813624 :                 if (!error && i) {
    1084   170978735 :                         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1085   170966292 :                         if (!error && i && len == acur->cur_len)
    1086    41154625 :                                 error = xfs_alloc_cur_check(args, acur, cur,
    1087             :                                                             &i);
    1088             :                 }
    1089   172807581 :                 if (error)
    1090             :                         return error;
    1091             :         }
    1092             : 
    1093             :         /*
    1094             :          * Increment the search key until we find at least one allocation
    1095             :          * candidate or if the extent we found was larger. Otherwise, double the
    1096             :          * search key to optimize the search. Efficiency is more important here
    1097             :          * than absolute best locality.
    1098             :          */
    1099   176968506 :         cur_len <<= 1;
    1100   176968506 :         if (!acur->len || acur->cur_len >= cur_len)
    1101   127314833 :                 acur->cur_len++;
    1102             :         else
    1103    49653673 :                 acur->cur_len = cur_len;
    1104             : 
    1105             :         return error;
    1106             : }
    1107             : 
    1108             : /*
    1109             :  * Deal with the case where only small freespaces remain. Either return the
    1110             :  * contents of the last freespace record, or allocate space from the freelist if
    1111             :  * there is nothing in the tree.
    1112             :  */
    1113             : STATIC int                      /* error */
    1114     1181871 : xfs_alloc_ag_vextent_small(
    1115             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    1116             :         struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
    1117             :         xfs_agblock_t           *fbnop, /* result block number */
    1118             :         xfs_extlen_t            *flenp, /* result length */
    1119             :         int                     *stat)  /* status: 0-freelist, 1-normal/none */
    1120             : {
    1121     1181871 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1122     1181871 :         int                     error = 0;
    1123     1181871 :         xfs_agblock_t           fbno = NULLAGBLOCK;
    1124     1181871 :         xfs_extlen_t            flen = 0;
    1125     1181871 :         int                     i = 0;
    1126             : 
    1127             :         /*
    1128             :          * If a cntbt cursor is provided, try to allocate the largest record in
    1129             :          * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
    1130             :          * allocation. Make sure to respect minleft even when pulling from the
    1131             :          * freelist.
    1132             :          */
    1133     1181871 :         if (ccur)
    1134     1181871 :                 error = xfs_btree_decrement(ccur, 0, &i);
    1135     1181872 :         if (error)
    1136           0 :                 goto error;
    1137     1181872 :         if (i) {
    1138     1181872 :                 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
    1139     1181870 :                 if (error)
    1140           0 :                         goto error;
    1141     1181870 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1142           0 :                         xfs_btree_mark_sick(ccur);
    1143           0 :                         error = -EFSCORRUPTED;
    1144           0 :                         goto error;
    1145             :                 }
    1146     1181870 :                 goto out;
    1147             :         }
    1148             : 
    1149           0 :         if (args->minlen != 1 || args->alignment != 1 ||
    1150           0 :             args->resv == XFS_AG_RESV_AGFL ||
    1151           0 :             be32_to_cpu(agf->agf_flcount) <= args->minleft)
    1152           0 :                 goto out;
    1153             : 
    1154           0 :         error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
    1155             :                         &fbno, 0);
    1156           0 :         if (error)
    1157           0 :                 goto error;
    1158           0 :         if (fbno == NULLAGBLOCK)
    1159           0 :                 goto out;
    1160             : 
    1161           0 :         xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
    1162           0 :                               (args->datatype & XFS_ALLOC_NOBUSY));
    1163             : 
    1164           0 :         if (args->datatype & XFS_ALLOC_USERDATA) {
    1165           0 :                 struct xfs_buf  *bp;
    1166             : 
    1167           0 :                 error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
    1168           0 :                                 XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
    1169           0 :                                 args->mp->m_bsize, 0, &bp);
    1170           0 :                 if (error)
    1171           0 :                         goto error;
    1172           0 :                 xfs_trans_binval(args->tp, bp);
    1173             :         }
    1174           0 :         *fbnop = args->agbno = fbno;
    1175           0 :         *flenp = args->len = 1;
    1176           0 :         if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
    1177           0 :                 xfs_btree_mark_sick(ccur);
    1178           0 :                 error = -EFSCORRUPTED;
    1179           0 :                 goto error;
    1180             :         }
    1181           0 :         args->wasfromfl = 1;
    1182           0 :         trace_xfs_alloc_small_freelist(args);
    1183             : 
    1184             :         /*
    1185             :          * If we're feeding an AGFL block to something that doesn't live in the
    1186             :          * free space, we need to clear out the OWN_AG rmap.
    1187             :          */
    1188           0 :         error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
    1189             :                               &XFS_RMAP_OINFO_AG);
    1190           0 :         if (error)
    1191           0 :                 goto error;
    1192             : 
    1193           0 :         *stat = 0;
    1194           0 :         return 0;
    1195             : 
    1196     1181870 : out:
    1197             :         /*
    1198             :          * Can't do the allocation, give up.
    1199             :          */
    1200     1181870 :         if (flen < args->minlen) {
    1201           0 :                 args->agbno = NULLAGBLOCK;
    1202           0 :                 trace_xfs_alloc_small_notenough(args);
    1203           0 :                 flen = 0;
    1204             :         }
    1205     1181870 :         *fbnop = fbno;
    1206     1181870 :         *flenp = flen;
    1207     1181870 :         *stat = 1;
    1208     1181870 :         trace_xfs_alloc_small_done(args);
    1209     1181870 :         return 0;
    1210             : 
    1211           0 : error:
    1212           0 :         trace_xfs_alloc_small_error(args);
    1213           0 :         return error;
    1214             : }
    1215             : 
    1216             : /*
    1217             :  * Allocate a variable extent at exactly agno/bno.
    1218             :  * Extent's length (returned in *len) will be between minlen and maxlen,
    1219             :  * and of the form k * prod + mod unless there's nothing that large.
    1220             :  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
    1221             :  */
    1222             : STATIC int                      /* error */
    1223     1052791 : xfs_alloc_ag_vextent_exact(
    1224             :         xfs_alloc_arg_t *args)  /* allocation argument structure */
    1225             : {
    1226     1052791 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1227     1052791 :         struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
    1228     1052791 :         struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
    1229     1052791 :         int             error;
    1230     1052791 :         xfs_agblock_t   fbno;   /* start block of found extent */
    1231     1052791 :         xfs_extlen_t    flen;   /* length of found extent */
    1232     1052791 :         xfs_agblock_t   tbno;   /* start block of busy extent */
    1233     1052791 :         xfs_extlen_t    tlen;   /* length of busy extent */
    1234     1052791 :         xfs_agblock_t   tend;   /* end block of busy extent */
    1235     1052791 :         int             i;      /* success/failure of operation */
    1236     1052791 :         unsigned        busy_gen;
    1237             : 
    1238     1052791 :         ASSERT(args->alignment == 1);
    1239             : 
    1240             :         /*
    1241             :          * Allocate/initialize a cursor for the by-number freespace btree.
    1242             :          */
    1243     1052791 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1244             :                                           args->pag, XFS_BTNUM_BNO);
    1245             : 
    1246             :         /*
    1247             :          * Lookup bno and minlen in the btree (minlen is irrelevant, really).
    1248             :          * Look for the closest free block <= bno, it must contain bno
    1249             :          * if any free block does.
    1250             :          */
    1251     1052865 :         error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
    1252     1052822 :         if (error)
    1253           0 :                 goto error0;
    1254     1052822 :         if (!i)
    1255       69887 :                 goto not_found;
    1256             : 
    1257             :         /*
    1258             :          * Grab the freespace record.
    1259             :          */
    1260      982935 :         error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
    1261      982792 :         if (error)
    1262           0 :                 goto error0;
    1263      982792 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1264           0 :                 xfs_btree_mark_sick(bno_cur);
    1265           0 :                 error = -EFSCORRUPTED;
    1266           0 :                 goto error0;
    1267             :         }
    1268      982792 :         ASSERT(fbno <= args->agbno);
    1269             : 
    1270             :         /*
    1271             :          * Check for overlapping busy extents.
    1272             :          */
    1273      982792 :         tbno = fbno;
    1274      982792 :         tlen = flen;
    1275      982792 :         xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
    1276             : 
    1277             :         /*
    1278             :          * Give up if the start of the extent is busy, or the freespace isn't
    1279             :          * long enough for the minimum request.
    1280             :          */
    1281      982873 :         if (tbno > args->agbno)
    1282        3955 :                 goto not_found;
    1283      978918 :         if (tlen < args->minlen)
    1284      540277 :                 goto not_found;
    1285      438641 :         tend = tbno + tlen;
    1286      438641 :         if (tend < args->agbno + args->minlen)
    1287       10717 :                 goto not_found;
    1288             : 
    1289             :         /*
    1290             :          * End of extent will be smaller of the freespace end and the
    1291             :          * maximal requested end.
    1292             :          *
    1293             :          * Fix the length according to mod and prod if given.
    1294             :          */
    1295      427924 :         args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
    1296      427924 :                                                 - args->agbno;
    1297      427924 :         xfs_alloc_fix_len(args);
    1298      427874 :         ASSERT(args->agbno + args->len <= tend);
    1299             : 
    1300             :         /*
    1301             :          * We are allocating agbno for args->len
    1302             :          * Allocate/initialize a cursor for the by-size btree.
    1303             :          */
    1304      427874 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1305             :                                         args->pag, XFS_BTNUM_CNT);
    1306      428009 :         ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
    1307      428009 :         error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
    1308             :                                       args->len, XFSA_FIXUP_BNO_OK);
    1309      428015 :         if (error) {
    1310           0 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1311           0 :                 goto error0;
    1312             :         }
    1313             : 
    1314      428015 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1315      428017 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1316             : 
    1317      428054 :         args->wasfromfl = 0;
    1318      428054 :         trace_xfs_alloc_exact_done(args);
    1319      428054 :         return 0;
    1320             : 
    1321      624836 : not_found:
    1322             :         /* Didn't find it, return null. */
    1323      624836 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1324      624841 :         args->agbno = NULLAGBLOCK;
    1325      624841 :         trace_xfs_alloc_exact_notfound(args);
    1326      624841 :         return 0;
    1327             : 
    1328           0 : error0:
    1329           0 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1330           0 :         trace_xfs_alloc_exact_error(args);
    1331           0 :         return error;
    1332             : }
    1333             : 
    1334             : /*
    1335             :  * Search a given number of btree records in a given direction. Check each
    1336             :  * record against the good extent we've already found.
    1337             :  */
    1338             : STATIC int
    1339   491618510 : xfs_alloc_walk_iter(
    1340             :         struct xfs_alloc_arg    *args,
    1341             :         struct xfs_alloc_cur    *acur,
    1342             :         struct xfs_btree_cur    *cur,
    1343             :         bool                    increment,
    1344             :         bool                    find_one, /* quit on first candidate */
    1345             :         int                     count,    /* rec count (-1 for infinite) */
    1346             :         int                     *stat)
    1347             : {
    1348   491618510 :         int                     error;
    1349   491618510 :         int                     i;
    1350             : 
    1351   491618510 :         *stat = 0;
    1352             : 
    1353             :         /*
    1354             :          * Search so long as the cursor is active or we find a better extent.
    1355             :          * The cursor is deactivated if it extends beyond the range of the
    1356             :          * current allocation candidate.
    1357             :          */
    1358  5499972245 :         while (xfs_alloc_cur_active(cur) && count) {
    1359  5067048243 :                 error = xfs_alloc_cur_check(args, acur, cur, &i);
    1360  5076863104 :                 if (error)
    1361           0 :                         return error;
    1362  5076863104 :                 if (i == 1) {
    1363   521617189 :                         *stat = 1;
    1364   521617189 :                         if (find_one)
    1365             :                                 break;
    1366             :                 }
    1367  5031342175 :                 if (!xfs_alloc_cur_active(cur))
    1368             :                         break;
    1369             : 
    1370  5011158369 :                 if (increment)
    1371  3569083869 :                         error = xfs_btree_increment(cur, 0, &i);
    1372             :                 else
    1373  1442074500 :                         error = xfs_btree_decrement(cur, 0, &i);
    1374  5008353746 :                 if (error)
    1375          11 :                         return error;
    1376  5008353735 :                 if (i == 0)
    1377    29641257 :                         cur->bc_ag.abt.active = false;
    1378             : 
    1379  5008353735 :                 if (count > 0)
    1380   354714878 :                         count--;
    1381             :         }
    1382             : 
    1383             :         return 0;
    1384             : }
    1385             : 
    1386             : /*
    1387             :  * Search the by-bno and by-size btrees in parallel in search of an extent with
    1388             :  * ideal locality based on the NEAR mode ->agbno locality hint.
    1389             :  */
    1390             : STATIC int
    1391    47522285 : xfs_alloc_ag_vextent_locality(
    1392             :         struct xfs_alloc_arg    *args,
    1393             :         struct xfs_alloc_cur    *acur,
    1394             :         int                     *stat)
    1395             : {
    1396    47522285 :         struct xfs_btree_cur    *fbcur = NULL;
    1397    47522285 :         int                     error;
    1398    47522285 :         int                     i;
    1399    47522285 :         bool                    fbinc;
    1400             : 
    1401    47522285 :         ASSERT(acur->len == 0);
    1402             : 
    1403    47522285 :         *stat = 0;
    1404             : 
    1405    47522285 :         error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
    1406    47534989 :         if (error)
    1407             :                 return error;
    1408    47533089 :         error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
    1409    47537721 :         if (error)
    1410             :                 return error;
    1411    47537143 :         error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
    1412    47537310 :         if (error)
    1413             :                 return error;
    1414             : 
    1415             :         /*
    1416             :          * Search the bnobt and cntbt in parallel. Search the bnobt left and
    1417             :          * right and lookup the closest extent to the locality hint for each
    1418             :          * extent size key in the cntbt. The entire search terminates
    1419             :          * immediately on a bnobt hit because that means we've found best case
    1420             :          * locality. Otherwise the search continues until the cntbt cursor runs
    1421             :          * off the end of the tree. If no allocation candidate is found at this
    1422             :          * point, give up on locality, walk backwards from the end of the cntbt
    1423             :          * and take the first available extent.
    1424             :          *
    1425             :          * The parallel tree searches balance each other out to provide fairly
    1426             :          * consistent performance for various situations. The bnobt search can
    1427             :          * have pathological behavior in the worst case scenario of larger
    1428             :          * allocation requests and fragmented free space. On the other hand, the
    1429             :          * bnobt is able to satisfy most smaller allocation requests much more
    1430             :          * quickly than the cntbt. The cntbt search can sift through fragmented
    1431             :          * free space and sets of free extents for larger allocation requests
    1432             :          * more quickly than the bnobt. Since the locality hint is just a hint
    1433             :          * and we don't want to scan the entire bnobt for perfect locality, the
    1434             :          * cntbt search essentially bounds the bnobt search such that we can
    1435             :          * find good enough locality at reasonable performance in most cases.
    1436             :          */
    1437    40767382 :         while (xfs_alloc_cur_active(acur->bnolt) ||
    1438   224486738 :                xfs_alloc_cur_active(acur->bnogt) ||
    1439       21961 :                xfs_alloc_cur_active(acur->cnt)) {
    1440             : 
    1441   224466190 :                 trace_xfs_alloc_cur_lookup(args);
    1442             : 
    1443             :                 /*
    1444             :                  * Search the bnobt left and right. In the case of a hit, finish
    1445             :                  * the search in the opposite direction and we're done.
    1446             :                  */
    1447   224468795 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
    1448             :                                             true, 1, &i);
    1449   224460289 :                 if (error)
    1450           1 :                         return error;
    1451   224460288 :                 if (i == 1) {
    1452    21105218 :                         trace_xfs_alloc_cur_left(args);
    1453    21103659 :                         fbcur = acur->bnogt;
    1454    21103659 :                         fbinc = true;
    1455    21103659 :                         break;
    1456             :                 }
    1457   203355070 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
    1458             :                                             1, &i);
    1459   203355897 :                 if (error)
    1460           0 :                         return error;
    1461   203355897 :                 if (i == 1) {
    1462    10558679 :                         trace_xfs_alloc_cur_right(args);
    1463    10558842 :                         fbcur = acur->bnolt;
    1464    10558842 :                         fbinc = false;
    1465    10558842 :                         break;
    1466             :                 }
    1467             : 
    1468             :                 /*
    1469             :                  * Check the extent with best locality based on the current
    1470             :                  * extent size search key and keep track of the best candidate.
    1471             :                  */
    1472   192797218 :                 error = xfs_alloc_cntbt_iter(args, acur);
    1473   192820562 :                 if (error)
    1474          49 :                         return error;
    1475   192820513 :                 if (!xfs_alloc_cur_active(acur->cnt)) {
    1476    15871562 :                         trace_xfs_alloc_cur_lookup_done(args);
    1477    15871562 :                         break;
    1478             :                 }
    1479             :         }
    1480             : 
    1481             :         /*
    1482             :          * If we failed to find anything due to busy extents, return empty
    1483             :          * handed so the caller can flush and retry. If no busy extents were
    1484             :          * found, walk backwards from the end of the cntbt as a last resort.
    1485             :          */
    1486    47533998 :         if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
    1487      284172 :                 error = xfs_btree_decrement(acur->cnt, 0, &i);
    1488      284171 :                 if (error)
    1489             :                         return error;
    1490      284171 :                 if (i) {
    1491      284171 :                         acur->cnt->bc_ag.abt.active = true;
    1492      284171 :                         fbcur = acur->cnt;
    1493      284171 :                         fbinc = false;
    1494             :                 }
    1495             :         }
    1496             : 
    1497             :         /*
    1498             :          * Search in the opposite direction for a better entry in the case of
    1499             :          * a bnobt hit or walk backwards from the end of the cntbt.
    1500             :          */
    1501    47533502 :         if (fbcur) {
    1502    31946936 :                 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
    1503             :                                             &i);
    1504    31950501 :                 if (error)
    1505             :                         return error;
    1506             :         }
    1507             : 
    1508    47537057 :         if (acur->len)
    1509    47420307 :                 *stat = 1;
    1510             : 
    1511             :         return 0;
    1512             : }
    1513             : 
    1514             : /* Check the last block of the cnt btree for allocations. */
    1515             : static int
    1516    63978734 : xfs_alloc_ag_vextent_lastblock(
    1517             :         struct xfs_alloc_arg    *args,
    1518             :         struct xfs_alloc_cur    *acur,
    1519             :         xfs_agblock_t           *bno,
    1520             :         xfs_extlen_t            *len,
    1521             :         bool                    *allocated)
    1522             : {
    1523    63978734 :         int                     error;
    1524    63978734 :         int                     i;
    1525             : 
    1526             : #ifdef DEBUG
    1527             :         /* Randomly don't execute the first algorithm. */
    1528    63978734 :         if (get_random_u32_below(2))
    1529             :                 return 0;
    1530             : #endif
    1531             : 
    1532             :         /*
    1533             :          * Start from the entry that lookup found, sequence through all larger
    1534             :          * free blocks.  If we're actually pointing at a record smaller than
    1535             :          * maxlen, go to the start of this block, and skip all those smaller
    1536             :          * than minlen.
    1537             :          */
    1538    31987660 :         if (*len || args->alignment > 1) {
    1539     1062089 :                 acur->cnt->bc_levels[0].ptr = 1;
    1540   118298874 :                 do {
    1541   118298874 :                         error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
    1542   118299744 :                         if (error)
    1543           0 :                                 return error;
    1544   118299744 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1545           0 :                                 xfs_btree_mark_sick(acur->cnt);
    1546           0 :                                 return -EFSCORRUPTED;
    1547             :                         }
    1548   118299744 :                         if (*len >= args->minlen)
    1549             :                                 break;
    1550   117237647 :                         error = xfs_btree_increment(acur->cnt, 0, &i);
    1551   117236785 :                         if (error)
    1552           0 :                                 return error;
    1553   117236785 :                 } while (i);
    1554     1062097 :                 ASSERT(*len >= args->minlen);
    1555     1062097 :                 if (!i)
    1556             :                         return 0;
    1557             :         }
    1558             : 
    1559    31987668 :         error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
    1560    32007197 :         if (error)
    1561             :                 return error;
    1562             : 
    1563             :         /*
    1564             :          * It didn't work.  We COULD be in a case where there's a good record
    1565             :          * somewhere, so try again.
    1566             :          */
    1567    32007197 :         if (acur->len == 0)
    1568             :                 return 0;
    1569             : 
    1570    31527020 :         trace_xfs_alloc_near_first(args);
    1571    31526888 :         *allocated = true;
    1572    31526888 :         return 0;
    1573             : }
    1574             : 
    1575             : /*
    1576             :  * Allocate a variable extent near bno in the allocation group agno.
    1577             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1578             :  * and of the form k * prod + mod unless there's nothing that large.
    1579             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1580             :  */
    1581             : STATIC int
    1582    78903374 : xfs_alloc_ag_vextent_near(
    1583             :         struct xfs_alloc_arg    *args,
    1584             :         uint32_t                alloc_flags)
    1585             : {
    1586    78903374 :         struct xfs_alloc_cur    acur = {};
    1587    78903374 :         int                     error;          /* error code */
    1588    78903374 :         int                     i;              /* result code, temporary */
    1589    78903374 :         xfs_agblock_t           bno;
    1590    78903374 :         xfs_extlen_t            len;
    1591             : 
    1592             :         /* handle uninitialized agbno range so caller doesn't have to */
    1593    78903374 :         if (!args->min_agbno && !args->max_agbno)
    1594    78170376 :                 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
    1595    78903374 :         ASSERT(args->min_agbno <= args->max_agbno);
    1596             : 
    1597             :         /* clamp agbno to the range if it's outside */
    1598    78903374 :         if (args->agbno < args->min_agbno)
    1599      311858 :                 args->agbno = args->min_agbno;
    1600    78903374 :         if (args->agbno > args->max_agbno)
    1601           0 :                 args->agbno = args->max_agbno;
    1602             : 
    1603             :         /* Retry once quickly if we find busy extents before blocking. */
    1604    78903374 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1605    79015494 : restart:
    1606    79015494 :         len = 0;
    1607             : 
    1608             :         /*
    1609             :          * Set up cursors and see if there are any free extents as big as
    1610             :          * maxlen. If not, pick the last entry in the tree unless the tree is
    1611             :          * empty.
    1612             :          */
    1613    79015494 :         error = xfs_alloc_cur_setup(args, &acur);
    1614    79060323 :         if (error == -ENOSPC) {
    1615      889409 :                 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
    1616             :                                 &len, &i);
    1617      889405 :                 if (error)
    1618           0 :                         goto out;
    1619      889405 :                 if (i == 0 || len == 0) {
    1620           0 :                         trace_xfs_alloc_near_noentry(args);
    1621           0 :                         goto out;
    1622             :                 }
    1623      889405 :                 ASSERT(i == 1);
    1624    78170914 :         } else if (error) {
    1625        1590 :                 goto out;
    1626             :         }
    1627             : 
    1628             :         /*
    1629             :          * First algorithm.
    1630             :          * If the requested extent is large wrt the freespaces available
    1631             :          * in this a.g., then the cursor will be pointing to a btree entry
    1632             :          * near the right edge of the tree.  If it's in the last btree leaf
    1633             :          * block, then we just examine all the entries in that block
    1634             :          * that are big enough, and pick the best one.
    1635             :          */
    1636    79058729 :         if (xfs_btree_islastblock(acur.cnt, 0)) {
    1637    63984656 :                 bool            allocated = false;
    1638             : 
    1639    63984656 :                 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
    1640             :                                 &allocated);
    1641    63986975 :                 if (error)
    1642           0 :                         goto out;
    1643    63986975 :                 if (allocated)
    1644    31526925 :                         goto alloc_finish;
    1645             :         }
    1646             : 
    1647             :         /*
    1648             :          * Second algorithm. Combined cntbt and bnobt search to find ideal
    1649             :          * locality.
    1650             :          */
    1651    47513404 :         error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
    1652    47537873 :         if (error)
    1653         629 :                 goto out;
    1654             : 
    1655             :         /*
    1656             :          * If we couldn't get anything, give up.
    1657             :          */
    1658    47537244 :         if (!acur.len) {
    1659      116750 :                 if (acur.busy) {
    1660             :                         /*
    1661             :                          * Our only valid extents must have been busy. Flush and
    1662             :                          * retry the allocation again. If we get an -EAGAIN
    1663             :                          * error, we're being told that a deadlock was avoided
    1664             :                          * and the current transaction needs committing before
    1665             :                          * the allocation can be retried.
    1666             :                          */
    1667      112123 :                         trace_xfs_alloc_near_busy(args);
    1668      112123 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1669             :                                         acur.busy_gen, alloc_flags);
    1670      112121 :                         if (error)
    1671           1 :                                 goto out;
    1672             : 
    1673      112120 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1674      112120 :                         goto restart;
    1675             :                 }
    1676        4627 :                 trace_xfs_alloc_size_neither(args);
    1677        4627 :                 args->agbno = NULLAGBLOCK;
    1678        4627 :                 goto out;
    1679             :         }
    1680             : 
    1681    47420494 : alloc_finish:
    1682             :         /* fix up btrees on a successful allocation */
    1683    78947419 :         error = xfs_alloc_cur_finish(args, &acur);
    1684             : 
    1685    78937514 : out:
    1686    78937514 :         xfs_alloc_cur_close(&acur, error);
    1687    78952575 :         return error;
    1688             : }
    1689             : 
    1690             : /*
    1691             :  * Allocate a variable extent anywhere in the allocation group agno.
    1692             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1693             :  * and of the form k * prod + mod unless there's nothing that large.
    1694             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1695             :  */
    1696             : static int
    1697     6754131 : xfs_alloc_ag_vextent_size(
    1698             :         struct xfs_alloc_arg    *args,
    1699             :         uint32_t                alloc_flags)
    1700             : {
    1701     6754131 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1702     6754131 :         struct xfs_btree_cur    *bno_cur;
    1703     6754131 :         struct xfs_btree_cur    *cnt_cur;
    1704     6754131 :         xfs_agblock_t           fbno;           /* start of found freespace */
    1705     6754131 :         xfs_extlen_t            flen;           /* length of found freespace */
    1706     6754131 :         xfs_agblock_t           rbno;           /* returned block number */
    1707     6754131 :         xfs_extlen_t            rlen;           /* length of returned extent */
    1708     6754131 :         bool                    busy;
    1709     6754131 :         unsigned                busy_gen;
    1710     6754131 :         int                     error;
    1711     6754131 :         int                     i;
    1712             : 
    1713             :         /* Retry once quickly if we find busy extents before blocking. */
    1714     6754131 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1715             : restart:
    1716             :         /*
    1717             :          * Allocate and initialize a cursor for the by-size btree.
    1718             :          */
    1719     6792646 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1720             :                                         args->pag, XFS_BTNUM_CNT);
    1721     6793267 :         bno_cur = NULL;
    1722             : 
    1723             :         /*
    1724             :          * Look for an entry >= maxlen+alignment-1 blocks.
    1725             :          */
    1726     6793317 :         if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
    1727     6793267 :                         args->maxlen + args->alignment - 1, &i)))
    1728          90 :                 goto error0;
    1729             : 
    1730             :         /*
    1731             :          * If none then we have to settle for a smaller extent. In the case that
    1732             :          * there are no large extents, this will return the last entry in the
    1733             :          * tree unless the tree is empty. In the case that there are only busy
    1734             :          * large extents, this will return the largest small extent unless there
    1735             :          * are no smaller extents available.
    1736             :          */
    1737     6793227 :         if (!i) {
    1738      292464 :                 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
    1739             :                                                    &fbno, &flen, &i);
    1740      292463 :                 if (error)
    1741           0 :                         goto error0;
    1742      292463 :                 if (i == 0 || flen == 0) {
    1743           0 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1744           0 :                         trace_xfs_alloc_size_noentry(args);
    1745           0 :                         return 0;
    1746             :                 }
    1747      292463 :                 ASSERT(i == 1);
    1748      292463 :                 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
    1749             :                                 &rlen, &busy_gen);
    1750             :         } else {
    1751             :                 /*
    1752             :                  * Search for a non-busy extent that is large enough.
    1753             :                  */
    1754  1261306675 :                 for (;;) {
    1755   633903719 :                         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
    1756   633603437 :                         if (error)
    1757           0 :                                 goto error0;
    1758   633603437 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1759           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1760           0 :                                 error = -EFSCORRUPTED;
    1761           0 :                                 goto error0;
    1762             :                         }
    1763             : 
    1764   633603437 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1765             :                                         &rbno, &rlen, &busy_gen);
    1766             : 
    1767   634266600 :                         if (rlen >= args->maxlen)
    1768             :                                 break;
    1769             : 
    1770   627802655 :                         error = xfs_btree_increment(cnt_cur, 0, &i);
    1771   627440086 :                         if (error)
    1772           0 :                                 goto error0;
    1773   627440086 :                         if (i)
    1774   627402956 :                                 continue;
    1775             : 
    1776             :                         /*
    1777             :                          * Our only valid extents must have been busy. Flush and
    1778             :                          * retry the allocation again. If we get an -EAGAIN
    1779             :                          * error, we're being told that a deadlock was avoided
    1780             :                          * and the current transaction needs committing before
    1781             :                          * the allocation can be retried.
    1782             :                          */
    1783       37130 :                         trace_xfs_alloc_size_busy(args);
    1784       37130 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1785             :                                         busy_gen, alloc_flags);
    1786       37128 :                         if (error)
    1787           1 :                                 goto error0;
    1788             : 
    1789       37127 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1790       37127 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1791       37129 :                         goto restart;
    1792             :                 }
    1793             :         }
    1794             : 
    1795             :         /*
    1796             :          * In the first case above, we got the last entry in the
    1797             :          * by-size btree.  Now we check to see if the space hits maxlen
    1798             :          * once aligned; if not, we search left for something better.
    1799             :          * This can't happen in the second case above.
    1800             :          */
    1801     6756117 :         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1802     6756117 :         if (XFS_IS_CORRUPT(args->mp,
    1803             :                            rlen != 0 &&
    1804             :                            (rlen > flen ||
    1805             :                             rbno + rlen > fbno + flen))) {
    1806           0 :                 xfs_btree_mark_sick(cnt_cur);
    1807           0 :                 error = -EFSCORRUPTED;
    1808           0 :                 goto error0;
    1809             :         }
    1810     6756117 :         if (rlen < args->maxlen) {
    1811      292458 :                 xfs_agblock_t   bestfbno;
    1812      292458 :                 xfs_extlen_t    bestflen;
    1813      292458 :                 xfs_agblock_t   bestrbno;
    1814      292458 :                 xfs_extlen_t    bestrlen;
    1815             : 
    1816      292458 :                 bestrlen = rlen;
    1817      292458 :                 bestrbno = rbno;
    1818      292458 :                 bestflen = flen;
    1819      292458 :                 bestfbno = fbno;
    1820    86443550 :                 for (;;) {
    1821    86443550 :                         if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
    1822           0 :                                 goto error0;
    1823    86443549 :                         if (i == 0)
    1824             :                                 break;
    1825    86440415 :                         if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
    1826             :                                         &i)))
    1827           0 :                                 goto error0;
    1828    86440407 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1829           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1830           0 :                                 error = -EFSCORRUPTED;
    1831           0 :                                 goto error0;
    1832             :                         }
    1833    86440407 :                         if (flen < bestrlen)
    1834             :                                 break;
    1835    86151083 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1836             :                                         &rbno, &rlen, &busy_gen);
    1837    86151093 :                         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1838    86151093 :                         if (XFS_IS_CORRUPT(args->mp,
    1839             :                                            rlen != 0 &&
    1840             :                                            (rlen > flen ||
    1841             :                                             rbno + rlen > fbno + flen))) {
    1842           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1843           0 :                                 error = -EFSCORRUPTED;
    1844           0 :                                 goto error0;
    1845             :                         }
    1846    86151093 :                         if (rlen > bestrlen) {
    1847      160466 :                                 bestrlen = rlen;
    1848      160466 :                                 bestrbno = rbno;
    1849      160466 :                                 bestflen = flen;
    1850      160466 :                                 bestfbno = fbno;
    1851      160466 :                                 if (rlen == args->maxlen)
    1852             :                                         break;
    1853             :                         }
    1854             :                 }
    1855      292459 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
    1856             :                                 &i)))
    1857           0 :                         goto error0;
    1858      292458 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1859           0 :                         xfs_btree_mark_sick(cnt_cur);
    1860           0 :                         error = -EFSCORRUPTED;
    1861           0 :                         goto error0;
    1862             :                 }
    1863      292458 :                 rlen = bestrlen;
    1864      292458 :                 rbno = bestrbno;
    1865      292458 :                 flen = bestflen;
    1866      292458 :                 fbno = bestfbno;
    1867             :         }
    1868     6756117 :         args->wasfromfl = 0;
    1869             :         /*
    1870             :          * Fix up the length.
    1871             :          */
    1872     6756117 :         args->len = rlen;
    1873     6756117 :         if (rlen < args->minlen) {
    1874      157183 :                 if (busy) {
    1875             :                         /*
    1876             :                          * Our only valid extents must have been busy. Flush and
    1877             :                          * retry the allocation again. If we get an -EAGAIN
    1878             :                          * error, we're being told that a deadlock was avoided
    1879             :                          * and the current transaction needs committing before
    1880             :                          * the allocation can be retried.
    1881             :                          */
    1882        1386 :                         trace_xfs_alloc_size_busy(args);
    1883        1386 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1884             :                                         busy_gen, alloc_flags);
    1885        1386 :                         if (error)
    1886           0 :                                 goto error0;
    1887             : 
    1888        1386 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1889        1386 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1890        1386 :                         goto restart;
    1891             :                 }
    1892      155797 :                 goto out_nominleft;
    1893             :         }
    1894     6598934 :         xfs_alloc_fix_len(args);
    1895             : 
    1896     6598357 :         rlen = args->len;
    1897     6598357 :         if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
    1898           0 :                 xfs_btree_mark_sick(cnt_cur);
    1899           0 :                 error = -EFSCORRUPTED;
    1900           0 :                 goto error0;
    1901             :         }
    1902             :         /*
    1903             :          * Allocate and initialize a cursor for the by-block tree.
    1904             :          */
    1905     6598357 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1906             :                                         args->pag, XFS_BTNUM_BNO);
    1907     6599573 :         if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
    1908             :                         rbno, rlen, XFSA_FIXUP_CNT_OK)))
    1909         104 :                 goto error0;
    1910     6598555 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1911     6599316 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1912     6599710 :         cnt_cur = bno_cur = NULL;
    1913     6599710 :         args->len = rlen;
    1914     6599710 :         args->agbno = rbno;
    1915     6599710 :         if (XFS_IS_CORRUPT(args->mp,
    1916             :                            args->agbno + args->len >
    1917             :                            be32_to_cpu(agf->agf_length))) {
    1918           0 :                 xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
    1919           0 :                 error = -EFSCORRUPTED;
    1920           0 :                 goto error0;
    1921             :         }
    1922     6599710 :         trace_xfs_alloc_size_done(args);
    1923     6599710 :         return 0;
    1924             : 
    1925         195 : error0:
    1926         195 :         trace_xfs_alloc_size_error(args);
    1927         195 :         if (cnt_cur)
    1928         195 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1929         195 :         if (bno_cur)
    1930         104 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1931             :         return error;
    1932             : 
    1933             : out_nominleft:
    1934      155797 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1935      155797 :         trace_xfs_alloc_size_nominleft(args);
    1936      155797 :         args->agbno = NULLAGBLOCK;
    1937      155797 :         return 0;
    1938             : }
    1939             : 
    1940             : /*
    1941             :  * Free the extent starting at agno/bno for length.
    1942             :  */
    1943             : STATIC int
    1944    98811282 : xfs_free_ag_extent(
    1945             :         struct xfs_trans                *tp,
    1946             :         struct xfs_buf                  *agbp,
    1947             :         xfs_agnumber_t                  agno,
    1948             :         xfs_agblock_t                   bno,
    1949             :         xfs_extlen_t                    len,
    1950             :         const struct xfs_owner_info     *oinfo,
    1951             :         enum xfs_ag_resv_type           type)
    1952             : {
    1953    98811282 :         struct xfs_mount                *mp;
    1954    98811282 :         struct xfs_btree_cur            *bno_cur;
    1955    98811282 :         struct xfs_btree_cur            *cnt_cur;
    1956    98811282 :         xfs_agblock_t                   gtbno; /* start of right neighbor */
    1957    98811282 :         xfs_extlen_t                    gtlen; /* length of right neighbor */
    1958    98811282 :         xfs_agblock_t                   ltbno; /* start of left neighbor */
    1959    98811282 :         xfs_extlen_t                    ltlen; /* length of left neighbor */
    1960    98811282 :         xfs_agblock_t                   nbno; /* new starting block of freesp */
    1961    98811282 :         xfs_extlen_t                    nlen; /* new length of freespace */
    1962    98811282 :         int                             haveleft; /* have a left neighbor */
    1963    98811282 :         int                             haveright; /* have a right neighbor */
    1964    98811282 :         int                             i;
    1965    98811282 :         int                             error;
    1966    98811282 :         struct xfs_perag                *pag = agbp->b_pag;
    1967             : 
    1968    98811282 :         bno_cur = cnt_cur = NULL;
    1969    98811282 :         mp = tp->t_mountp;
    1970             : 
    1971    98811282 :         if (!xfs_rmap_should_skip_owner_update(oinfo)) {
    1972     3057448 :                 error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
    1973     3057731 :                 if (error)
    1974          26 :                         goto error0;
    1975             :         }
    1976             : 
    1977             :         /*
    1978             :          * Allocate and initialize a cursor for the by-block btree.
    1979             :          */
    1980    98811539 :         bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
    1981             :         /*
    1982             :          * Look for a neighboring block on the left (lower block numbers)
    1983             :          * that is contiguous with this space.
    1984             :          */
    1985    98816437 :         if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
    1986         157 :                 goto error0;
    1987    98817919 :         if (haveleft) {
    1988             :                 /*
    1989             :                  * There is a block to our left.
    1990             :                  */
    1991    97081898 :                 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
    1992           0 :                         goto error0;
    1993    97075041 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1994           0 :                         xfs_btree_mark_sick(bno_cur);
    1995           0 :                         error = -EFSCORRUPTED;
    1996           0 :                         goto error0;
    1997             :                 }
    1998             :                 /*
    1999             :                  * It's not contiguous, though.
    2000             :                  */
    2001    97075041 :                 if (ltbno + ltlen < bno)
    2002    71097589 :                         haveleft = 0;
    2003             :                 else {
    2004             :                         /*
    2005             :                          * If this failure happens the request to free this
    2006             :                          * space was invalid, it's (partly) already free.
    2007             :                          * Very bad.
    2008             :                          */
    2009    25977452 :                         if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
    2010           0 :                                 xfs_btree_mark_sick(bno_cur);
    2011           0 :                                 error = -EFSCORRUPTED;
    2012           0 :                                 goto error0;
    2013             :                         }
    2014             :                 }
    2015             :         }
    2016             :         /*
    2017             :          * Look for a neighboring block on the right (higher block numbers)
    2018             :          * that is contiguous with this space.
    2019             :          */
    2020    98811062 :         if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
    2021           0 :                 goto error0;
    2022    98811949 :         if (haveright) {
    2023             :                 /*
    2024             :                  * There is a block to our right.
    2025             :                  */
    2026    97524350 :                 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
    2027           0 :                         goto error0;
    2028    97530846 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2029           0 :                         xfs_btree_mark_sick(bno_cur);
    2030           0 :                         error = -EFSCORRUPTED;
    2031           0 :                         goto error0;
    2032             :                 }
    2033             :                 /*
    2034             :                  * It's not contiguous, though.
    2035             :                  */
    2036    97530846 :                 if (bno + len < gtbno)
    2037    65980712 :                         haveright = 0;
    2038             :                 else {
    2039             :                         /*
    2040             :                          * If this failure happens the request to free this
    2041             :                          * space was invalid, it's (partly) already free.
    2042             :                          * Very bad.
    2043             :                          */
    2044    31550134 :                         if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
    2045           0 :                                 xfs_btree_mark_sick(bno_cur);
    2046           0 :                                 error = -EFSCORRUPTED;
    2047           0 :                                 goto error0;
    2048             :                         }
    2049             :                 }
    2050             :         }
    2051             :         /*
    2052             :          * Now allocate and initialize a cursor for the by-size tree.
    2053             :          */
    2054    98818445 :         cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
    2055             :         /*
    2056             :          * Have both left and right contiguous neighbors.
    2057             :          * Merge all three into a single free block.
    2058             :          */
    2059    98820020 :         if (haveleft && haveright) {
    2060             :                 /*
    2061             :                  * Delete the old by-size entry on the left.
    2062             :                  */
    2063    14172318 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2064          20 :                         goto error0;
    2065    14172651 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2066           0 :                         xfs_btree_mark_sick(cnt_cur);
    2067           0 :                         error = -EFSCORRUPTED;
    2068           0 :                         goto error0;
    2069             :                 }
    2070    14172651 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2071           0 :                         goto error0;
    2072    14171051 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2073           0 :                         xfs_btree_mark_sick(cnt_cur);
    2074           0 :                         error = -EFSCORRUPTED;
    2075           0 :                         goto error0;
    2076             :                 }
    2077             :                 /*
    2078             :                  * Delete the old by-size entry on the right.
    2079             :                  */
    2080    14171051 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2081          16 :                         goto error0;
    2082    14172480 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2083           0 :                         xfs_btree_mark_sick(cnt_cur);
    2084           0 :                         error = -EFSCORRUPTED;
    2085           0 :                         goto error0;
    2086             :                 }
    2087    14172480 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2088           0 :                         goto error0;
    2089    14172778 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2090           0 :                         xfs_btree_mark_sick(cnt_cur);
    2091           0 :                         error = -EFSCORRUPTED;
    2092           0 :                         goto error0;
    2093             :                 }
    2094             :                 /*
    2095             :                  * Delete the old by-block entry for the right block.
    2096             :                  */
    2097    14172778 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
    2098           0 :                         goto error0;
    2099    14172703 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2100           0 :                         xfs_btree_mark_sick(bno_cur);
    2101           0 :                         error = -EFSCORRUPTED;
    2102           0 :                         goto error0;
    2103             :                 }
    2104             :                 /*
    2105             :                  * Move the by-block cursor back to the left neighbor.
    2106             :                  */
    2107    14172703 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2108           0 :                         goto error0;
    2109    14171985 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2110           0 :                         xfs_btree_mark_sick(bno_cur);
    2111           0 :                         error = -EFSCORRUPTED;
    2112           0 :                         goto error0;
    2113             :                 }
    2114             : #ifdef DEBUG
    2115             :                 /*
    2116             :                  * Check that this is the right record: delete didn't
    2117             :                  * mangle the cursor.
    2118             :                  */
    2119             :                 {
    2120    14171985 :                         xfs_agblock_t   xxbno;
    2121    14171985 :                         xfs_extlen_t    xxlen;
    2122             : 
    2123    14171985 :                         if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
    2124             :                                         &i)))
    2125           0 :                                 goto error0;
    2126    14172427 :                         if (XFS_IS_CORRUPT(mp,
    2127             :                                            i != 1 ||
    2128             :                                            xxbno != ltbno ||
    2129             :                                            xxlen != ltlen)) {
    2130           0 :                                 xfs_btree_mark_sick(bno_cur);
    2131           0 :                                 error = -EFSCORRUPTED;
    2132           0 :                                 goto error0;
    2133             :                         }
    2134             :                 }
    2135             : #endif
    2136             :                 /*
    2137             :                  * Update remaining by-block entry to the new, joined block.
    2138             :                  */
    2139    14172427 :                 nbno = ltbno;
    2140    14172427 :                 nlen = len + ltlen + gtlen;
    2141    14172427 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2142           0 :                         goto error0;
    2143             :         }
    2144             :         /*
    2145             :          * Have only a left contiguous neighbor.
    2146             :          * Merge it together with the new freespace.
    2147             :          */
    2148    84647702 :         else if (haveleft) {
    2149             :                 /*
    2150             :                  * Delete the old by-size entry on the left.
    2151             :                  */
    2152    11808113 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2153           6 :                         goto error0;
    2154    11808669 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2155           0 :                         xfs_btree_mark_sick(cnt_cur);
    2156           0 :                         error = -EFSCORRUPTED;
    2157           0 :                         goto error0;
    2158             :                 }
    2159    11808669 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2160           0 :                         goto error0;
    2161    11807568 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2162           0 :                         xfs_btree_mark_sick(cnt_cur);
    2163           0 :                         error = -EFSCORRUPTED;
    2164           0 :                         goto error0;
    2165             :                 }
    2166             :                 /*
    2167             :                  * Back up the by-block cursor to the left neighbor, and
    2168             :                  * update its length.
    2169             :                  */
    2170    11807568 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2171           0 :                         goto error0;
    2172    11807260 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2173           0 :                         xfs_btree_mark_sick(bno_cur);
    2174           0 :                         error = -EFSCORRUPTED;
    2175           0 :                         goto error0;
    2176             :                 }
    2177    11807260 :                 nbno = ltbno;
    2178    11807260 :                 nlen = len + ltlen;
    2179    11807260 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2180           0 :                         goto error0;
    2181             :         }
    2182             :         /*
    2183             :          * Have only a right contiguous neighbor.
    2184             :          * Merge it together with the new freespace.
    2185             :          */
    2186    72839589 :         else if (haveright) {
    2187             :                 /*
    2188             :                  * Delete the old by-size entry on the right.
    2189             :                  */
    2190    17378468 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2191          18 :                         goto error0;
    2192    17378663 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2193           0 :                         xfs_btree_mark_sick(cnt_cur);
    2194           0 :                         error = -EFSCORRUPTED;
    2195           0 :                         goto error0;
    2196             :                 }
    2197    17378663 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2198           0 :                         goto error0;
    2199    17377702 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2200           0 :                         xfs_btree_mark_sick(cnt_cur);
    2201           0 :                         error = -EFSCORRUPTED;
    2202           0 :                         goto error0;
    2203             :                 }
    2204             :                 /*
    2205             :                  * Update the starting block and length of the right
    2206             :                  * neighbor in the by-block tree.
    2207             :                  */
    2208    17377702 :                 nbno = bno;
    2209    17377702 :                 nlen = len + gtlen;
    2210    17377702 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2211           0 :                         goto error0;
    2212             :         }
    2213             :         /*
    2214             :          * No contiguous neighbors.
    2215             :          * Insert the new freespace into the by-block tree.
    2216             :          */
    2217             :         else {
    2218    55461121 :                 nbno = bno;
    2219    55461121 :                 nlen = len;
    2220    55461121 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
    2221           1 :                         goto error0;
    2222    55457348 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2223           0 :                         xfs_btree_mark_sick(bno_cur);
    2224           0 :                         error = -EFSCORRUPTED;
    2225           0 :                         goto error0;
    2226             :                 }
    2227             :         }
    2228    98815960 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    2229    98821789 :         bno_cur = NULL;
    2230             :         /*
    2231             :          * In all cases we need to insert the new freespace in the by-size tree.
    2232             :          */
    2233    98821789 :         if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
    2234          40 :                 goto error0;
    2235    98822476 :         if (XFS_IS_CORRUPT(mp, i != 0)) {
    2236           0 :                 xfs_btree_mark_sick(cnt_cur);
    2237           0 :                 error = -EFSCORRUPTED;
    2238           0 :                 goto error0;
    2239             :         }
    2240    98822476 :         if ((error = xfs_btree_insert(cnt_cur, &i)))
    2241           1 :                 goto error0;
    2242    98815852 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    2243           0 :                 xfs_btree_mark_sick(cnt_cur);
    2244           0 :                 error = -EFSCORRUPTED;
    2245           0 :                 goto error0;
    2246             :         }
    2247    98815852 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    2248    98822628 :         cnt_cur = NULL;
    2249             : 
    2250             :         /*
    2251             :          * Update the freespace totals in the ag and superblock.
    2252             :          */
    2253    98822628 :         error = xfs_alloc_update_counters(tp, agbp, len);
    2254    98814214 :         xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
    2255    98789716 :         if (error)
    2256           0 :                 goto error0;
    2257             : 
    2258    98789716 :         XFS_STATS_INC(mp, xs_freex);
    2259    98804488 :         XFS_STATS_ADD(mp, xs_freeb, len);
    2260             : 
    2261    98804130 :         trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
    2262             : 
    2263    98804130 :         return 0;
    2264             : 
    2265         285 :  error0:
    2266         285 :         trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
    2267         285 :         if (bno_cur)
    2268         218 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    2269         285 :         if (cnt_cur)
    2270         102 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    2271             :         return error;
    2272             : }
    2273             : 
    2274             : /*
    2275             :  * Visible (exported) allocation/free functions.
    2276             :  * Some of these are used just by xfs_alloc_btree.c and this file.
    2277             :  */
    2278             : 
    2279             : /*
    2280             :  * Compute and fill in value of m_alloc_maxlevels.
    2281             :  */
    2282             : void
    2283       60880 : xfs_alloc_compute_maxlevels(
    2284             :         xfs_mount_t     *mp)    /* file system mount structure */
    2285             : {
    2286      121760 :         mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
    2287       60880 :                         (mp->m_sb.sb_agblocks + 1) / 2);
    2288       60880 :         ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
    2289       60880 : }
    2290             : 
    2291             : /*
    2292             :  * Find the length of the longest extent in an AG.  The 'need' parameter
    2293             :  * specifies how much space we're going to need for the AGFL and the
    2294             :  * 'reserved' parameter tells us how many blocks in this AG are reserved for
    2295             :  * other callers.
    2296             :  */
    2297             : xfs_extlen_t
    2298   443150571 : xfs_alloc_longest_free_extent(
    2299             :         struct xfs_perag        *pag,
    2300             :         xfs_extlen_t            need,
    2301             :         xfs_extlen_t            reserved)
    2302             : {
    2303   443150571 :         xfs_extlen_t            delta = 0;
    2304             : 
    2305             :         /*
    2306             :          * If the AGFL needs a recharge, we'll have to subtract that from the
    2307             :          * longest extent.
    2308             :          */
    2309   443150571 :         if (need > pag->pagf_flcount)
    2310      563062 :                 delta = need - pag->pagf_flcount;
    2311             : 
    2312             :         /*
    2313             :          * If we cannot maintain others' reservations with space from the
    2314             :          * not-longest freesp extents, we'll have to subtract /that/ from
    2315             :          * the longest extent too.
    2316             :          */
    2317   443150571 :         if (pag->pagf_freeblks - pag->pagf_longest < reserved)
    2318   345035444 :                 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
    2319             : 
    2320             :         /*
    2321             :          * If the longest extent is long enough to satisfy all the
    2322             :          * reservations and AGFL rules in place, we can return this extent.
    2323             :          */
    2324   443150571 :         if (pag->pagf_longest > delta)
    2325   442818294 :                 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
    2326             :                                 pag->pagf_longest - delta);
    2327             : 
    2328             :         /* Otherwise, let the caller try for 1 block if there's space. */
    2329      332277 :         return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
    2330             : }
    2331             : 
    2332             : /*
    2333             :  * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
    2334             :  * return the largest possible minimum length.
    2335             :  */
    2336             : unsigned int
    2337  1453426713 : xfs_alloc_min_freelist(
    2338             :         struct xfs_mount        *mp,
    2339             :         struct xfs_perag        *pag)
    2340             : {
    2341             :         /* AG btrees have at least 1 level. */
    2342  1453426713 :         static const uint8_t    fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
    2343  1453426713 :         const uint8_t           *levels = pag ? pag->pagf_levels : fake_levels;
    2344  1453426713 :         unsigned int            min_free;
    2345             : 
    2346  1453426713 :         ASSERT(mp->m_alloc_maxlevels > 0);
    2347             : 
    2348             :         /* space needed by-bno freespace btree */
    2349  1453426713 :         min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
    2350             :                                        mp->m_alloc_maxlevels);
    2351             :         /* space needed by-size freespace btree */
    2352  1453426713 :         min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
    2353             :                                        mp->m_alloc_maxlevels);
    2354             :         /* space needed reverse mapping used space btree */
    2355  1453426713 :         if (xfs_has_rmapbt(mp))
    2356  1379650659 :                 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
    2357             :                                                 mp->m_rmap_maxlevels);
    2358             : 
    2359  1453426713 :         return min_free;
    2360             : }
    2361             : 
    2362             : /*
    2363             :  * Check if the operation we are fixing up the freelist for should go ahead or
    2364             :  * not. If we are freeing blocks, we always allow it, otherwise the allocation
    2365             :  * is dependent on whether the size and shape of free space available will
    2366             :  * permit the requested allocation to take place.
    2367             :  */
    2368             : static bool
    2369  1372457279 : xfs_alloc_space_available(
    2370             :         struct xfs_alloc_arg    *args,
    2371             :         xfs_extlen_t            min_free,
    2372             :         int                     flags)
    2373             : {
    2374  1372457279 :         struct xfs_perag        *pag = args->pag;
    2375  1372457279 :         xfs_extlen_t            alloc_len, longest;
    2376  1372457279 :         xfs_extlen_t            reservation; /* blocks that are still reserved */
    2377  1372457279 :         int                     available;
    2378  1372457279 :         xfs_extlen_t            agflcount;
    2379             : 
    2380  1372457279 :         if (flags & XFS_ALLOC_FLAG_FREEING)
    2381             :                 return true;
    2382             : 
    2383   362197117 :         reservation = xfs_ag_resv_needed(pag, args->resv);
    2384             : 
    2385             :         /* do we have enough contiguous free space for the allocation? */
    2386   362209034 :         alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
    2387   362209034 :         longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
    2388   362209034 :         if (longest < alloc_len)
    2389             :                 return false;
    2390             : 
    2391             :         /*
    2392             :          * Do we have enough free space remaining for the allocation? Don't
    2393             :          * account extra agfl blocks because we are about to defer free them,
    2394             :          * making them unavailable until the current transaction commits.
    2395             :          */
    2396   286720886 :         agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
    2397   286720886 :         available = (int)(pag->pagf_freeblks + agflcount -
    2398   286720886 :                           reservation - min_free - args->minleft);
    2399   286720886 :         if (available < (int)max(args->total, alloc_len))
    2400             :                 return false;
    2401             : 
    2402             :         /*
    2403             :          * Clamp maxlen to the amount of free space available for the actual
    2404             :          * extent allocation.
    2405             :          */
    2406   178131138 :         if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
    2407      414523 :                 args->maxlen = available;
    2408      414523 :                 ASSERT(args->maxlen > 0);
    2409      414523 :                 ASSERT(args->maxlen >= args->minlen);
    2410             :         }
    2411             : 
    2412             :         return true;
    2413             : }
    2414             : 
    2415             : int
    2416      266507 : xfs_free_agfl_block(
    2417             :         struct xfs_trans        *tp,
    2418             :         xfs_agnumber_t          agno,
    2419             :         xfs_agblock_t           agbno,
    2420             :         struct xfs_buf          *agbp,
    2421             :         struct xfs_owner_info   *oinfo)
    2422             : {
    2423      266507 :         int                     error;
    2424      266507 :         struct xfs_buf          *bp;
    2425             : 
    2426      266507 :         error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
    2427             :                                    XFS_AG_RESV_AGFL);
    2428      266507 :         if (error)
    2429             :                 return error;
    2430             : 
    2431      533000 :         error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
    2432      266500 :                         XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
    2433      266500 :                         tp->t_mountp->m_bsize, 0, &bp);
    2434      266499 :         if (error)
    2435             :                 return error;
    2436      266499 :         xfs_trans_binval(tp, bp);
    2437             : 
    2438      266499 :         return 0;
    2439             : }
    2440             : 
    2441             : /*
    2442             :  * Check the agfl fields of the agf for inconsistency or corruption.
    2443             :  *
    2444             :  * The original purpose was to detect an agfl header padding mismatch between
    2445             :  * current and early v5 kernels. This problem manifests as a 1-slot size
    2446             :  * difference between the on-disk flcount and the active [first, last] range of
    2447             :  * a wrapped agfl.
    2448             :  *
    2449             :  * However, we need to use these same checks to catch agfl count corruptions
    2450             :  * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
    2451             :  * way, we need to reset the agfl and warn the user.
    2452             :  *
    2453             :  * Return true if a reset is required before the agfl can be used, false
    2454             :  * otherwise.
    2455             :  */
    2456             : static bool
    2457      523791 : xfs_agfl_needs_reset(
    2458             :         struct xfs_mount        *mp,
    2459             :         struct xfs_agf          *agf)
    2460             : {
    2461      523791 :         uint32_t                f = be32_to_cpu(agf->agf_flfirst);
    2462      523791 :         uint32_t                l = be32_to_cpu(agf->agf_fllast);
    2463      523791 :         uint32_t                c = be32_to_cpu(agf->agf_flcount);
    2464      523791 :         int                     agfl_size = xfs_agfl_size(mp);
    2465      523791 :         int                     active;
    2466             : 
    2467             :         /*
    2468             :          * The agf read verifier catches severe corruption of these fields.
    2469             :          * Repeat some sanity checks to cover a packed -> unpacked mismatch if
    2470             :          * the verifier allows it.
    2471             :          */
    2472      523791 :         if (f >= agfl_size || l >= agfl_size)
    2473             :                 return true;
    2474      523791 :         if (c > agfl_size)
    2475             :                 return true;
    2476             : 
    2477             :         /*
    2478             :          * Check consistency between the on-disk count and the active range. An
    2479             :          * agfl padding mismatch manifests as an inconsistent flcount.
    2480             :          */
    2481      523791 :         if (c && l >= f)
    2482      490770 :                 active = l - f + 1;
    2483       33021 :         else if (c)
    2484         231 :                 active = agfl_size - f + l + 1;
    2485             :         else
    2486             :                 active = 0;
    2487             : 
    2488      523791 :         return active != c;
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
    2493             :  * agfl content cannot be trusted. Warn the user that a repair is required to
    2494             :  * recover leaked blocks.
    2495             :  *
    2496             :  * The purpose of this mechanism is to handle filesystems affected by the agfl
    2497             :  * header padding mismatch problem. A reset keeps the filesystem online with a
    2498             :  * relatively minor free space accounting inconsistency rather than suffer the
    2499             :  * inevitable crash from use of an invalid agfl block.
    2500             :  */
    2501             : static void
    2502          20 : xfs_agfl_reset(
    2503             :         struct xfs_trans        *tp,
    2504             :         struct xfs_buf          *agbp,
    2505             :         struct xfs_perag        *pag)
    2506             : {
    2507          20 :         struct xfs_mount        *mp = tp->t_mountp;
    2508          20 :         struct xfs_agf          *agf = agbp->b_addr;
    2509             : 
    2510          40 :         ASSERT(xfs_perag_agfl_needs_reset(pag));
    2511          20 :         trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
    2512             : 
    2513          20 :         xfs_warn(mp,
    2514             :                "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
    2515             :                "Please unmount and run xfs_repair.",
    2516             :                  pag->pag_agno, pag->pagf_flcount);
    2517             : 
    2518          20 :         agf->agf_flfirst = 0;
    2519          20 :         agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
    2520          20 :         agf->agf_flcount = 0;
    2521          20 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
    2522             :                                     XFS_AGF_FLCOUNT);
    2523             : 
    2524          20 :         pag->pagf_flcount = 0;
    2525          20 :         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    2526          20 : }
    2527             : 
    2528             : /*
    2529             :  * Defer an AGFL block free. This is effectively equivalent to
    2530             :  * xfs_free_extent_later() with some special handling particular to AGFL blocks.
    2531             :  *
    2532             :  * Deferring AGFL frees helps prevent log reservation overruns due to too many
    2533             :  * allocation operations in a transaction. AGFL frees are prone to this problem
    2534             :  * because for one they are always freed one at a time. Further, an immediate
    2535             :  * AGFL block free can cause a btree join and require another block free before
    2536             :  * the real allocation can proceed. Deferring the free disconnects freeing up
    2537             :  * the AGFL slot from freeing the block.
    2538             :  */
    2539             : static int
    2540      266519 : xfs_defer_agfl_block(
    2541             :         struct xfs_trans                *tp,
    2542             :         xfs_agnumber_t                  agno,
    2543             :         xfs_agblock_t                   agbno,
    2544             :         struct xfs_owner_info           *oinfo)
    2545             : {
    2546      266519 :         struct xfs_mount                *mp = tp->t_mountp;
    2547      266519 :         struct xfs_extent_free_item     *xefi;
    2548      266519 :         xfs_fsblock_t                   fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
    2549             : 
    2550      266519 :         ASSERT(xfs_extfree_item_cache != NULL);
    2551      266519 :         ASSERT(oinfo != NULL);
    2552             : 
    2553      266519 :         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
    2554           0 :                 return -EFSCORRUPTED;
    2555             : 
    2556      266519 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2557             :                                GFP_KERNEL | __GFP_NOFAIL);
    2558      266520 :         xefi->xefi_startblock = fsbno;
    2559      266520 :         xefi->xefi_blockcount = 1;
    2560      266520 :         xefi->xefi_owner = oinfo->oi_owner;
    2561      266520 :         xefi->xefi_agresv = XFS_AG_RESV_AGFL;
    2562             : 
    2563      266520 :         trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
    2564             : 
    2565      266519 :         xfs_extent_free_get_group(mp, xefi);
    2566      266520 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
    2567      266520 :         return 0;
    2568             : }
    2569             : 
    2570             : /*
    2571             :  * Add the extent to the list of extents to be free at transaction end.
    2572             :  * The list is maintained sorted (by block number).
    2573             :  */
    2574             : int
    2575    98479560 : __xfs_free_extent_later(
    2576             :         struct xfs_trans                *tp,
    2577             :         xfs_fsblock_t                   bno,
    2578             :         xfs_filblks_t                   len,
    2579             :         const struct xfs_owner_info     *oinfo,
    2580             :         enum xfs_ag_resv_type           type,
    2581             :         bool                            skip_discard)
    2582             : {
    2583    98479560 :         struct xfs_extent_free_item     *xefi;
    2584    98479560 :         struct xfs_mount                *mp = tp->t_mountp;
    2585             : #ifdef DEBUG
    2586    98479560 :         xfs_agnumber_t                  agno;
    2587    98479560 :         xfs_agblock_t                   agbno;
    2588             : 
    2589    98479560 :         ASSERT(bno != NULLFSBLOCK);
    2590    98479560 :         ASSERT(len > 0);
    2591    98479560 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
    2592    98479560 :         ASSERT(!isnullstartblock(bno));
    2593    98479560 :         agno = XFS_FSB_TO_AGNO(mp, bno);
    2594    98479560 :         agbno = XFS_FSB_TO_AGBNO(mp, bno);
    2595    98419047 :         ASSERT(agno < mp->m_sb.sb_agcount);
    2596    98419047 :         ASSERT(agbno < mp->m_sb.sb_agblocks);
    2597    98419047 :         ASSERT(len < mp->m_sb.sb_agblocks);
    2598    98419047 :         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
    2599             : #endif
    2600    98419047 :         ASSERT(xfs_extfree_item_cache != NULL);
    2601    98419047 :         ASSERT(type != XFS_AG_RESV_AGFL);
    2602             : 
    2603    98419047 :         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
    2604           0 :                 return -EFSCORRUPTED;
    2605             : 
    2606    98472025 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2607             :                                GFP_KERNEL | __GFP_NOFAIL);
    2608    98495964 :         xefi->xefi_startblock = bno;
    2609    98495964 :         xefi->xefi_blockcount = (xfs_extlen_t)len;
    2610    98495964 :         xefi->xefi_agresv = type;
    2611    98495964 :         if (skip_discard)
    2612     6993594 :                 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
    2613    98495964 :         if (oinfo) {
    2614     2790723 :                 ASSERT(oinfo->oi_offset == 0);
    2615             : 
    2616     2790723 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
    2617         143 :                         xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
    2618     2790723 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
    2619     2014601 :                         xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
    2620     2790723 :                 xefi->xefi_owner = oinfo->oi_owner;
    2621             :         } else {
    2622    95705241 :                 xefi->xefi_owner = XFS_RMAP_OWN_NULL;
    2623             :         }
    2624   295418676 :         trace_xfs_bmap_free_defer(mp,
    2625    98461356 :                         XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
    2626    98495964 :                         XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
    2627             : 
    2628    98440075 :         xfs_extent_free_get_group(mp, xefi);
    2629    98537276 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
    2630    98537276 :         return 0;
    2631             : }
    2632             : 
    2633             : #ifdef DEBUG
    2634             : /*
    2635             :  * Check if an AGF has a free extent record whose length is equal to
    2636             :  * args->minlen.
    2637             :  */
    2638             : STATIC int
    2639     2775156 : xfs_exact_minlen_extent_available(
    2640             :         struct xfs_alloc_arg    *args,
    2641             :         struct xfs_buf          *agbp,
    2642             :         int                     *stat)
    2643             : {
    2644     2775156 :         struct xfs_btree_cur    *cnt_cur;
    2645     2775156 :         xfs_agblock_t           fbno;
    2646     2775156 :         xfs_extlen_t            flen;
    2647     2775156 :         int                     error = 0;
    2648             : 
    2649     2775156 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
    2650             :                                         args->pag, XFS_BTNUM_CNT);
    2651     2775162 :         error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
    2652     2774448 :         if (error)
    2653           0 :                 goto out;
    2654             : 
    2655     2774448 :         if (*stat == 0) {
    2656           0 :                 xfs_btree_mark_sick(cnt_cur);
    2657           0 :                 error = -EFSCORRUPTED;
    2658           0 :                 goto out;
    2659             :         }
    2660             : 
    2661     2774448 :         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
    2662     2771784 :         if (error)
    2663           0 :                 goto out;
    2664             : 
    2665     2771784 :         if (*stat == 1 && flen != args->minlen)
    2666       39326 :                 *stat = 0;
    2667             : 
    2668     2732458 : out:
    2669     2771784 :         xfs_btree_del_cursor(cnt_cur, error);
    2670             : 
    2671     2774301 :         return error;
    2672             : }
    2673             : #endif
    2674             : 
    2675             : /*
    2676             :  * Decide whether to use this allocation group for this allocation.
    2677             :  * If so, fix up the btree freelist's size.
    2678             :  */
    2679             : int                     /* error */
    2680   781417678 : xfs_alloc_fix_freelist(
    2681             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    2682             :         uint32_t                alloc_flags)
    2683             : {
    2684   781417678 :         struct xfs_mount        *mp = args->mp;
    2685   781417678 :         struct xfs_perag        *pag = args->pag;
    2686   781417678 :         struct xfs_trans        *tp = args->tp;
    2687   781417678 :         struct xfs_buf          *agbp = NULL;
    2688   781417678 :         struct xfs_buf          *agflbp = NULL;
    2689   781417678 :         struct xfs_alloc_arg    targs;  /* local allocation arguments */
    2690   781417678 :         xfs_agblock_t           bno;    /* freelist block */
    2691   781417678 :         xfs_extlen_t            need;   /* total blocks needed in freelist */
    2692   781417678 :         int                     error = 0;
    2693             : 
    2694             :         /* deferred ops (AGFL block frees) require permanent transactions */
    2695   781417678 :         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    2696             : 
    2697  1562835356 :         if (!xfs_perag_initialised_agf(pag)) {
    2698        5730 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2699        5730 :                 if (error) {
    2700             :                         /* Couldn't lock the AGF so skip this AG. */
    2701        2422 :                         if (error == -EAGAIN)
    2702        2422 :                                 error = 0;
    2703        2422 :                         goto out_no_agbp;
    2704             :                 }
    2705             :         }
    2706             : 
    2707             :         /*
    2708             :          * If this is a metadata preferred pag and we are user data then try
    2709             :          * somewhere else if we are not being asked to try harder at this
    2710             :          * point
    2711             :          */
    2712  1562830512 :         if (xfs_perag_prefers_metadata(pag) &&
    2713           0 :             (args->datatype & XFS_ALLOC_USERDATA) &&
    2714           0 :             (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
    2715           0 :                 ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
    2716           0 :                 goto out_agbp_relse;
    2717             :         }
    2718             : 
    2719   781415256 :         need = xfs_alloc_min_freelist(mp, pag);
    2720   781371356 :         if (!xfs_alloc_space_available(args, need, alloc_flags |
    2721             :                         XFS_ALLOC_FLAG_CHECK))
    2722   184071848 :                 goto out_agbp_relse;
    2723             : 
    2724             :         /*
    2725             :          * Get the a.g. freespace buffer.
    2726             :          * Can fail if we're not blocking on locks, and it's held.
    2727             :          */
    2728   597253268 :         if (!agbp) {
    2729   597255228 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2730   597364428 :                 if (error) {
    2731             :                         /* Couldn't lock the AGF so skip this AG. */
    2732     6182918 :                         if (error == -EAGAIN)
    2733     6181475 :                                 error = 0;
    2734     6182918 :                         goto out_no_agbp;
    2735             :                 }
    2736             :         }
    2737             : 
    2738             :         /* reset a padding mismatched agfl before final free space check */
    2739  1182359100 :         if (xfs_perag_agfl_needs_reset(pag))
    2740          20 :                 xfs_agfl_reset(tp, agbp, pag);
    2741             : 
    2742             :         /* If there isn't enough total space or single-extent, reject it. */
    2743   591179550 :         need = xfs_alloc_min_freelist(mp, pag);
    2744   591138629 :         if (!xfs_alloc_space_available(args, need, alloc_flags))
    2745        5971 :                 goto out_agbp_relse;
    2746             : 
    2747             : #ifdef DEBUG
    2748   591152851 :         if (args->alloc_minlen_only) {
    2749     2775315 :                 int stat;
    2750             : 
    2751     2775315 :                 error = xfs_exact_minlen_extent_available(args, agbp, &stat);
    2752     2774511 :                 if (error || !stat)
    2753       39326 :                         goto out_agbp_relse;
    2754             :         }
    2755             : #endif
    2756             :         /*
    2757             :          * Make the freelist shorter if it's too long.
    2758             :          *
    2759             :          * Note that from this point onwards, we will always release the agf and
    2760             :          * agfl buffers on error. This handles the case where we error out and
    2761             :          * the buffers are clean or may not have been joined to the transaction
    2762             :          * and hence need to be released manually. If they have been joined to
    2763             :          * the transaction, then xfs_trans_brelse() will handle them
    2764             :          * appropriately based on the recursion count and dirty state of the
    2765             :          * buffer.
    2766             :          *
    2767             :          * XXX (dgc): When we have lots of free space, does this buy us
    2768             :          * anything other than extra overhead when we need to put more blocks
    2769             :          * back on the free list? Maybe we should only do this when space is
    2770             :          * getting low or the AGFL is more than half full?
    2771             :          *
    2772             :          * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
    2773             :          * big; the NORMAP flag prevents AGFL expand/shrink operations from
    2774             :          * updating the rmapbt.  Both flags are used in xfs_repair while we're
    2775             :          * rebuilding the rmapbt, and neither are used by the kernel.  They're
    2776             :          * both required to ensure that rmaps are correctly recorded for the
    2777             :          * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
    2778             :          * repair/rmap.c in xfsprogs for details.
    2779             :          */
    2780   591112721 :         memset(&targs, 0, sizeof(targs));
    2781             :         /* struct copy below */
    2782   591112721 :         if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
    2783       19005 :                 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
    2784             :         else
    2785   591093716 :                 targs.oinfo = XFS_RMAP_OINFO_AG;
    2786   591379241 :         while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
    2787   591379241 :                         pag->pagf_flcount > need) {
    2788      269989 :                 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
    2789      266519 :                 if (error)
    2790           0 :                         goto out_agbp_relse;
    2791             : 
    2792             :                 /* defer agfl frees */
    2793      266519 :                 error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
    2794      266520 :                 if (error)
    2795           0 :                         goto out_agbp_relse;
    2796             :         }
    2797             : 
    2798   591110875 :         targs.tp = tp;
    2799   591110875 :         targs.mp = mp;
    2800   591110875 :         targs.agbp = agbp;
    2801   591110875 :         targs.agno = args->agno;
    2802   591110875 :         targs.alignment = targs.minlen = targs.prod = 1;
    2803   591110875 :         targs.pag = pag;
    2804   591110875 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2805   591178203 :         if (error)
    2806        1774 :                 goto out_agbp_relse;
    2807             : 
    2808             :         /* Make the freelist longer if it's too short. */
    2809   591958879 :         while (pag->pagf_flcount < need) {
    2810      780060 :                 targs.agbno = 0;
    2811      780060 :                 targs.maxlen = need - pag->pagf_flcount;
    2812      780060 :                 targs.resv = XFS_AG_RESV_AGFL;
    2813             : 
    2814             :                 /* Allocate as many blocks as possible at once. */
    2815      780060 :                 error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
    2816      782462 :                 if (error)
    2817           0 :                         goto out_agflbp_relse;
    2818             : 
    2819             :                 /*
    2820             :                  * Stop if we run out.  Won't happen if callers are obeying
    2821             :                  * the restrictions correctly.  Can happen for free calls
    2822             :                  * on a completely full ag.
    2823             :                  */
    2824      782462 :                 if (targs.agbno == NULLAGBLOCK) {
    2825           0 :                         if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
    2826             :                                 break;
    2827           0 :                         goto out_agflbp_relse;
    2828             :                 }
    2829             : 
    2830      782462 :                 if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
    2831      782454 :                         error = xfs_rmap_alloc(tp, agbp, pag,
    2832             :                                        targs.agbno, targs.len, &targs.oinfo);
    2833      782455 :                         if (error)
    2834          13 :                                 goto out_agflbp_relse;
    2835             :                 }
    2836      782450 :                 error = xfs_alloc_update_counters(tp, agbp,
    2837      782450 :                                                   -((long)(targs.len)));
    2838      782450 :                 if (error)
    2839           0 :                         goto out_agflbp_relse;
    2840             : 
    2841             :                 /*
    2842             :                  * Put each allocated block on the list.
    2843             :                  */
    2844     1670992 :                 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
    2845      888542 :                         error = xfs_alloc_put_freelist(pag, tp, agbp,
    2846             :                                                         agflbp, bno, 0);
    2847      888542 :                         if (error)
    2848           0 :                                 goto out_agflbp_relse;
    2849             :                 }
    2850             :         }
    2851   591178837 :         xfs_trans_brelse(tp, agflbp);
    2852   591188299 :         args->agbp = agbp;
    2853   591188299 :         return 0;
    2854             : 
    2855          13 : out_agflbp_relse:
    2856          13 :         xfs_trans_brelse(tp, agflbp);
    2857   184118932 : out_agbp_relse:
    2858   184118932 :         if (agbp)
    2859       47052 :                 xfs_trans_brelse(tp, agbp);
    2860   184071880 : out_no_agbp:
    2861   190304304 :         args->agbp = NULL;
    2862   190304304 :         return error;
    2863             : }
    2864             : 
    2865             : /*
    2866             :  * Get a block from the freelist.
    2867             :  * Returns with the buffer for the block gotten.
    2868             :  */
    2869             : int
    2870     1125368 : xfs_alloc_get_freelist(
    2871             :         struct xfs_perag        *pag,
    2872             :         struct xfs_trans        *tp,
    2873             :         struct xfs_buf          *agbp,
    2874             :         xfs_agblock_t           *bnop,
    2875             :         int                     btreeblk)
    2876             : {
    2877     1125368 :         struct xfs_agf          *agf = agbp->b_addr;
    2878     1125368 :         struct xfs_buf          *agflbp;
    2879     1125368 :         xfs_agblock_t           bno;
    2880     1125368 :         __be32                  *agfl_bno;
    2881     1125368 :         int                     error;
    2882     1125368 :         uint32_t                logflags;
    2883     1125368 :         struct xfs_mount        *mp = tp->t_mountp;
    2884             : 
    2885             :         /*
    2886             :          * Freelist is empty, give up.
    2887             :          */
    2888     1125368 :         if (!agf->agf_flcount) {
    2889           0 :                 *bnop = NULLAGBLOCK;
    2890           0 :                 return 0;
    2891             :         }
    2892             :         /*
    2893             :          * Read the array of free blocks.
    2894             :          */
    2895     1125368 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2896     1125367 :         if (error)
    2897             :                 return error;
    2898             : 
    2899             : 
    2900             :         /*
    2901             :          * Get the block number and update the data structures.
    2902             :          */
    2903     1125367 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    2904     1125367 :         bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
    2905     2250735 :         if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
    2906           0 :                 return -EFSCORRUPTED;
    2907             : 
    2908     1125367 :         be32_add_cpu(&agf->agf_flfirst, 1);
    2909     1125367 :         xfs_trans_brelse(tp, agflbp);
    2910     2250732 :         if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
    2911         738 :                 agf->agf_flfirst = 0;
    2912             : 
    2913     2250732 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    2914     1125366 :         be32_add_cpu(&agf->agf_flcount, -1);
    2915     1125366 :         pag->pagf_flcount--;
    2916             : 
    2917     1125366 :         logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
    2918     1125366 :         if (btreeblk) {
    2919      858847 :                 be32_add_cpu(&agf->agf_btreeblks, 1);
    2920      858847 :                 pag->pagf_btreeblks++;
    2921      858847 :                 logflags |= XFS_AGF_BTREEBLKS;
    2922             :         }
    2923             : 
    2924     1125366 :         xfs_alloc_log_agf(tp, agbp, logflags);
    2925     1125367 :         *bnop = bno;
    2926             : 
    2927     1125367 :         return 0;
    2928             : }
    2929             : 
    2930             : /*
    2931             :  * Log the given fields from the agf structure.
    2932             :  */
    2933             : void
    2934   239840015 : xfs_alloc_log_agf(
    2935             :         struct xfs_trans        *tp,
    2936             :         struct xfs_buf          *bp,
    2937             :         uint32_t                fields)
    2938             : {
    2939   239840015 :         int     first;          /* first byte offset */
    2940   239840015 :         int     last;           /* last byte offset */
    2941   239840015 :         static const short      offsets[] = {
    2942             :                 offsetof(xfs_agf_t, agf_magicnum),
    2943             :                 offsetof(xfs_agf_t, agf_versionnum),
    2944             :                 offsetof(xfs_agf_t, agf_seqno),
    2945             :                 offsetof(xfs_agf_t, agf_length),
    2946             :                 offsetof(xfs_agf_t, agf_roots[0]),
    2947             :                 offsetof(xfs_agf_t, agf_levels[0]),
    2948             :                 offsetof(xfs_agf_t, agf_flfirst),
    2949             :                 offsetof(xfs_agf_t, agf_fllast),
    2950             :                 offsetof(xfs_agf_t, agf_flcount),
    2951             :                 offsetof(xfs_agf_t, agf_freeblks),
    2952             :                 offsetof(xfs_agf_t, agf_longest),
    2953             :                 offsetof(xfs_agf_t, agf_btreeblks),
    2954             :                 offsetof(xfs_agf_t, agf_uuid),
    2955             :                 offsetof(xfs_agf_t, agf_rmap_blocks),
    2956             :                 offsetof(xfs_agf_t, agf_refcount_blocks),
    2957             :                 offsetof(xfs_agf_t, agf_refcount_root),
    2958             :                 offsetof(xfs_agf_t, agf_refcount_level),
    2959             :                 /* needed so that we don't log the whole rest of the structure: */
    2960             :                 offsetof(xfs_agf_t, agf_spare64),
    2961             :                 sizeof(xfs_agf_t)
    2962             :         };
    2963             : 
    2964   239840015 :         trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
    2965             : 
    2966   239824259 :         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
    2967             : 
    2968   239838920 :         xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
    2969   239848671 :         xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
    2970   239877544 : }
    2971             : 
    2972             : /*
    2973             :  * Put the block on the freelist for the allocation group.
    2974             :  */
    2975             : int
    2976     1133519 : xfs_alloc_put_freelist(
    2977             :         struct xfs_perag        *pag,
    2978             :         struct xfs_trans        *tp,
    2979             :         struct xfs_buf          *agbp,
    2980             :         struct xfs_buf          *agflbp,
    2981             :         xfs_agblock_t           bno,
    2982             :         int                     btreeblk)
    2983             : {
    2984     1133519 :         struct xfs_mount        *mp = tp->t_mountp;
    2985     1133519 :         struct xfs_agf          *agf = agbp->b_addr;
    2986     1133519 :         __be32                  *blockp;
    2987     1133519 :         int                     error;
    2988     1133519 :         uint32_t                logflags;
    2989     1133519 :         __be32                  *agfl_bno;
    2990     1133519 :         int                     startoff;
    2991             : 
    2992     1133519 :         if (!agflbp) {
    2993      244979 :                 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2994      244979 :                 if (error)
    2995             :                         return error;
    2996             :         }
    2997             : 
    2998     1133519 :         be32_add_cpu(&agf->agf_fllast, 1);
    2999     2267038 :         if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
    3000         748 :                 agf->agf_fllast = 0;
    3001             : 
    3002     2267038 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    3003     1133519 :         be32_add_cpu(&agf->agf_flcount, 1);
    3004     1133519 :         pag->pagf_flcount++;
    3005             : 
    3006     1133519 :         logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
    3007     1133519 :         if (btreeblk) {
    3008      244979 :                 be32_add_cpu(&agf->agf_btreeblks, -1);
    3009      244979 :                 pag->pagf_btreeblks--;
    3010      244979 :                 logflags |= XFS_AGF_BTREEBLKS;
    3011             :         }
    3012             : 
    3013     1133519 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3014             : 
    3015     2267041 :         ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
    3016             : 
    3017     1133520 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    3018     1133520 :         blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
    3019     1133520 :         *blockp = cpu_to_be32(bno);
    3020     1133520 :         startoff = (char *)blockp - (char *)agflbp->b_addr;
    3021             : 
    3022     1133520 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3023             : 
    3024     1133521 :         xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
    3025     1133520 :         xfs_trans_log_buf(tp, agflbp, startoff,
    3026             :                           startoff + sizeof(xfs_agblock_t) - 1);
    3027     1133520 :         return 0;
    3028             : }
    3029             : 
    3030             : /*
    3031             :  * Check that this AGF/AGI header's sequence number and length matches the AG
    3032             :  * number and size in fsblocks.
    3033             :  */
    3034             : xfs_failaddr_t
    3035     4849320 : xfs_validate_ag_length(
    3036             :         struct xfs_buf          *bp,
    3037             :         uint32_t                seqno,
    3038             :         uint32_t                length)
    3039             : {
    3040     4849320 :         struct xfs_mount        *mp = bp->b_mount;
    3041             :         /*
    3042             :          * During growfs operations, the perag is not fully initialised,
    3043             :          * so we can't use it for any useful checking. growfs ensures we can't
    3044             :          * use it by using uncached buffers that don't have the perag attached
    3045             :          * so we can detect and avoid this problem.
    3046             :          */
    3047     4849320 :         if (bp->b_pag && seqno != bp->b_pag->pag_agno)
    3048           0 :                 return __this_address;
    3049             : 
    3050             :         /*
    3051             :          * Only the last AG in the filesystem is allowed to be shorter
    3052             :          * than the AG size recorded in the superblock.
    3053             :          */
    3054     4849320 :         if (length != mp->m_sb.sb_agblocks) {
    3055             :                 /*
    3056             :                  * During growfs, the new last AG can get here before we
    3057             :                  * have updated the superblock. Give it a pass on the seqno
    3058             :                  * check.
    3059             :                  */
    3060       26438 :                 if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
    3061           0 :                         return __this_address;
    3062       26438 :                 if (length < XFS_MIN_AG_BLOCKS)
    3063           0 :                         return __this_address;
    3064       26438 :                 if (length > mp->m_sb.sb_agblocks)
    3065          11 :                         return __this_address;
    3066             :         }
    3067             : 
    3068             :         return NULL;
    3069             : }
    3070             : 
    3071             : /*
    3072             :  * Verify the AGF is consistent.
    3073             :  *
    3074             :  * We do not verify the AGFL indexes in the AGF are fully consistent here
    3075             :  * because of issues with variable on-disk structure sizes. Instead, we check
    3076             :  * the agfl indexes for consistency when we initialise the perag from the AGF
    3077             :  * information after a read completes.
    3078             :  *
    3079             :  * If the index is inconsistent, then we mark the perag as needing an AGFL
    3080             :  * reset. The first AGFL update performed then resets the AGFL indexes and
    3081             :  * refills the AGFL with known good free blocks, allowing the filesystem to
    3082             :  * continue operating normally at the cost of a few leaked free space blocks.
    3083             :  */
    3084             : static xfs_failaddr_t
    3085     2634000 : xfs_agf_verify(
    3086             :         struct xfs_buf          *bp)
    3087             : {
    3088     2634000 :         struct xfs_mount        *mp = bp->b_mount;
    3089     2634000 :         struct xfs_agf          *agf = bp->b_addr;
    3090     2634000 :         xfs_failaddr_t          fa;
    3091     2634000 :         uint32_t                agf_seqno = be32_to_cpu(agf->agf_seqno);
    3092     2634000 :         uint32_t                agf_length = be32_to_cpu(agf->agf_length);
    3093             : 
    3094     2634000 :         if (xfs_has_crc(mp)) {
    3095     2620008 :                 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
    3096           0 :                         return __this_address;
    3097     2620108 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
    3098           0 :                         return __this_address;
    3099             :         }
    3100             : 
    3101     2634088 :         if (!xfs_verify_magic(bp, agf->agf_magicnum))
    3102           0 :                 return __this_address;
    3103             : 
    3104     2633701 :         if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
    3105           0 :                 return __this_address;
    3106             : 
    3107             :         /*
    3108             :          * Both agf_seqno and agf_length need to validated before anything else
    3109             :          * block number related in the AGF or AGFL can be checked.
    3110             :          */
    3111     2633701 :         fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
    3112     2633467 :         if (fa)
    3113             :                 return fa;
    3114             : 
    3115     5253304 :         if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
    3116          10 :                 return __this_address;
    3117     2633561 :         if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
    3118           0 :                 return __this_address;
    3119     2633561 :         if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
    3120           0 :                 return __this_address;
    3121             : 
    3122     2633561 :         if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
    3123             :             be32_to_cpu(agf->agf_freeblks) > agf_length)
    3124          22 :                 return __this_address;
    3125             : 
    3126     2633539 :         if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
    3127     2633539 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
    3128             :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
    3129     2633539 :                                                 mp->m_alloc_maxlevels ||
    3130             :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
    3131             :                                                 mp->m_alloc_maxlevels)
    3132           0 :                 return __this_address;
    3133             : 
    3134     2633539 :         if (xfs_has_lazysbcount(mp) &&
    3135     2633598 :             be32_to_cpu(agf->agf_btreeblks) > agf_length)
    3136           0 :                 return __this_address;
    3137             : 
    3138     2633539 :         if (xfs_has_rmapbt(mp)) {
    3139     2245219 :                 if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
    3140          11 :                         return __this_address;
    3141             : 
    3142     2245208 :                 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
    3143             :                     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
    3144     2245208 :                                                         mp->m_rmap_maxlevels)
    3145           0 :                         return __this_address;
    3146             :         }
    3147             : 
    3148     2633528 :         if (xfs_has_reflink(mp)) {
    3149     2244670 :                 if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
    3150          11 :                         return __this_address;
    3151             : 
    3152     2244659 :                 if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
    3153     2244659 :                     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
    3154           0 :                         return __this_address;
    3155             :         }
    3156             : 
    3157             :         return NULL;
    3158             : }
    3159             : 
    3160             : static void
    3161      820106 : xfs_agf_read_verify(
    3162             :         struct xfs_buf  *bp)
    3163             : {
    3164      820106 :         struct xfs_mount *mp = bp->b_mount;
    3165      820106 :         xfs_failaddr_t  fa;
    3166             : 
    3167     1640135 :         if (xfs_has_crc(mp) &&
    3168             :             !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
    3169          52 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    3170             :         else {
    3171      820054 :                 fa = xfs_agf_verify(bp);
    3172      820054 :                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
    3173          65 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3174             :         }
    3175      820106 : }
    3176             : 
    3177             : static void
    3178     1231580 : xfs_agf_write_verify(
    3179             :         struct xfs_buf  *bp)
    3180             : {
    3181     1231580 :         struct xfs_mount        *mp = bp->b_mount;
    3182     1231580 :         struct xfs_buf_log_item *bip = bp->b_log_item;
    3183     1231580 :         struct xfs_agf          *agf = bp->b_addr;
    3184     1231580 :         xfs_failaddr_t          fa;
    3185             : 
    3186     1231580 :         fa = xfs_agf_verify(bp);
    3187     1231580 :         if (fa) {
    3188           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3189           0 :                 return;
    3190             :         }
    3191             : 
    3192     1231580 :         if (!xfs_has_crc(mp))
    3193             :                 return;
    3194             : 
    3195     1217939 :         if (bip)
    3196     1199931 :                 agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    3197             : 
    3198     1217939 :         xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
    3199             : }
    3200             : 
    3201             : const struct xfs_buf_ops xfs_agf_buf_ops = {
    3202             :         .name = "xfs_agf",
    3203             :         .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
    3204             :         .verify_read = xfs_agf_read_verify,
    3205             :         .verify_write = xfs_agf_write_verify,
    3206             :         .verify_struct = xfs_agf_verify,
    3207             : };
    3208             : 
    3209             : /*
    3210             :  * Read in the allocation group header (free/alloc section).
    3211             :  */
    3212             : int
    3213  1513191760 : xfs_read_agf(
    3214             :         struct xfs_perag        *pag,
    3215             :         struct xfs_trans        *tp,
    3216             :         int                     flags,
    3217             :         struct xfs_buf          **agfbpp)
    3218             : {
    3219  1513191760 :         struct xfs_mount        *mp = pag->pag_mount;
    3220  1513191760 :         int                     error;
    3221             : 
    3222  1513191760 :         trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
    3223             : 
    3224  6052498736 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
    3225  1513124684 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
    3226  1513124684 :                         XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
    3227  1514273696 :         if (xfs_metadata_is_sick(error))
    3228         117 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
    3229  1514273696 :         if (error)
    3230             :                 return error;
    3231             : 
    3232  1508102968 :         xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
    3233  1508102968 :         return 0;
    3234             : }
    3235             : 
    3236             : /*
    3237             :  * Read in the allocation group header (free/alloc section) and initialise the
    3238             :  * perag structure if necessary. If the caller provides @agfbpp, then return the
    3239             :  * locked buffer to the caller, otherwise free it.
    3240             :  */
    3241             : int
    3242  1513812258 : xfs_alloc_read_agf(
    3243             :         struct xfs_perag        *pag,
    3244             :         struct xfs_trans        *tp,
    3245             :         int                     flags,
    3246             :         struct xfs_buf          **agfbpp)
    3247             : {
    3248  1513812258 :         struct xfs_buf          *agfbp;
    3249  1513812258 :         struct xfs_agf          *agf;
    3250  1513812258 :         int                     error;
    3251  1513812258 :         int                     allocbt_blks;
    3252             : 
    3253  1513812258 :         trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
    3254             : 
    3255             :         /* We don't support trylock when freeing. */
    3256  1513040068 :         ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
    3257             :                         (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
    3258  1513040068 :         error = xfs_read_agf(pag, tp,
    3259  1513040068 :                         (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
    3260             :                         &agfbp);
    3261  1514225687 :         if (error)
    3262             :                 return error;
    3263             : 
    3264  1508037324 :         agf = agfbp->b_addr;
    3265  3016074648 :         if (!xfs_perag_initialised_agf(pag)) {
    3266      523765 :                 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
    3267      523765 :                 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
    3268      523765 :                 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
    3269      523765 :                 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
    3270      523765 :                 pag->pagf_levels[XFS_BTNUM_BNOi] =
    3271      523765 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
    3272      523765 :                 pag->pagf_levels[XFS_BTNUM_CNTi] =
    3273      523765 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
    3274      523765 :                 pag->pagf_levels[XFS_BTNUM_RMAPi] =
    3275      523765 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
    3276      523765 :                 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
    3277      523765 :                 if (xfs_agfl_needs_reset(pag->pag_mount, agf))
    3278          20 :                         set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3279             :                 else
    3280      523745 :                         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3281             : 
    3282             :                 /*
    3283             :                  * Update the in-core allocbt counter. Filter out the rmapbt
    3284             :                  * subset of the btreeblks counter because the rmapbt is managed
    3285             :                  * by perag reservation. Subtract one for the rmapbt root block
    3286             :                  * because the rmap counter includes it while the btreeblks
    3287             :                  * counter only tracks non-root blocks.
    3288             :                  */
    3289      523969 :                 allocbt_blks = pag->pagf_btreeblks;
    3290      523969 :                 if (xfs_has_rmapbt(pag->pag_mount))
    3291      453179 :                         allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
    3292      523969 :                 if (allocbt_blks > 0)
    3293       62071 :                         atomic64_add(allocbt_blks,
    3294             :                                         &pag->pag_mount->m_allocbt_blks);
    3295             : 
    3296      523969 :                 set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
    3297             :         }
    3298             : #ifdef DEBUG
    3299  3015027118 :         else if (!xfs_is_shutdown(pag->pag_mount)) {
    3300  1507451099 :                 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
    3301  1507451099 :                 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
    3302  1507451099 :                 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
    3303  1507451099 :                 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
    3304  1507451099 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
    3305             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
    3306  1507451099 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
    3307             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
    3308             :         }
    3309             : #endif
    3310  1508037530 :         if (agfbpp)
    3311  1507008211 :                 *agfbpp = agfbp;
    3312             :         else
    3313     1029319 :                 xfs_trans_brelse(tp, agfbp);
    3314             :         return 0;
    3315             : }
    3316             : 
    3317             : /*
    3318             :  * Pre-proces allocation arguments to set initial state that we don't require
    3319             :  * callers to set up correctly, as well as bounds check the allocation args
    3320             :  * that are set up.
    3321             :  */
    3322             : static int
    3323    89493546 : xfs_alloc_vextent_check_args(
    3324             :         struct xfs_alloc_arg    *args,
    3325             :         xfs_fsblock_t           target,
    3326             :         xfs_agnumber_t          *minimum_agno)
    3327             : {
    3328    89493546 :         struct xfs_mount        *mp = args->mp;
    3329    89493546 :         xfs_agblock_t           agsize;
    3330             : 
    3331    89493546 :         args->fsbno = NULLFSBLOCK;
    3332             : 
    3333    89493546 :         *minimum_agno = 0;
    3334    89493546 :         if (args->tp->t_highest_agno != NULLAGNUMBER)
    3335     1640842 :                 *minimum_agno = args->tp->t_highest_agno;
    3336             : 
    3337             :         /*
    3338             :          * Just fix this up, for the case where the last a.g. is shorter
    3339             :          * (or there's only one a.g.) and the caller couldn't easily figure
    3340             :          * that out (xfs_bmap_alloc).
    3341             :          */
    3342    89493546 :         agsize = mp->m_sb.sb_agblocks;
    3343    89493546 :         if (args->maxlen > agsize)
    3344           0 :                 args->maxlen = agsize;
    3345    89493546 :         if (args->alignment == 0)
    3346     3433939 :                 args->alignment = 1;
    3347             : 
    3348    89493546 :         ASSERT(args->minlen > 0);
    3349    89493546 :         ASSERT(args->maxlen > 0);
    3350    89493546 :         ASSERT(args->alignment > 0);
    3351    89493546 :         ASSERT(args->resv != XFS_AG_RESV_AGFL);
    3352             : 
    3353    89493546 :         ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
    3354    89493546 :         ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
    3355    89485313 :         ASSERT(args->minlen <= args->maxlen);
    3356    89485313 :         ASSERT(args->minlen <= agsize);
    3357    89485313 :         ASSERT(args->mod < args->prod);
    3358             : 
    3359    89485313 :         if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
    3360    89485313 :             XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
    3361    89485459 :             args->minlen > args->maxlen || args->minlen > agsize ||
    3362    89485459 :             args->mod >= args->prod) {
    3363           0 :                 trace_xfs_alloc_vextent_badargs(args);
    3364           0 :                 return -ENOSPC;
    3365             :         }
    3366             : 
    3367    89485459 :         if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
    3368        3330 :                 trace_xfs_alloc_vextent_skip_deadlock(args);
    3369        3330 :                 return -ENOSPC;
    3370             :         }
    3371             :         return 0;
    3372             : 
    3373             : }
    3374             : 
    3375             : /*
    3376             :  * Prepare an AG for allocation. If the AG is not prepared to accept the
    3377             :  * allocation, return failure.
    3378             :  *
    3379             :  * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
    3380             :  * modified to hold their own perag references.
    3381             :  */
    3382             : static int
    3383   276259412 : xfs_alloc_vextent_prepare_ag(
    3384             :         struct xfs_alloc_arg    *args,
    3385             :         uint32_t                alloc_flags)
    3386             : {
    3387   276259412 :         bool                    need_pag = !args->pag;
    3388   276259412 :         int                     error;
    3389             : 
    3390   276259412 :         if (need_pag)
    3391           0 :                 args->pag = xfs_perag_get(args->mp, args->agno);
    3392             : 
    3393   276259412 :         args->agbp = NULL;
    3394   276259412 :         error = xfs_alloc_fix_freelist(args, alloc_flags);
    3395   276268039 :         if (error) {
    3396        1779 :                 trace_xfs_alloc_vextent_nofix(args);
    3397        1779 :                 if (need_pag)
    3398           0 :                         xfs_perag_put(args->pag);
    3399        1779 :                 args->agbno = NULLAGBLOCK;
    3400        1779 :                 return error;
    3401             :         }
    3402   276266260 :         if (!args->agbp) {
    3403             :                 /* cannot allocate in this AG at all */
    3404   190300321 :                 trace_xfs_alloc_vextent_noagbp(args);
    3405   190298436 :                 args->agbno = NULLAGBLOCK;
    3406   190298436 :                 return 0;
    3407             :         }
    3408    85965939 :         args->wasfromfl = 0;
    3409    85965939 :         return 0;
    3410             : }
    3411             : 
    3412             : /*
    3413             :  * Post-process allocation results to account for the allocation if it succeed
    3414             :  * and set the allocated block number correctly for the caller.
    3415             :  *
    3416             :  * XXX: we should really be returning ENOSPC for ENOSPC, not
    3417             :  * hiding it behind a "successful" NULLFSBLOCK allocation.
    3418             :  */
    3419             : static int
    3420    89564716 : xfs_alloc_vextent_finish(
    3421             :         struct xfs_alloc_arg    *args,
    3422             :         xfs_agnumber_t          minimum_agno,
    3423             :         int                     alloc_error,
    3424             :         bool                    drop_perag)
    3425             : {
    3426    89564716 :         struct xfs_mount        *mp = args->mp;
    3427    89564716 :         int                     error = 0;
    3428             : 
    3429             :         /*
    3430             :          * We can end up here with a locked AGF. If we failed, the caller is
    3431             :          * likely going to try to allocate again with different parameters, and
    3432             :          * that can widen the AGs that are searched for free space. If we have
    3433             :          * to do BMBT block allocation, we have to do a new allocation.
    3434             :          *
    3435             :          * Hence leaving this function with the AGF locked opens up potential
    3436             :          * ABBA AGF deadlocks because a future allocation attempt in this
    3437             :          * transaction may attempt to lock a lower number AGF.
    3438             :          *
    3439             :          * We can't release the AGF until the transaction is commited, so at
    3440             :          * this point we must update the "first allocation" tracker to point at
    3441             :          * this AG if the tracker is empty or points to a lower AG. This allows
    3442             :          * the next allocation attempt to be modified appropriately to avoid
    3443             :          * deadlocks.
    3444             :          */
    3445    89564716 :         if (args->agbp &&
    3446    85957659 :             (args->tp->t_highest_agno == NULLAGNUMBER ||
    3447     1630171 :              args->agno > minimum_agno))
    3448    84537260 :                 args->tp->t_highest_agno = args->agno;
    3449             : 
    3450             :         /*
    3451             :          * If the allocation failed with an error or we had an ENOSPC result,
    3452             :          * preserve the returned error whilst also marking the allocation result
    3453             :          * as "no extent allocated". This ensures that callers that fail to
    3454             :          * capture the error will still treat it as a failed allocation.
    3455             :          */
    3456    89564716 :         if (alloc_error || args->agbno == NULLAGBLOCK) {
    3457     4396608 :                 args->fsbno = NULLFSBLOCK;
    3458     4396608 :                 error = alloc_error;
    3459     4396608 :                 goto out_drop_perag;
    3460             :         }
    3461             : 
    3462    85168108 :         args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
    3463             : 
    3464    85168108 :         ASSERT(args->len >= args->minlen);
    3465    85168108 :         ASSERT(args->len <= args->maxlen);
    3466    85168108 :         ASSERT(args->agbno % args->alignment == 0);
    3467    85168108 :         XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
    3468             : 
    3469             :         /* if not file data, insert new block into the reverse map btree */
    3470    85166515 :         if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
    3471     4749079 :                 error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
    3472     4749079 :                                        args->agbno, args->len, &args->oinfo);
    3473     4749172 :                 if (error)
    3474          53 :                         goto out_drop_perag;
    3475             :         }
    3476             : 
    3477    85166555 :         if (!args->wasfromfl) {
    3478    85166575 :                 error = xfs_alloc_update_counters(args->tp, args->agbp,
    3479    85166575 :                                                   -((long)(args->len)));
    3480    85176048 :                 if (error)
    3481           0 :                         goto out_drop_perag;
    3482             : 
    3483    85176048 :                 ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
    3484             :                                 args->len));
    3485             :         }
    3486             : 
    3487    85177097 :         xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
    3488             : 
    3489    85150192 :         XFS_STATS_INC(mp, xs_allocx);
    3490    85173855 :         XFS_STATS_ADD(mp, xs_allocb, args->len);
    3491             : 
    3492    85176781 :         trace_xfs_alloc_vextent_finish(args);
    3493             : 
    3494    89566248 : out_drop_perag:
    3495    89566248 :         if (drop_perag && args->pag) {
    3496    83651044 :                 xfs_perag_rele(args->pag);
    3497    83667232 :                 args->pag = NULL;
    3498             :         }
    3499    89582436 :         return error;
    3500             : }
    3501             : 
    3502             : /*
    3503             :  * Allocate within a single AG only. This uses a best-fit length algorithm so if
    3504             :  * you need an exact sized allocation without locality constraints, this is the
    3505             :  * fastest way to do it.
    3506             :  *
    3507             :  * Caller is expected to hold a perag reference in args->pag.
    3508             :  */
    3509             : int
    3510           0 : xfs_alloc_vextent_this_ag(
    3511             :         struct xfs_alloc_arg    *args,
    3512             :         xfs_agnumber_t          agno)
    3513             : {
    3514           0 :         struct xfs_mount        *mp = args->mp;
    3515           0 :         xfs_agnumber_t          minimum_agno;
    3516           0 :         uint32_t                alloc_flags = 0;
    3517           0 :         int                     error;
    3518             : 
    3519           0 :         ASSERT(args->pag != NULL);
    3520           0 :         ASSERT(args->pag->pag_agno == agno);
    3521             : 
    3522           0 :         args->agno = agno;
    3523           0 :         args->agbno = 0;
    3524             : 
    3525           0 :         trace_xfs_alloc_vextent_this_ag(args);
    3526             : 
    3527           0 :         error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
    3528             :                         &minimum_agno);
    3529           0 :         if (error) {
    3530           0 :                 if (error == -ENOSPC)
    3531             :                         return 0;
    3532           0 :                 return error;
    3533             :         }
    3534             : 
    3535           0 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3536           0 :         if (!error && args->agbp)
    3537           0 :                 error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3538             : 
    3539           0 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3540             : }
    3541             : 
    3542             : /*
    3543             :  * Iterate all AGs trying to allocate an extent starting from @start_ag.
    3544             :  *
    3545             :  * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
    3546             :  * allocation attempts in @start_agno have locality information. If we fail to
    3547             :  * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
    3548             :  * we attempt to allocation in as there is no locality optimisation possible for
    3549             :  * those allocations.
    3550             :  *
    3551             :  * On return, args->pag may be left referenced if we finish before the "all
    3552             :  * failed" return point. The allocation finish still needs the perag, and
    3553             :  * so the caller will release it once they've finished the allocation.
    3554             :  *
    3555             :  * When we wrap the AG iteration at the end of the filesystem, we have to be
    3556             :  * careful not to wrap into AGs below ones we already have locked in the
    3557             :  * transaction if we are doing a blocking iteration. This will result in an
    3558             :  * out-of-order locking of AGFs and hence can cause deadlocks.
    3559             :  */
    3560             : static int
    3561    83807399 : xfs_alloc_vextent_iterate_ags(
    3562             :         struct xfs_alloc_arg    *args,
    3563             :         xfs_agnumber_t          minimum_agno,
    3564             :         xfs_agnumber_t          start_agno,
    3565             :         xfs_agblock_t           target_agbno,
    3566             :         uint32_t                alloc_flags)
    3567             : {
    3568    83807399 :         struct xfs_mount        *mp = args->mp;
    3569    83807399 :         xfs_agnumber_t          restart_agno = minimum_agno;
    3570    83807399 :         xfs_agnumber_t          agno;
    3571    83807399 :         int                     error = 0;
    3572             : 
    3573    83807399 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
    3574    83807399 :                 restart_agno = 0;
    3575           0 : restart:
    3576   271534098 :         for_each_perag_wrap_range(mp, start_agno, restart_agno,
    3577             :                         mp->m_sb.sb_agcount, agno, args->pag) {
    3578   270576411 :                 args->agno = agno;
    3579   270576411 :                 error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3580   270569262 :                 if (error)
    3581             :                         break;
    3582   270567485 :                 if (!args->agbp) {
    3583   187126272 :                         trace_xfs_alloc_vextent_loopfailed(args);
    3584   187125742 :                         continue;
    3585             :                 }
    3586             : 
    3587             :                 /*
    3588             :                  * Allocation is supposed to succeed now, so break out of the
    3589             :                  * loop regardless of whether we succeed or not.
    3590             :                  */
    3591    83441213 :                 if (args->agno == start_agno && target_agbno) {
    3592    77465184 :                         args->agbno = target_agbno;
    3593    77465184 :                         error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3594             :                 } else {
    3595     5976029 :                         args->agbno = 0;
    3596     5976029 :                         error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3597             :                 }
    3598             :                 break;
    3599             :         }
    3600    84488311 :         if (error) {
    3601        4978 :                 xfs_perag_rele(args->pag);
    3602        4978 :                 args->pag = NULL;
    3603        4978 :                 return error;
    3604             :         }
    3605    84483333 :         if (args->agbp)
    3606             :                 return 0;
    3607             : 
    3608             :         /*
    3609             :          * We didn't find an AG we can alloation from. If we were given
    3610             :          * constraining flags by the caller, drop them and retry the allocation
    3611             :          * without any constraints being set.
    3612             :          */
    3613     1029148 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
    3614      595798 :                 alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
    3615      595798 :                 restart_agno = minimum_agno;
    3616      595798 :                 goto restart;
    3617             :         }
    3618             : 
    3619      433350 :         ASSERT(args->pag == NULL);
    3620      433350 :         trace_xfs_alloc_vextent_allfailed(args);
    3621      433350 :         return 0;
    3622             : }
    3623             : 
    3624             : /*
    3625             :  * Iterate from the AGs from the start AG to the end of the filesystem, trying
    3626             :  * to allocate blocks. It starts with a near allocation attempt in the initial
    3627             :  * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
    3628             :  * back to zero if allowed by previous allocations in this transaction,
    3629             :  * otherwise will wrap back to the start AG and run a second blocking pass to
    3630             :  * the end of the filesystem.
    3631             :  */
    3632             : int
    3633    81101312 : xfs_alloc_vextent_start_ag(
    3634             :         struct xfs_alloc_arg    *args,
    3635             :         xfs_fsblock_t           target)
    3636             : {
    3637    81101312 :         struct xfs_mount        *mp = args->mp;
    3638    81101312 :         xfs_agnumber_t          minimum_agno;
    3639    81101312 :         xfs_agnumber_t          start_agno;
    3640    81101312 :         xfs_agnumber_t          rotorstep = xfs_rotorstep;
    3641    81101312 :         bool                    bump_rotor = false;
    3642    81101312 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3643    81101312 :         int                     error;
    3644             : 
    3645    81101312 :         ASSERT(args->pag == NULL);
    3646             : 
    3647    81101312 :         args->agno = NULLAGNUMBER;
    3648    81101312 :         args->agbno = NULLAGBLOCK;
    3649             : 
    3650    81101312 :         trace_xfs_alloc_vextent_start_ag(args);
    3651             : 
    3652    81081361 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3653    81069580 :         if (error) {
    3654           0 :                 if (error == -ENOSPC)
    3655             :                         return 0;
    3656           0 :                 return error;
    3657             :         }
    3658             : 
    3659    84256419 :         if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
    3660             :             xfs_is_inode32(mp)) {
    3661           0 :                 target = XFS_AGB_TO_FSB(mp,
    3662             :                                 ((mp->m_agfrotor / rotorstep) %
    3663             :                                 mp->m_sb.sb_agcount), 0);
    3664           0 :                 bump_rotor = 1;
    3665             :         }
    3666             : 
    3667    81069580 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3668    81096754 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3669    81069580 :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3670             : 
    3671    81157189 :         if (bump_rotor) {
    3672           0 :                 if (args->agno == start_agno)
    3673           0 :                         mp->m_agfrotor = (mp->m_agfrotor + 1) %
    3674           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3675             :                 else
    3676           0 :                         mp->m_agfrotor = (args->agno * rotorstep + 1) %
    3677           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3678             :         }
    3679             : 
    3680    81157189 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3681             : }
    3682             : 
    3683             : /*
    3684             :  * Iterate from the agno indicated via @target through to the end of the
    3685             :  * filesystem attempting blocking allocation. This does not wrap or try a second
    3686             :  * pass, so will not recurse into AGs lower than indicated by the target.
    3687             :  */
    3688             : int
    3689     2732349 : xfs_alloc_vextent_first_ag(
    3690             :         struct xfs_alloc_arg    *args,
    3691             :         xfs_fsblock_t           target)
    3692             :  {
    3693     2732349 :         struct xfs_mount        *mp = args->mp;
    3694     2732349 :         xfs_agnumber_t          minimum_agno;
    3695     2732349 :         xfs_agnumber_t          start_agno;
    3696     2732349 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3697     2732349 :         int                     error;
    3698             : 
    3699     2732349 :         ASSERT(args->pag == NULL);
    3700             : 
    3701     2732349 :         args->agno = NULLAGNUMBER;
    3702     2732349 :         args->agbno = NULLAGBLOCK;
    3703             : 
    3704     2732349 :         trace_xfs_alloc_vextent_first_ag(args);
    3705             : 
    3706     2729055 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3707     2728947 :         if (error) {
    3708           0 :                 if (error == -ENOSPC)
    3709             :                         return 0;
    3710           0 :                 return error;
    3711             :         }
    3712             : 
    3713     2728947 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3714     2729133 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3715     2728947 :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3716     2734874 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3717             : }
    3718             : 
    3719             : /*
    3720             :  * Allocate at the exact block target or fail. Caller is expected to hold a
    3721             :  * perag reference in args->pag.
    3722             :  */
    3723             : int
    3724     1834294 : xfs_alloc_vextent_exact_bno(
    3725             :         struct xfs_alloc_arg    *args,
    3726             :         xfs_fsblock_t           target)
    3727             : {
    3728     1834294 :         struct xfs_mount        *mp = args->mp;
    3729     1834294 :         xfs_agnumber_t          minimum_agno;
    3730     1834294 :         int                     error;
    3731             : 
    3732     1834294 :         ASSERT(args->pag != NULL);
    3733     1834294 :         ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3734             : 
    3735     1834294 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3736     1834294 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3737             : 
    3738     1834174 :         trace_xfs_alloc_vextent_exact_bno(args);
    3739             : 
    3740     1834203 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3741     1834232 :         if (error) {
    3742         478 :                 if (error == -ENOSPC)
    3743             :                         return 0;
    3744           0 :                 return error;
    3745             :         }
    3746             : 
    3747     1833754 :         error = xfs_alloc_vextent_prepare_ag(args, 0);
    3748     1833860 :         if (!error && args->agbp)
    3749     1052820 :                 error = xfs_alloc_ag_vextent_exact(args);
    3750             : 
    3751     1833904 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3752             : }
    3753             : 
    3754             : /*
    3755             :  * Allocate an extent as close to the target as possible. If there are not
    3756             :  * viable candidates in the AG, then fail the allocation.
    3757             :  *
    3758             :  * Caller may or may not have a per-ag reference in args->pag.
    3759             :  */
    3760             : int
    3761     3856271 : xfs_alloc_vextent_near_bno(
    3762             :         struct xfs_alloc_arg    *args,
    3763             :         xfs_fsblock_t           target)
    3764             : {
    3765     3856271 :         struct xfs_mount        *mp = args->mp;
    3766     3856271 :         xfs_agnumber_t          minimum_agno;
    3767     3856271 :         bool                    needs_perag = args->pag == NULL;
    3768     3856271 :         uint32_t                alloc_flags = 0;
    3769     3856271 :         int                     error;
    3770             : 
    3771     3856271 :         if (!needs_perag)
    3772     3647811 :                 ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3773             : 
    3774     3856271 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3775     3856271 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3776             : 
    3777     3856030 :         trace_xfs_alloc_vextent_near_bno(args);
    3778             : 
    3779     3856060 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3780     3856171 :         if (error) {
    3781        2852 :                 if (error == -ENOSPC)
    3782             :                         return 0;
    3783           0 :                 return error;
    3784             :         }
    3785             : 
    3786     3853319 :         if (needs_perag)
    3787      208356 :                 args->pag = xfs_perag_grab(mp, args->agno);
    3788             : 
    3789     3853578 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3790     3853623 :         if (!error && args->agbp)
    3791     1461621 :                 error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3792             : 
    3793     3853602 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
    3794             : }
    3795             : 
    3796             : /* Ensure that the freelist is at full capacity. */
    3797             : int
    3798   505215970 : xfs_free_extent_fix_freelist(
    3799             :         struct xfs_trans        *tp,
    3800             :         struct xfs_perag        *pag,
    3801             :         struct xfs_buf          **agbp)
    3802             : {
    3803   505215970 :         struct xfs_alloc_arg    args;
    3804   505215970 :         int                     error;
    3805             : 
    3806   505215970 :         memset(&args, 0, sizeof(struct xfs_alloc_arg));
    3807   505215970 :         args.tp = tp;
    3808   505215970 :         args.mp = tp->t_mountp;
    3809   505215970 :         args.agno = pag->pag_agno;
    3810   505215970 :         args.pag = pag;
    3811             : 
    3812             :         /*
    3813             :          * validate that the block number is legal - the enables us to detect
    3814             :          * and handle a silent filesystem corruption rather than crashing.
    3815             :          */
    3816   505215970 :         if (args.agno >= args.mp->m_sb.sb_agcount)
    3817             :                 return -EFSCORRUPTED;
    3818             : 
    3819   505215970 :         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
    3820   505226031 :         if (error)
    3821             :                 return error;
    3822             : 
    3823   505224511 :         *agbp = args.agbp;
    3824   505224511 :         return 0;
    3825             : }
    3826             : 
    3827             : /*
    3828             :  * Free an extent.
    3829             :  * Just break up the extent address and hand off to xfs_free_ag_extent
    3830             :  * after fixing up the freelist.
    3831             :  */
    3832             : int
    3833    98536263 : __xfs_free_extent(
    3834             :         struct xfs_trans                *tp,
    3835             :         struct xfs_perag                *pag,
    3836             :         xfs_agblock_t                   agbno,
    3837             :         xfs_extlen_t                    len,
    3838             :         const struct xfs_owner_info     *oinfo,
    3839             :         enum xfs_ag_resv_type           type,
    3840             :         bool                            skip_discard)
    3841             : {
    3842    98536263 :         struct xfs_mount                *mp = tp->t_mountp;
    3843    98536263 :         struct xfs_buf                  *agbp;
    3844    98536263 :         struct xfs_agf                  *agf;
    3845    98536263 :         int                             error;
    3846    98536263 :         unsigned int                    busy_flags = 0;
    3847             : 
    3848    98536263 :         ASSERT(len != 0);
    3849    98536263 :         ASSERT(type != XFS_AG_RESV_AGFL);
    3850             : 
    3851    98536263 :         if (XFS_TEST_ERROR(false, mp,
    3852             :                         XFS_ERRTAG_FREE_EXTENT))
    3853             :                 return -EIO;
    3854             : 
    3855    98531201 :         error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
    3856    98551884 :         if (error) {
    3857          34 :                 if (xfs_metadata_is_sick(error))
    3858           0 :                         xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3859          34 :                 return error;
    3860             :         }
    3861             : 
    3862    98551850 :         agf = agbp->b_addr;
    3863             : 
    3864    98551850 :         if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
    3865           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3866           0 :                 error = -EFSCORRUPTED;
    3867           0 :                 goto err_release;
    3868             :         }
    3869             : 
    3870             :         /* validate the extent size is legal now we have the agf locked */
    3871    98551850 :         if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
    3872           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3873           0 :                 error = -EFSCORRUPTED;
    3874           0 :                 goto err_release;
    3875             :         }
    3876             : 
    3877    98551850 :         error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
    3878             :                         type);
    3879    98539070 :         if (error)
    3880         278 :                 goto err_release;
    3881             : 
    3882    98538792 :         if (skip_discard)
    3883     6993138 :                 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
    3884    98538792 :         xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
    3885    98538792 :         return 0;
    3886             : 
    3887         278 : err_release:
    3888         278 :         xfs_trans_brelse(tp, agbp);
    3889         278 :         return error;
    3890             : }
    3891             : 
    3892             : struct xfs_alloc_query_range_info {
    3893             :         xfs_alloc_query_range_fn        fn;
    3894             :         void                            *priv;
    3895             : };
    3896             : 
    3897             : /* Format btree record and pass to our callback. */
    3898             : STATIC int
    3899  2264400203 : xfs_alloc_query_range_helper(
    3900             :         struct xfs_btree_cur            *cur,
    3901             :         const union xfs_btree_rec       *rec,
    3902             :         void                            *priv)
    3903             : {
    3904  2264400203 :         struct xfs_alloc_query_range_info       *query = priv;
    3905  2264400203 :         struct xfs_alloc_rec_incore             irec;
    3906  2264400203 :         xfs_failaddr_t                          fa;
    3907             : 
    3908  2264400203 :         xfs_alloc_btrec_to_irec(rec, &irec);
    3909  2264400203 :         fa = xfs_alloc_check_irec(cur, &irec);
    3910  2264371165 :         if (fa)
    3911           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
    3912             : 
    3913  2264371165 :         return query->fn(cur, &irec, query->priv);
    3914             : }
    3915             : 
    3916             : /* Find all free space within a given range of blocks. */
    3917             : int
    3918       57341 : xfs_alloc_query_range(
    3919             :         struct xfs_btree_cur                    *cur,
    3920             :         const struct xfs_alloc_rec_incore       *low_rec,
    3921             :         const struct xfs_alloc_rec_incore       *high_rec,
    3922             :         xfs_alloc_query_range_fn                fn,
    3923             :         void                                    *priv)
    3924             : {
    3925       57341 :         union xfs_btree_irec                    low_brec = { .a = *low_rec };
    3926       57341 :         union xfs_btree_irec                    high_brec = { .a = *high_rec };
    3927       57341 :         struct xfs_alloc_query_range_info       query = { .priv = priv, .fn = fn };
    3928             : 
    3929       57341 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3930       57341 :         return xfs_btree_query_range(cur, &low_brec, &high_brec,
    3931             :                         xfs_alloc_query_range_helper, &query);
    3932             : }
    3933             : 
    3934             : /* Find all free space records. */
    3935             : int
    3936      685695 : xfs_alloc_query_all(
    3937             :         struct xfs_btree_cur                    *cur,
    3938             :         xfs_alloc_query_range_fn                fn,
    3939             :         void                                    *priv)
    3940             : {
    3941      685695 :         struct xfs_alloc_query_range_info       query;
    3942             : 
    3943      685695 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3944      685695 :         query.priv = priv;
    3945      685695 :         query.fn = fn;
    3946      685695 :         return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
    3947             : }
    3948             : 
    3949             : /*
    3950             :  * Scan part of the keyspace of the free space and tell us if the area has no
    3951             :  * records, is fully mapped by records, or is partially filled.
    3952             :  */
    3953             : int
    3954  1458622660 : xfs_alloc_has_records(
    3955             :         struct xfs_btree_cur    *cur,
    3956             :         xfs_agblock_t           bno,
    3957             :         xfs_extlen_t            len,
    3958             :         enum xbtree_recpacking  *outcome)
    3959             : {
    3960  1458622660 :         union xfs_btree_irec    low;
    3961  1458622660 :         union xfs_btree_irec    high;
    3962             : 
    3963  1458622660 :         memset(&low, 0, sizeof(low));
    3964  1458622660 :         low.a.ar_startblock = bno;
    3965  1458622660 :         memset(&high, 0xFF, sizeof(high));
    3966  1458622660 :         high.a.ar_startblock = bno + len - 1;
    3967             : 
    3968  1458622660 :         return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
    3969             : }
    3970             : 
    3971             : /*
    3972             :  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
    3973             :  * error code or XFS_ITER_*.
    3974             :  */
    3975             : int
    3976    59312116 : xfs_agfl_walk(
    3977             :         struct xfs_mount        *mp,
    3978             :         struct xfs_agf          *agf,
    3979             :         struct xfs_buf          *agflbp,
    3980             :         xfs_agfl_walk_fn        walk_fn,
    3981             :         void                    *priv)
    3982             : {
    3983    59312116 :         __be32                  *agfl_bno;
    3984    59312116 :         unsigned int            i;
    3985    59312116 :         int                     error;
    3986             : 
    3987    59312116 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    3988    59312116 :         i = be32_to_cpu(agf->agf_flfirst);
    3989             : 
    3990             :         /* Nothing to walk in an empty AGFL. */
    3991    59312116 :         if (agf->agf_flcount == cpu_to_be32(0))
    3992             :                 return 0;
    3993             : 
    3994             :         /* Otherwise, walk from first to last, wrapping as needed. */
    3995   573821857 :         for (;;) {
    3996   573821857 :                 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
    3997   573822008 :                 if (error)
    3998     2226582 :                         return error;
    3999   571595426 :                 if (i == be32_to_cpu(agf->agf_fllast))
    4000             :                         break;
    4001  1029078220 :                 if (++i == xfs_agfl_size(mp))
    4002      501264 :                         i = 0;
    4003             :         }
    4004             : 
    4005             :         return 0;
    4006             : }
    4007             : 
    4008             : int __init
    4009          50 : xfs_extfree_intent_init_cache(void)
    4010             : {
    4011          50 :         xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
    4012             :                         sizeof(struct xfs_extent_free_item),
    4013             :                         0, 0, NULL);
    4014             : 
    4015          50 :         return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
    4016             : }
    4017             : 
    4018             : void
    4019          49 : xfs_extfree_intent_destroy_cache(void)
    4020             : {
    4021          49 :         kmem_cache_destroy(xfs_extfree_item_cache);
    4022          49 :         xfs_extfree_item_cache = NULL;
    4023          49 : }

Generated by: LCOV version 1.14