LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_alloc.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 1574 1874 84.0 %
Date: 2023-07-31 20:08:34 Functions: 74 76 97.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_format.h"
       9             : #include "xfs_log_format.h"
      10             : #include "xfs_shared.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_defer.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_rmap.h"
      17             : #include "xfs_alloc_btree.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_extent_busy.h"
      20             : #include "xfs_errortag.h"
      21             : #include "xfs_error.h"
      22             : #include "xfs_trace.h"
      23             : #include "xfs_trans.h"
      24             : #include "xfs_buf_item.h"
      25             : #include "xfs_log.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_bmap.h"
      29             : #include "xfs_health.h"
      30             : 
      31             : struct kmem_cache       *xfs_extfree_item_cache;
      32             : 
      33             : struct workqueue_struct *xfs_alloc_wq;
      34             : 
      35             : #define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
      36             : 
      37             : #define XFSA_FIXUP_BNO_OK       1
      38             : #define XFSA_FIXUP_CNT_OK       2
      39             : 
      40             : /*
      41             :  * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
      42             :  * the beginning of the block for a proper header with the location information
      43             :  * and CRC.
      44             :  */
      45             : unsigned int
      46    39948989 : xfs_agfl_size(
      47             :         struct xfs_mount        *mp)
      48             : {
      49  1940126409 :         unsigned int            size = mp->m_sb.sb_sectsize;
      50             : 
      51    39948989 :         if (xfs_has_crc(mp))
      52  1926947107 :                 size -= sizeof(struct xfs_agfl);
      53             : 
      54  1940126409 :         return size / sizeof(xfs_agblock_t);
      55             : }
      56             : 
      57             : unsigned int
      58      385513 : xfs_refc_block(
      59             :         struct xfs_mount        *mp)
      60             : {
      61      385513 :         if (xfs_has_rmapbt(mp))
      62      371879 :                 return XFS_RMAP_BLOCK(mp) + 1;
      63       13634 :         if (xfs_has_finobt(mp))
      64          37 :                 return XFS_FIBT_BLOCK(mp) + 1;
      65       13597 :         return XFS_IBT_BLOCK(mp) + 1;
      66             : }
      67             : 
      68             : xfs_extlen_t
      69       80854 : xfs_prealloc_blocks(
      70             :         struct xfs_mount        *mp)
      71             : {
      72       80854 :         if (xfs_has_reflink(mp))
      73       75793 :                 return xfs_refc_block(mp) + 1;
      74        5061 :         if (xfs_has_rmapbt(mp))
      75          48 :                 return XFS_RMAP_BLOCK(mp) + 1;
      76        5013 :         if (xfs_has_finobt(mp))
      77        4753 :                 return XFS_FIBT_BLOCK(mp) + 1;
      78         260 :         return XFS_IBT_BLOCK(mp) + 1;
      79             : }
      80             : 
      81             : /*
      82             :  * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
      83             :  * guarantee that we can refill the AGFL prior to allocating space in a nearly
      84             :  * full AG.  Although the space described by the free space btrees, the
      85             :  * blocks used by the freesp btrees themselves, and the blocks owned by the
      86             :  * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
      87             :  * free space in the AG drop so low that the free space btrees cannot refill an
      88             :  * empty AGFL up to the minimum level.  Rather than grind through empty AGs
      89             :  * until the fs goes down, we subtract this many AG blocks from the incore
      90             :  * fdblocks to ensure user allocation does not overcommit the space the
      91             :  * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
      92             :  * withhold space from xfs_mod_fdblocks, so we do not account for that here.
      93             :  */
      94             : #define XFS_ALLOCBT_AGFL_RESERVE        4
      95             : 
      96             : /*
      97             :  * Compute the number of blocks that we set aside to guarantee the ability to
      98             :  * refill the AGFL and handle a full bmap btree split.
      99             :  *
     100             :  * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
     101             :  * AGF buffer (PV 947395), we place constraints on the relationship among
     102             :  * actual allocations for data blocks, freelist blocks, and potential file data
     103             :  * bmap btree blocks. However, these restrictions may result in no actual space
     104             :  * allocated for a delayed extent, for example, a data block in a certain AG is
     105             :  * allocated but there is no additional block for the additional bmap btree
     106             :  * block due to a split of the bmap btree of the file. The result of this may
     107             :  * lead to an infinite loop when the file gets flushed to disk and all delayed
     108             :  * extents need to be actually allocated. To get around this, we explicitly set
     109             :  * aside a few blocks which will not be reserved in delayed allocation.
     110             :  *
     111             :  * For each AG, we need to reserve enough blocks to replenish a totally empty
     112             :  * AGFL and 4 more to handle a potential split of the file's bmap btree.
     113             :  */
     114             : unsigned int
     115       81229 : xfs_alloc_set_aside(
     116             :         struct xfs_mount        *mp)
     117             : {
     118       81229 :         return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
     119             : }
     120             : 
     121             : /*
     122             :  * When deciding how much space to allocate out of an AG, we limit the
     123             :  * allocation maximum size to the size the AG. However, we cannot use all the
     124             :  * blocks in the AG - some are permanently used by metadata. These
     125             :  * blocks are generally:
     126             :  *      - the AG superblock, AGF, AGI and AGFL
     127             :  *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
     128             :  *        the AGI free inode and rmap btree root blocks.
     129             :  *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
     130             :  *      - the rmapbt root block
     131             :  *
     132             :  * The AG headers are sector sized, so the amount of space they take up is
     133             :  * dependent on filesystem geometry. The others are all single blocks.
     134             :  */
     135             : unsigned int
     136       66982 : xfs_alloc_ag_max_usable(
     137             :         struct xfs_mount        *mp)
     138             : {
     139       66982 :         unsigned int            blocks;
     140             : 
     141       66982 :         blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
     142       66982 :         blocks += XFS_ALLOCBT_AGFL_RESERVE;
     143       66982 :         blocks += 3;                    /* AGF, AGI btree root blocks */
     144       66982 :         if (xfs_has_finobt(mp))
     145       66713 :                 blocks++;               /* finobt root block */
     146       66982 :         if (xfs_has_rmapbt(mp))
     147       62365 :                 blocks++;               /* rmap root block */
     148       66982 :         if (xfs_has_reflink(mp))
     149       62317 :                 blocks++;               /* refcount root block */
     150             : 
     151       66982 :         return mp->m_sb.sb_agblocks - blocks;
     152             : }
     153             : 
     154             : /*
     155             :  * Lookup the record equal to [bno, len] in the btree given by cur.
     156             :  */
     157             : STATIC int                              /* error */
     158   433096049 : xfs_alloc_lookup_eq(
     159             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     160             :         xfs_agblock_t           bno,    /* starting block of extent */
     161             :         xfs_extlen_t            len,    /* length of extent */
     162             :         int                     *stat)  /* success/failure */
     163             : {
     164   433096049 :         int                     error;
     165             : 
     166   433096049 :         cur->bc_rec.a.ar_startblock = bno;
     167   433096049 :         cur->bc_rec.a.ar_blockcount = len;
     168   433096049 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
     169   433123920 :         cur->bc_ag.abt.active = (*stat == 1);
     170   433123920 :         return error;
     171             : }
     172             : 
     173             : /*
     174             :  * Lookup the first record greater than or equal to [bno, len]
     175             :  * in the btree given by cur.
     176             :  */
     177             : int                             /* error */
     178       15969 : xfs_alloc_lookup_ge(
     179             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     180             :         xfs_agblock_t           bno,    /* starting block of extent */
     181             :         xfs_extlen_t            len,    /* length of extent */
     182             :         int                     *stat)  /* success/failure */
     183             : {
     184   363371962 :         int                     error;
     185             : 
     186   363371962 :         cur->bc_rec.a.ar_startblock = bno;
     187   363371962 :         cur->bc_rec.a.ar_blockcount = len;
     188       15969 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
     189   363344230 :         cur->bc_ag.abt.active = (*stat == 1);
     190   363344230 :         return error;
     191             : }
     192             : 
     193             : /*
     194             :  * Lookup the first record less than or equal to [bno, len]
     195             :  * in the btree given by cur.
     196             :  */
     197             : int                                     /* error */
     198   433803591 : xfs_alloc_lookup_le(
     199             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     200             :         xfs_agblock_t           bno,    /* starting block of extent */
     201             :         xfs_extlen_t            len,    /* length of extent */
     202             :         int                     *stat)  /* success/failure */
     203             : {
     204   593753476 :         int                     error;
     205   593753476 :         cur->bc_rec.a.ar_startblock = bno;
     206   593753476 :         cur->bc_rec.a.ar_blockcount = len;
     207   433803591 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
     208   593800697 :         cur->bc_ag.abt.active = (*stat == 1);
     209   593800697 :         return error;
     210             : }
     211             : 
     212             : static inline bool
     213 11628546039 : xfs_alloc_cur_active(
     214             :         struct xfs_btree_cur    *cur)
     215             : {
     216 11628546039 :         return cur && cur->bc_ag.abt.active;
     217             : }
     218             : 
     219             : /*
     220             :  * Update the record referred to by cur to the value given
     221             :  * by [bno, len].
     222             :  * This either works (return 0) or gets an EFSCORRUPTED error.
     223             :  */
     224             : STATIC int                              /* error */
     225   112995709 : xfs_alloc_update(
     226             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     227             :         xfs_agblock_t           bno,    /* starting block of extent */
     228             :         xfs_extlen_t            len)    /* length of extent */
     229             : {
     230   112995709 :         union xfs_btree_rec     rec;
     231             : 
     232   112995709 :         rec.alloc.ar_startblock = cpu_to_be32(bno);
     233   112995709 :         rec.alloc.ar_blockcount = cpu_to_be32(len);
     234   112995709 :         return xfs_btree_update(cur, &rec);
     235             : }
     236             : 
     237             : /* Convert the ondisk btree record to its incore representation. */
     238             : void
     239   432618654 : xfs_alloc_btrec_to_irec(
     240             :         const union xfs_btree_rec       *rec,
     241             :         struct xfs_alloc_rec_incore     *irec)
     242             : {
     243 10049532570 :         irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
     244 10049532570 :         irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
     245   432618654 : }
     246             : 
     247             : inline xfs_failaddr_t
     248 10094486663 : xfs_alloc_check_perag_irec(
     249             :         struct xfs_perag                *pag,
     250             :         const struct xfs_alloc_rec_incore *irec)
     251             : {
     252 10094486663 :         if (irec->ar_blockcount == 0)
     253           0 :                 return __this_address;
     254             : 
     255             :         /* check for valid extent range, including overflow */
     256 10094486663 :         if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
     257           0 :                 return __this_address;
     258             : 
     259             :         return NULL;
     260             : }
     261             : 
     262             : /* Simple checks for free space records. */
     263             : xfs_failaddr_t
     264   432619832 : xfs_alloc_check_irec(
     265             :         struct xfs_btree_cur            *cur,
     266             :         const struct xfs_alloc_rec_incore *irec)
     267             : {
     268   432619832 :         return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
     269             : }
     270             : 
     271             : static inline int
     272           0 : xfs_alloc_complain_bad_rec(
     273             :         struct xfs_btree_cur            *cur,
     274             :         xfs_failaddr_t                  fa,
     275             :         const struct xfs_alloc_rec_incore *irec)
     276             : {
     277           0 :         struct xfs_mount                *mp = cur->bc_mp;
     278             : 
     279           0 :         xfs_warn(mp,
     280             :                 "%s Freespace BTree record corruption in AG %d detected at %pS!",
     281             :                 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
     282             :                 cur->bc_ag.pag->pag_agno, fa);
     283           0 :         xfs_warn(mp,
     284             :                 "start block 0x%x block count 0x%x", irec->ar_startblock,
     285             :                 irec->ar_blockcount);
     286           0 :         xfs_btree_mark_sick(cur);
     287           0 :         return -EFSCORRUPTED;
     288             : }
     289             : 
     290             : /*
     291             :  * Get the data from the pointed-to record.
     292             :  */
     293             : int                                     /* error */
     294  7530206785 : xfs_alloc_get_rec(
     295             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     296             :         xfs_agblock_t           *bno,   /* output: starting block of extent */
     297             :         xfs_extlen_t            *len,   /* output: length of extent */
     298             :         int                     *stat)  /* output: success/failure */
     299             : {
     300  7530206785 :         struct xfs_alloc_rec_incore irec;
     301  7530206785 :         union xfs_btree_rec     *rec;
     302  7530206785 :         xfs_failaddr_t          fa;
     303  7530206785 :         int                     error;
     304             : 
     305  7530206785 :         error = xfs_btree_get_rec(cur, &rec, stat);
     306  7513608311 :         if (error || !(*stat))
     307             :                 return error;
     308             : 
     309  7514544713 :         xfs_alloc_btrec_to_irec(rec, &irec);
     310  7514544713 :         fa = xfs_alloc_check_irec(cur, &irec);
     311  7510292197 :         if (fa)
     312           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
     313             : 
     314  7510292197 :         *bno = irec.ar_startblock;
     315  7510292197 :         *len = irec.ar_blockcount;
     316  7510292197 :         return 0;
     317             : }
     318             : 
     319             : /*
     320             :  * Compute aligned version of the found extent.
     321             :  * Takes alignment and min length into account.
     322             :  */
     323             : STATIC bool
     324  6120110959 : xfs_alloc_compute_aligned(
     325             :         xfs_alloc_arg_t *args,          /* allocation argument structure */
     326             :         xfs_agblock_t   foundbno,       /* starting block in found extent */
     327             :         xfs_extlen_t    foundlen,       /* length in found extent */
     328             :         xfs_agblock_t   *resbno,        /* result block number */
     329             :         xfs_extlen_t    *reslen,        /* result length */
     330             :         unsigned        *busy_gen)
     331             : {
     332  6120110959 :         xfs_agblock_t   bno = foundbno;
     333  6120110959 :         xfs_extlen_t    len = foundlen;
     334  6120110959 :         xfs_extlen_t    diff;
     335  6120110959 :         bool            busy;
     336             : 
     337             :         /* Trim busy sections out of found extent */
     338  6120110959 :         busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
     339             : 
     340             :         /*
     341             :          * If we have a largish extent that happens to start before min_agbno,
     342             :          * see if we can shift it into range...
     343             :          */
     344  6130077905 :         if (bno < args->min_agbno && bno + len > args->min_agbno) {
     345        4233 :                 diff = args->min_agbno - bno;
     346        4233 :                 if (len > diff) {
     347        4233 :                         bno += diff;
     348        4233 :                         len -= diff;
     349             :                 }
     350             :         }
     351             : 
     352  6130077905 :         if (args->alignment > 1 && len >= args->minlen) {
     353    19047292 :                 xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
     354             : 
     355    19047292 :                 diff = aligned_bno - bno;
     356             : 
     357    19047292 :                 *resbno = aligned_bno;
     358    19047292 :                 *reslen = diff >= len ? 0 : len - diff;
     359             :         } else {
     360  6111030613 :                 *resbno = bno;
     361  6111030613 :                 *reslen = len;
     362             :         }
     363             : 
     364  6130077905 :         return busy;
     365             : }
     366             : 
     367             : /*
     368             :  * Compute best start block and diff for "near" allocations.
     369             :  * freelen >= wantlen already checked by caller.
     370             :  */
     371             : STATIC xfs_extlen_t                     /* difference value (absolute) */
     372  2690274531 : xfs_alloc_compute_diff(
     373             :         xfs_agblock_t   wantbno,        /* target starting block */
     374             :         xfs_extlen_t    wantlen,        /* target length */
     375             :         xfs_extlen_t    alignment,      /* target alignment */
     376             :         int             datatype,       /* are we allocating data? */
     377             :         xfs_agblock_t   freebno,        /* freespace's starting block */
     378             :         xfs_extlen_t    freelen,        /* freespace's length */
     379             :         xfs_agblock_t   *newbnop)       /* result: best start block from free */
     380             : {
     381  2690274531 :         xfs_agblock_t   freeend;        /* end of freespace extent */
     382  2690274531 :         xfs_agblock_t   newbno1;        /* return block number */
     383  2690274531 :         xfs_agblock_t   newbno2;        /* other new block number */
     384  2690274531 :         xfs_extlen_t    newlen1=0;      /* length with newbno1 */
     385  2690274531 :         xfs_extlen_t    newlen2=0;      /* length with newbno2 */
     386  2690274531 :         xfs_agblock_t   wantend;        /* end of target extent */
     387  2690274531 :         bool            userdata = datatype & XFS_ALLOC_USERDATA;
     388             : 
     389  2690274531 :         ASSERT(freelen >= wantlen);
     390  2690274531 :         freeend = freebno + freelen;
     391  2690274531 :         wantend = wantbno + wantlen;
     392             :         /*
     393             :          * We want to allocate from the start of a free extent if it is past
     394             :          * the desired block or if we are allocating user data and the free
     395             :          * extent is before desired block. The second case is there to allow
     396             :          * for contiguous allocation from the remaining free space if the file
     397             :          * grows in the short term.
     398             :          */
     399  2690274531 :         if (freebno >= wantbno || (userdata && freeend < wantend)) {
     400  1991507135 :                 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
     401           0 :                         newbno1 = NULLAGBLOCK;
     402   698767396 :         } else if (freeend >= wantend && alignment > 1) {
     403          23 :                 newbno1 = roundup(wantbno, alignment);
     404          23 :                 newbno2 = newbno1 - alignment;
     405          23 :                 if (newbno1 >= freeend)
     406             :                         newbno1 = NULLAGBLOCK;
     407             :                 else
     408          23 :                         newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
     409          23 :                 if (newbno2 < freebno)
     410             :                         newbno2 = NULLAGBLOCK;
     411             :                 else
     412          23 :                         newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
     413          23 :                 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
     414          23 :                         if (newlen1 < newlen2 ||
     415          19 :                             (newlen1 == newlen2 &&
     416          19 :                              XFS_ABSDIFF(newbno1, wantbno) >
     417          19 :                              XFS_ABSDIFF(newbno2, wantbno)))
     418          13 :                                 newbno1 = newbno2;
     419           0 :                 } else if (newbno2 != NULLAGBLOCK)
     420           0 :                         newbno1 = newbno2;
     421   698767373 :         } else if (freeend >= wantend) {
     422             :                 newbno1 = wantbno;
     423   696748987 :         } else if (alignment > 1) {
     424       26978 :                 newbno1 = roundup(freeend - wantlen, alignment);
     425       26978 :                 if (newbno1 > freeend - wantlen &&
     426       17338 :                     newbno1 - alignment >= freebno)
     427             :                         newbno1 -= alignment;
     428        9640 :                 else if (newbno1 >= freeend)
     429           0 :                         newbno1 = NULLAGBLOCK;
     430             :         } else
     431   696722009 :                 newbno1 = freeend - wantlen;
     432  2690274531 :         *newbnop = newbno1;
     433  2690274531 :         return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
     434             : }
     435             : 
     436             : /*
     437             :  * Fix up the length, based on mod and prod.
     438             :  * len should be k * prod + mod for some k.
     439             :  * If len is too small it is returned unchanged.
     440             :  * If len hits maxlen it is left alone.
     441             :  */
     442             : STATIC void
     443  2698914646 : xfs_alloc_fix_len(
     444             :         xfs_alloc_arg_t *args)          /* allocation argument structure */
     445             : {
     446  2698914646 :         xfs_extlen_t    k;
     447  2698914646 :         xfs_extlen_t    rlen;
     448             : 
     449  2698914646 :         ASSERT(args->mod < args->prod);
     450  2698914646 :         rlen = args->len;
     451  2698914646 :         ASSERT(rlen >= args->minlen);
     452  2698914646 :         ASSERT(rlen <= args->maxlen);
     453  2698914646 :         if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
     454      214421 :             (args->mod == 0 && rlen < args->prod))
     455             :                 return;
     456       24602 :         k = rlen % args->prod;
     457       24602 :         if (k == args->mod)
     458             :                 return;
     459       20738 :         if (k > args->mod)
     460       20361 :                 rlen = rlen - (k - args->mod);
     461             :         else
     462         377 :                 rlen = rlen - args->prod + (args->mod - k);
     463             :         /* casts to (int) catch length underflows */
     464       20738 :         if ((int)rlen < (int)args->minlen)
     465             :                 return;
     466        1706 :         ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
     467        1706 :         ASSERT(rlen % args->prod == args->mod);
     468        1706 :         ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
     469             :                 rlen + args->minleft);
     470        1706 :         args->len = rlen;
     471             : }
     472             : 
     473             : /*
     474             :  * Update the two btrees, logically removing from freespace the extent
     475             :  * starting at rbno, rlen blocks.  The extent is contained within the
     476             :  * actual (current) free extent fbno for flen blocks.
     477             :  * Flags are passed in indicating whether the cursors are set to the
     478             :  * relevant records.
     479             :  */
     480             : STATIC int                              /* error code */
     481    98618344 : xfs_alloc_fixup_trees(
     482             :         struct xfs_btree_cur *cnt_cur,  /* cursor for by-size btree */
     483             :         struct xfs_btree_cur *bno_cur,  /* cursor for by-block btree */
     484             :         xfs_agblock_t   fbno,           /* starting block of free extent */
     485             :         xfs_extlen_t    flen,           /* length of free extent */
     486             :         xfs_agblock_t   rbno,           /* starting block of returned extent */
     487             :         xfs_extlen_t    rlen,           /* length of returned extent */
     488             :         int             flags)          /* flags, XFSA_FIXUP_... */
     489             : {
     490    98618344 :         int             error;          /* error code */
     491    98618344 :         int             i;              /* operation results */
     492    98618344 :         xfs_agblock_t   nfbno1;         /* first new free startblock */
     493    98618344 :         xfs_agblock_t   nfbno2;         /* second new free startblock */
     494    98618344 :         xfs_extlen_t    nflen1=0;       /* first new free length */
     495    98618344 :         xfs_extlen_t    nflen2=0;       /* second new free length */
     496    98618344 :         struct xfs_mount *mp;
     497             : 
     498    98618344 :         mp = cnt_cur->bc_mp;
     499             : 
     500             :         /*
     501             :          * Look up the record in the by-size tree if necessary.
     502             :          */
     503    98618344 :         if (flags & XFSA_FIXUP_CNT_OK) {
     504             : #ifdef DEBUG
     505     7826360 :                 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
     506             :                         return error;
     507     7826378 :                 if (XFS_IS_CORRUPT(mp,
     508             :                                    i != 1 ||
     509             :                                    nfbno1 != fbno ||
     510             :                                    nflen1 != flen)) {
     511           0 :                         xfs_btree_mark_sick(cnt_cur);
     512           0 :                         return -EFSCORRUPTED;
     513             :                 }
     514             : #endif
     515             :         } else {
     516    90791984 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
     517             :                         return error;
     518    90798764 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     519           0 :                         xfs_btree_mark_sick(cnt_cur);
     520           0 :                         return -EFSCORRUPTED;
     521             :                 }
     522             :         }
     523             :         /*
     524             :          * Look up the record in the by-block tree if necessary.
     525             :          */
     526    98625142 :         if (flags & XFSA_FIXUP_BNO_OK) {
     527             : #ifdef DEBUG
     528      410964 :                 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
     529             :                         return error;
     530      410970 :                 if (XFS_IS_CORRUPT(mp,
     531             :                                    i != 1 ||
     532             :                                    nfbno1 != fbno ||
     533             :                                    nflen1 != flen)) {
     534           0 :                         xfs_btree_mark_sick(bno_cur);
     535           0 :                         return -EFSCORRUPTED;
     536             :                 }
     537             : #endif
     538             :         } else {
     539    98214178 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
     540             :                         return error;
     541    98213974 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     542           0 :                         xfs_btree_mark_sick(bno_cur);
     543           0 :                         return -EFSCORRUPTED;
     544             :                 }
     545             :         }
     546             : 
     547             : #ifdef DEBUG
     548    98624944 :         if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
     549    66374572 :                 struct xfs_btree_block  *bnoblock;
     550    66374572 :                 struct xfs_btree_block  *cntblock;
     551             : 
     552    66374572 :                 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
     553    66374572 :                 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
     554             : 
     555    66374572 :                 if (XFS_IS_CORRUPT(mp,
     556             :                                    bnoblock->bb_numrecs !=
     557             :                                    cntblock->bb_numrecs)) {
     558           0 :                         xfs_btree_mark_sick(bno_cur);
     559           0 :                         return -EFSCORRUPTED;
     560             :                 }
     561             :         }
     562             : #endif
     563             : 
     564             :         /*
     565             :          * Deal with all four cases: the allocated record is contained
     566             :          * within the freespace record, so we can have new freespace
     567             :          * at either (or both) end, or no freespace remaining.
     568             :          */
     569    98624944 :         if (rbno == fbno && rlen == flen)
     570    29782645 :                 nfbno1 = nfbno2 = NULLAGBLOCK;
     571    68842299 :         else if (rbno == fbno) {
     572    58224520 :                 nfbno1 = rbno + rlen;
     573    58224520 :                 nflen1 = flen - rlen;
     574    58224520 :                 nfbno2 = NULLAGBLOCK;
     575    10617779 :         } else if (rbno + rlen == fbno + flen) {
     576     5126219 :                 nfbno1 = fbno;
     577     5126219 :                 nflen1 = flen - rlen;
     578     5126219 :                 nfbno2 = NULLAGBLOCK;
     579             :         } else {
     580     5491560 :                 nfbno1 = fbno;
     581     5491560 :                 nflen1 = rbno - fbno;
     582     5491560 :                 nfbno2 = rbno + rlen;
     583     5491560 :                 nflen2 = (fbno + flen) - nfbno2;
     584             :         }
     585             :         /*
     586             :          * Delete the entry from the by-size btree.
     587             :          */
     588    98624944 :         if ((error = xfs_btree_delete(cnt_cur, &i)))
     589             :                 return error;
     590    98614516 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
     591           0 :                 xfs_btree_mark_sick(cnt_cur);
     592           0 :                 return -EFSCORRUPTED;
     593             :         }
     594             :         /*
     595             :          * Add new by-size btree entry(s).
     596             :          */
     597    98614516 :         if (nfbno1 != NULLAGBLOCK) {
     598    68833927 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
     599             :                         return error;
     600    68841321 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     601           0 :                         xfs_btree_mark_sick(cnt_cur);
     602           0 :                         return -EFSCORRUPTED;
     603             :                 }
     604    68841321 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     605             :                         return error;
     606    68828646 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     607           0 :                         xfs_btree_mark_sick(cnt_cur);
     608           0 :                         return -EFSCORRUPTED;
     609             :                 }
     610             :         }
     611    98609235 :         if (nfbno2 != NULLAGBLOCK) {
     612     5491400 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
     613             :                         return error;
     614     5491563 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     615           0 :                         xfs_btree_mark_sick(cnt_cur);
     616           0 :                         return -EFSCORRUPTED;
     617             :                 }
     618     5491563 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     619             :                         return error;
     620     5491579 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     621           0 :                         xfs_btree_mark_sick(cnt_cur);
     622           0 :                         return -EFSCORRUPTED;
     623             :                 }
     624             :         }
     625             :         /*
     626             :          * Fix up the by-block btree entry(s).
     627             :          */
     628    98609414 :         if (nfbno1 == NULLAGBLOCK) {
     629             :                 /*
     630             :                  * No remaining freespace, just delete the by-block tree entry.
     631             :                  */
     632    29777035 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
     633             :                         return error;
     634    29782527 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     635           0 :                         xfs_btree_mark_sick(bno_cur);
     636           0 :                         return -EFSCORRUPTED;
     637             :                 }
     638             :         } else {
     639             :                 /*
     640             :                  * Update the by-block entry to start later|be shorter.
     641             :                  */
     642    68832379 :                 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
     643             :                         return error;
     644             :         }
     645    98619930 :         if (nfbno2 != NULLAGBLOCK) {
     646             :                 /*
     647             :                  * 2 resulting free entries, need to add one.
     648             :                  */
     649     5491543 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
     650             :                         return error;
     651     5491563 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     652           0 :                         xfs_btree_mark_sick(bno_cur);
     653           0 :                         return -EFSCORRUPTED;
     654             :                 }
     655     5491563 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
     656             :                         return error;
     657     5491568 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     658           0 :                         xfs_btree_mark_sick(bno_cur);
     659           0 :                         return -EFSCORRUPTED;
     660             :                 }
     661             :         }
     662             :         return 0;
     663             : }
     664             : 
     665             : /*
     666             :  * We do not verify the AGFL contents against AGF-based index counters here,
     667             :  * even though we may have access to the perag that contains shadow copies. We
     668             :  * don't know if the AGF based counters have been checked, and if they have they
     669             :  * still may be inconsistent because they haven't yet been reset on the first
     670             :  * allocation after the AGF has been read in.
     671             :  *
     672             :  * This means we can only check that all agfl entries contain valid or null
     673             :  * values because we can't reliably determine the active range to exclude
     674             :  * NULLAGBNO as a valid value.
     675             :  *
     676             :  * However, we can't even do that for v4 format filesystems because there are
     677             :  * old versions of mkfs out there that does not initialise the AGFL to known,
     678             :  * verifiable values. HEnce we can't tell the difference between a AGFL block
     679             :  * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
     680             :  *
     681             :  * As a result, we can only fully validate AGFL block numbers when we pull them
     682             :  * from the freelist in xfs_alloc_get_freelist().
     683             :  */
     684             : static xfs_failaddr_t
     685     1243831 : xfs_agfl_verify(
     686             :         struct xfs_buf  *bp)
     687             : {
     688     1243831 :         struct xfs_mount *mp = bp->b_mount;
     689     1243831 :         struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
     690     1243831 :         __be32          *agfl_bno = xfs_buf_to_agfl_bno(bp);
     691     1243831 :         int             i;
     692             : 
     693     1243831 :         if (!xfs_has_crc(mp))
     694             :                 return NULL;
     695             : 
     696     1243809 :         if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
     697           0 :                 return __this_address;
     698     1243699 :         if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
     699           0 :                 return __this_address;
     700             :         /*
     701             :          * during growfs operations, the perag is not fully initialised,
     702             :          * so we can't use it for any useful checking. growfs ensures we can't
     703             :          * use it by using uncached buffers that don't have the perag attached
     704             :          * so we can detect and avoid this problem.
     705             :          */
     706     1244101 :         if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
     707           0 :                 return __this_address;
     708             : 
     709  2457307088 :         for (i = 0; i < xfs_agfl_size(mp); i++) {
     710  1227407936 :                 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
     711   141589091 :                     be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
     712           0 :                         return __this_address;
     713             :         }
     714             : 
     715     1244101 :         if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
     716           0 :                 return __this_address;
     717             :         return NULL;
     718             : }
     719             : 
     720             : static void
     721      210737 : xfs_agfl_read_verify(
     722             :         struct xfs_buf  *bp)
     723             : {
     724      210737 :         struct xfs_mount *mp = bp->b_mount;
     725      210737 :         xfs_failaddr_t  fa;
     726             : 
     727             :         /*
     728             :          * There is no verification of non-crc AGFLs because mkfs does not
     729             :          * initialise the AGFL to zero or NULL. Hence the only valid part of the
     730             :          * AGFL is what the AGF says is active. We can't get to the AGF, so we
     731             :          * can't verify just those entries are valid.
     732             :          */
     733      210737 :         if (!xfs_has_crc(mp))
     734             :                 return;
     735             : 
     736      210640 :         if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
     737         640 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
     738             :         else {
     739      210000 :                 fa = xfs_agfl_verify(bp);
     740      210000 :                 if (fa)
     741           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     742             :         }
     743             : }
     744             : 
     745             : static void
     746      157474 : xfs_agfl_write_verify(
     747             :         struct xfs_buf  *bp)
     748             : {
     749      157474 :         struct xfs_mount        *mp = bp->b_mount;
     750      157474 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     751      157474 :         xfs_failaddr_t          fa;
     752             : 
     753             :         /* no verification of non-crc AGFLs */
     754      157474 :         if (!xfs_has_crc(mp))
     755             :                 return;
     756             : 
     757      143877 :         fa = xfs_agfl_verify(bp);
     758      143877 :         if (fa) {
     759           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     760           0 :                 return;
     761             :         }
     762             : 
     763      143877 :         if (bip)
     764      116353 :                 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     765             : 
     766      143877 :         xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
     767             : }
     768             : 
     769             : const struct xfs_buf_ops xfs_agfl_buf_ops = {
     770             :         .name = "xfs_agfl",
     771             :         .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
     772             :         .verify_read = xfs_agfl_read_verify,
     773             :         .verify_write = xfs_agfl_write_verify,
     774             :         .verify_struct = xfs_agfl_verify,
     775             : };
     776             : 
     777             : /*
     778             :  * Read in the allocation group free block array.
     779             :  */
     780             : int
     781   651876127 : xfs_alloc_read_agfl(
     782             :         struct xfs_perag        *pag,
     783             :         struct xfs_trans        *tp,
     784             :         struct xfs_buf          **bpp)
     785             : {
     786   651876127 :         struct xfs_mount        *mp = pag->pag_mount;
     787   651876127 :         struct xfs_buf          *bp;
     788   651876127 :         int                     error;
     789             : 
     790  1955628381 :         error = xfs_trans_read_buf(
     791   651876127 :                         mp, tp, mp->m_ddev_targp,
     792   651876127 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
     793   651876127 :                         XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
     794   651934340 :         if (xfs_metadata_is_sick(error))
     795         640 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
     796   651934340 :         if (error)
     797             :                 return error;
     798   651934159 :         xfs_buf_set_ref(bp, XFS_AGFL_REF);
     799   651915952 :         *bpp = bp;
     800   651915952 :         return 0;
     801             : }
     802             : 
     803             : STATIC int
     804   204483180 : xfs_alloc_update_counters(
     805             :         struct xfs_trans        *tp,
     806             :         struct xfs_buf          *agbp,
     807             :         long                    len)
     808             : {
     809   204483180 :         struct xfs_agf          *agf = agbp->b_addr;
     810             : 
     811   204483180 :         agbp->b_pag->pagf_freeblks += len;
     812   204483180 :         be32_add_cpu(&agf->agf_freeblks, len);
     813             : 
     814   204483180 :         if (unlikely(be32_to_cpu(agf->agf_freeblks) >
     815             :                      be32_to_cpu(agf->agf_length))) {
     816           0 :                 xfs_buf_mark_corrupt(agbp);
     817           0 :                 xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
     818           0 :                 return -EFSCORRUPTED;
     819             :         }
     820             : 
     821   204483180 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
     822   204483180 :         return 0;
     823             : }
     824             : 
     825             : /*
     826             :  * Block allocation algorithm and data structures.
     827             :  */
     828             : struct xfs_alloc_cur {
     829             :         struct xfs_btree_cur            *cnt;   /* btree cursors */
     830             :         struct xfs_btree_cur            *bnolt;
     831             :         struct xfs_btree_cur            *bnogt;
     832             :         xfs_extlen_t                    cur_len;/* current search length */
     833             :         xfs_agblock_t                   rec_bno;/* extent startblock */
     834             :         xfs_extlen_t                    rec_len;/* extent length */
     835             :         xfs_agblock_t                   bno;    /* alloc bno */
     836             :         xfs_extlen_t                    len;    /* alloc len */
     837             :         xfs_extlen_t                    diff;   /* diff from search bno */
     838             :         unsigned int                    busy_gen;/* busy state */
     839             :         bool                            busy;
     840             : };
     841             : 
     842             : /*
     843             :  * Set up cursors, etc. in the extent allocation cursor. This function can be
     844             :  * called multiple times to reset an initialized structure without having to
     845             :  * reallocate cursors.
     846             :  */
     847             : static int
     848    90421839 : xfs_alloc_cur_setup(
     849             :         struct xfs_alloc_arg    *args,
     850             :         struct xfs_alloc_cur    *acur)
     851             : {
     852    90421839 :         int                     error;
     853    90421839 :         int                     i;
     854             : 
     855    90421839 :         acur->cur_len = args->maxlen;
     856    90421839 :         acur->rec_bno = 0;
     857    90421839 :         acur->rec_len = 0;
     858    90421839 :         acur->bno = 0;
     859    90421839 :         acur->len = 0;
     860    90421839 :         acur->diff = -1;
     861    90421839 :         acur->busy = false;
     862    90421839 :         acur->busy_gen = 0;
     863             : 
     864             :         /*
     865             :          * Perform an initial cntbt lookup to check for availability of maxlen
     866             :          * extents. If this fails, we'll return -ENOSPC to signal the caller to
     867             :          * attempt a small allocation.
     868             :          */
     869    90421839 :         if (!acur->cnt)
     870    90267157 :                 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
     871             :                                         args->agbp, args->pag, XFS_BTNUM_CNT);
     872    90527728 :         error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
     873    90484481 :         if (error)
     874             :                 return error;
     875             : 
     876             :         /*
     877             :          * Allocate the bnobt left and right search cursors.
     878             :          */
     879    90482837 :         if (!acur->bnolt)
     880    90362901 :                 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
     881             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     882    90494991 :         if (!acur->bnogt)
     883    90371334 :                 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
     884             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     885    90505494 :         return i == 1 ? 0 : -ENOSPC;
     886             : }
     887             : 
     888             : static void
     889    90376134 : xfs_alloc_cur_close(
     890             :         struct xfs_alloc_cur    *acur,
     891             :         bool                    error)
     892             : {
     893    90376134 :         int                     cur_error = XFS_BTREE_NOERROR;
     894             : 
     895    90376134 :         if (error)
     896        3137 :                 cur_error = XFS_BTREE_ERROR;
     897             : 
     898    90376134 :         if (acur->cnt)
     899    90376134 :                 xfs_btree_del_cursor(acur->cnt, cur_error);
     900    90388305 :         if (acur->bnolt)
     901    90386661 :                 xfs_btree_del_cursor(acur->bnolt, cur_error);
     902    90395724 :         if (acur->bnogt)
     903    90394080 :                 xfs_btree_del_cursor(acur->bnogt, cur_error);
     904    90392812 :         acur->cnt = acur->bnolt = acur->bnogt = NULL;
     905    90392812 : }
     906             : 
     907             : /*
     908             :  * Check an extent for allocation and track the best available candidate in the
     909             :  * allocation structure. The cursor is deactivated if it has entered an out of
     910             :  * range state based on allocation arguments. Optionally return the extent
     911             :  * extent geometry and allocation status if requested by the caller.
     912             :  */
     913             : static int
     914  5554057736 : xfs_alloc_cur_check(
     915             :         struct xfs_alloc_arg    *args,
     916             :         struct xfs_alloc_cur    *acur,
     917             :         struct xfs_btree_cur    *cur,
     918             :         int                     *new)
     919             : {
     920  5554057736 :         int                     error, i;
     921  5554057736 :         xfs_agblock_t           bno, bnoa, bnew;
     922  5554057736 :         xfs_extlen_t            len, lena, diff = -1;
     923  5554057736 :         bool                    busy;
     924  5554057736 :         unsigned                busy_gen = 0;
     925  5554057736 :         bool                    deactivate = false;
     926  5554057736 :         bool                    isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
     927             : 
     928  5554057736 :         *new = 0;
     929             : 
     930  5554057736 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
     931  5548307186 :         if (error)
     932             :                 return error;
     933  5548307186 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
     934           0 :                 xfs_btree_mark_sick(cur);
     935           0 :                 return -EFSCORRUPTED;
     936             :         }
     937             : 
     938             :         /*
     939             :          * Check minlen and deactivate a cntbt cursor if out of acceptable size
     940             :          * range (i.e., walking backwards looking for a minlen extent).
     941             :          */
     942  5548307186 :         if (len < args->minlen) {
     943   297951025 :                 deactivate = !isbnobt;
     944   297951025 :                 goto out;
     945             :         }
     946             : 
     947  5250356161 :         busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
     948             :                                          &busy_gen);
     949  5266647534 :         acur->busy |= busy;
     950  5266647534 :         if (busy)
     951  2737232280 :                 acur->busy_gen = busy_gen;
     952             :         /* deactivate a bnobt cursor outside of locality range */
     953  5266647534 :         if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
     954       18093 :                 deactivate = isbnobt;
     955       18093 :                 goto out;
     956             :         }
     957  5266629441 :         if (lena < args->minlen)
     958  2575154144 :                 goto out;
     959             : 
     960  2691475297 :         args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
     961  2691475297 :         xfs_alloc_fix_len(args);
     962  2690298871 :         ASSERT(args->len >= args->minlen);
     963  2690298871 :         if (args->len < acur->len)
     964     3363110 :                 goto out;
     965             : 
     966             :         /*
     967             :          * We have an aligned record that satisfies minlen and beats or matches
     968             :          * the candidate extent size. Compare locality for near allocation mode.
     969             :          */
     970  2686935761 :         diff = xfs_alloc_compute_diff(args->agbno, args->len,
     971             :                                       args->alignment, args->datatype,
     972             :                                       bnoa, lena, &bnew);
     973  2688888137 :         if (bnew == NULLAGBLOCK)
     974           0 :                 goto out;
     975             : 
     976             :         /*
     977             :          * Deactivate a bnobt cursor with worse locality than the current best.
     978             :          */
     979  2688888137 :         if (diff > acur->diff) {
     980  2052766514 :                 deactivate = isbnobt;
     981  2052766514 :                 goto out;
     982             :         }
     983             : 
     984   636121623 :         ASSERT(args->len > acur->len ||
     985             :                (args->len == acur->len && diff <= acur->diff));
     986   636121623 :         acur->rec_bno = bno;
     987   636121623 :         acur->rec_len = len;
     988   636121623 :         acur->bno = bnew;
     989   636121623 :         acur->len = args->len;
     990   636121623 :         acur->diff = diff;
     991   636121623 :         *new = 1;
     992             : 
     993             :         /*
     994             :          * We're done if we found a perfect allocation. This only deactivates
     995             :          * the current cursor, but this is just an optimization to terminate a
     996             :          * cntbt search that otherwise runs to the edge of the tree.
     997             :          */
     998   636121623 :         if (acur->diff == 0 && acur->len == args->maxlen)
     999             :                 deactivate = true;
    1000   614082011 : out:
    1001  4929252886 :         if (deactivate)
    1002    35290996 :                 cur->bc_ag.abt.active = false;
    1003  5565374509 :         trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
    1004  5565374509 :                                   *new);
    1005  5565374509 :         return 0;
    1006             : }
    1007             : 
    1008             : /*
    1009             :  * Complete an allocation of a candidate extent. Remove the extent from both
    1010             :  * trees and update the args structure.
    1011             :  */
    1012             : STATIC int
    1013    90384163 : xfs_alloc_cur_finish(
    1014             :         struct xfs_alloc_arg    *args,
    1015             :         struct xfs_alloc_cur    *acur)
    1016             : {
    1017    90384163 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1018    90384163 :         int                     error;
    1019             : 
    1020    90384163 :         ASSERT(acur->cnt && acur->bnolt);
    1021    90384163 :         ASSERT(acur->bno >= acur->rec_bno);
    1022    90384163 :         ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
    1023    90384163 :         ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
    1024             : 
    1025    90384163 :         error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
    1026             :                                       acur->rec_len, acur->bno, acur->len, 0);
    1027    90380105 :         if (error)
    1028             :                 return error;
    1029             : 
    1030    90376318 :         args->agbno = acur->bno;
    1031    90376318 :         args->len = acur->len;
    1032    90376318 :         args->wasfromfl = 0;
    1033             : 
    1034    90376318 :         trace_xfs_alloc_cur(args);
    1035    90376318 :         return 0;
    1036             : }
    1037             : 
    1038             : /*
    1039             :  * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
    1040             :  * bno optimized lookup to search for extents with ideal size and locality.
    1041             :  */
    1042             : STATIC int
    1043   156567831 : xfs_alloc_cntbt_iter(
    1044             :         struct xfs_alloc_arg            *args,
    1045             :         struct xfs_alloc_cur            *acur)
    1046             : {
    1047   156567831 :         struct xfs_btree_cur    *cur = acur->cnt;
    1048   156567831 :         xfs_agblock_t           bno;
    1049   156567831 :         xfs_extlen_t            len, cur_len;
    1050   156567831 :         int                     error;
    1051   156567831 :         int                     i;
    1052             : 
    1053   156567831 :         if (!xfs_alloc_cur_active(cur))
    1054             :                 return 0;
    1055             : 
    1056             :         /* locality optimized lookup */
    1057   156246734 :         cur_len = acur->cur_len;
    1058   156246734 :         error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
    1059   156248288 :         if (error)
    1060             :                 return error;
    1061   156248270 :         if (i == 0)
    1062             :                 return 0;
    1063   141319622 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1064   141308536 :         if (error)
    1065             :                 return error;
    1066             : 
    1067             :         /* check the current record and update search length from it */
    1068   141309191 :         error = xfs_alloc_cur_check(args, acur, cur, &i);
    1069   141350840 :         if (error)
    1070             :                 return error;
    1071   141350840 :         ASSERT(len >= acur->cur_len);
    1072   141350840 :         acur->cur_len = len;
    1073             : 
    1074             :         /*
    1075             :          * We looked up the first record >= [agbno, len] above. The agbno is a
    1076             :          * secondary key and so the current record may lie just before or after
    1077             :          * agbno. If it is past agbno, check the previous record too so long as
    1078             :          * the length matches as it may be closer. Don't check a smaller record
    1079             :          * because that could deactivate our cursor.
    1080             :          */
    1081   141350840 :         if (bno > args->agbno) {
    1082   137287714 :                 error = xfs_btree_decrement(cur, 0, &i);
    1083   137275680 :                 if (!error && i) {
    1084   135424344 :                         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1085   135418097 :                         if (!error && i && len == acur->cur_len)
    1086    34781012 :                                 error = xfs_alloc_cur_check(args, acur, cur,
    1087             :                                                             &i);
    1088             :                 }
    1089   137273434 :                 if (error)
    1090             :                         return error;
    1091             :         }
    1092             : 
    1093             :         /*
    1094             :          * Increment the search key until we find at least one allocation
    1095             :          * candidate or if the extent we found was larger. Otherwise, double the
    1096             :          * search key to optimize the search. Efficiency is more important here
    1097             :          * than absolute best locality.
    1098             :          */
    1099   141336560 :         cur_len <<= 1;
    1100   141336560 :         if (!acur->len || acur->cur_len >= cur_len)
    1101    91775298 :                 acur->cur_len++;
    1102             :         else
    1103    49561262 :                 acur->cur_len = cur_len;
    1104             : 
    1105             :         return error;
    1106             : }
    1107             : 
    1108             : /*
    1109             :  * Deal with the case where only small freespaces remain. Either return the
    1110             :  * contents of the last freespace record, or allocate space from the freelist if
    1111             :  * there is nothing in the tree.
    1112             :  */
    1113             : STATIC int                      /* error */
    1114     1314452 : xfs_alloc_ag_vextent_small(
    1115             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    1116             :         struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
    1117             :         xfs_agblock_t           *fbnop, /* result block number */
    1118             :         xfs_extlen_t            *flenp, /* result length */
    1119             :         int                     *stat)  /* status: 0-freelist, 1-normal/none */
    1120             : {
    1121     1314452 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1122     1314452 :         int                     error = 0;
    1123     1314452 :         xfs_agblock_t           fbno = NULLAGBLOCK;
    1124     1314452 :         xfs_extlen_t            flen = 0;
    1125     1314452 :         int                     i = 0;
    1126             : 
    1127             :         /*
    1128             :          * If a cntbt cursor is provided, try to allocate the largest record in
    1129             :          * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
    1130             :          * allocation. Make sure to respect minleft even when pulling from the
    1131             :          * freelist.
    1132             :          */
    1133     1314452 :         if (ccur)
    1134     1314452 :                 error = xfs_btree_decrement(ccur, 0, &i);
    1135     1314449 :         if (error)
    1136           0 :                 goto error;
    1137     1314449 :         if (i) {
    1138     1314226 :                 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
    1139     1314223 :                 if (error)
    1140           0 :                         goto error;
    1141     1314223 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1142           0 :                         xfs_btree_mark_sick(ccur);
    1143           0 :                         error = -EFSCORRUPTED;
    1144           0 :                         goto error;
    1145             :                 }
    1146     1314223 :                 goto out;
    1147             :         }
    1148             : 
    1149         223 :         if (args->minlen != 1 || args->alignment != 1 ||
    1150         223 :             args->resv == XFS_AG_RESV_AGFL ||
    1151           0 :             be32_to_cpu(agf->agf_flcount) <= args->minleft)
    1152         223 :                 goto out;
    1153             : 
    1154           0 :         error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
    1155             :                         &fbno, 0);
    1156           0 :         if (error)
    1157           0 :                 goto error;
    1158           0 :         if (fbno == NULLAGBLOCK)
    1159           0 :                 goto out;
    1160             : 
    1161           0 :         xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
    1162           0 :                               (args->datatype & XFS_ALLOC_NOBUSY));
    1163             : 
    1164           0 :         if (args->datatype & XFS_ALLOC_USERDATA) {
    1165           0 :                 struct xfs_buf  *bp;
    1166             : 
    1167           0 :                 error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
    1168           0 :                                 XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
    1169           0 :                                 args->mp->m_bsize, 0, &bp);
    1170           0 :                 if (error)
    1171           0 :                         goto error;
    1172           0 :                 xfs_trans_binval(args->tp, bp);
    1173             :         }
    1174           0 :         *fbnop = args->agbno = fbno;
    1175           0 :         *flenp = args->len = 1;
    1176           0 :         if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
    1177           0 :                 xfs_btree_mark_sick(ccur);
    1178           0 :                 error = -EFSCORRUPTED;
    1179           0 :                 goto error;
    1180             :         }
    1181           0 :         args->wasfromfl = 1;
    1182           0 :         trace_xfs_alloc_small_freelist(args);
    1183             : 
    1184             :         /*
    1185             :          * If we're feeding an AGFL block to something that doesn't live in the
    1186             :          * free space, we need to clear out the OWN_AG rmap.
    1187             :          */
    1188           0 :         error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
    1189             :                               &XFS_RMAP_OINFO_AG);
    1190           0 :         if (error)
    1191           0 :                 goto error;
    1192             : 
    1193           0 :         *stat = 0;
    1194           0 :         return 0;
    1195             : 
    1196     1314446 : out:
    1197             :         /*
    1198             :          * Can't do the allocation, give up.
    1199             :          */
    1200     1314446 :         if (flen < args->minlen) {
    1201         223 :                 args->agbno = NULLAGBLOCK;
    1202         223 :                 trace_xfs_alloc_small_notenough(args);
    1203         223 :                 flen = 0;
    1204             :         }
    1205     1314446 :         *fbnop = fbno;
    1206     1314446 :         *flenp = flen;
    1207     1314446 :         *stat = 1;
    1208     1314446 :         trace_xfs_alloc_small_done(args);
    1209     1314446 :         return 0;
    1210             : 
    1211           0 : error:
    1212           0 :         trace_xfs_alloc_small_error(args);
    1213           0 :         return error;
    1214             : }
    1215             : 
    1216             : /*
    1217             :  * Allocate a variable extent at exactly agno/bno.
    1218             :  * Extent's length (returned in *len) will be between minlen and maxlen,
    1219             :  * and of the form k * prod + mod unless there's nothing that large.
    1220             :  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
    1221             :  */
    1222             : STATIC int                      /* error */
    1223     1112033 : xfs_alloc_ag_vextent_exact(
    1224             :         xfs_alloc_arg_t *args)  /* allocation argument structure */
    1225             : {
    1226     1112033 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1227     1112033 :         struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
    1228     1112033 :         struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
    1229     1112033 :         int             error;
    1230     1112033 :         xfs_agblock_t   fbno;   /* start block of found extent */
    1231     1112033 :         xfs_extlen_t    flen;   /* length of found extent */
    1232     1112033 :         xfs_agblock_t   tbno;   /* start block of busy extent */
    1233     1112033 :         xfs_extlen_t    tlen;   /* length of busy extent */
    1234     1112033 :         xfs_agblock_t   tend;   /* end block of busy extent */
    1235     1112033 :         int             i;      /* success/failure of operation */
    1236     1112033 :         unsigned        busy_gen;
    1237             : 
    1238     1112033 :         ASSERT(args->alignment == 1);
    1239             : 
    1240             :         /*
    1241             :          * Allocate/initialize a cursor for the by-number freespace btree.
    1242             :          */
    1243     1112033 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1244             :                                           args->pag, XFS_BTNUM_BNO);
    1245             : 
    1246             :         /*
    1247             :          * Lookup bno and minlen in the btree (minlen is irrelevant, really).
    1248             :          * Look for the closest free block <= bno, it must contain bno
    1249             :          * if any free block does.
    1250             :          */
    1251     1112083 :         error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
    1252     1112072 :         if (error)
    1253           1 :                 goto error0;
    1254     1112071 :         if (!i)
    1255       95029 :                 goto not_found;
    1256             : 
    1257             :         /*
    1258             :          * Grab the freespace record.
    1259             :          */
    1260     1017042 :         error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
    1261     1016920 :         if (error)
    1262           0 :                 goto error0;
    1263     1016920 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1264           0 :                 xfs_btree_mark_sick(bno_cur);
    1265           0 :                 error = -EFSCORRUPTED;
    1266           0 :                 goto error0;
    1267             :         }
    1268     1016920 :         ASSERT(fbno <= args->agbno);
    1269             : 
    1270             :         /*
    1271             :          * Check for overlapping busy extents.
    1272             :          */
    1273     1016920 :         tbno = fbno;
    1274     1016920 :         tlen = flen;
    1275     1016920 :         xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
    1276             : 
    1277             :         /*
    1278             :          * Give up if the start of the extent is busy, or the freespace isn't
    1279             :          * long enough for the minimum request.
    1280             :          */
    1281     1016964 :         if (tbno > args->agbno)
    1282        2190 :                 goto not_found;
    1283     1014774 :         if (tlen < args->minlen)
    1284      590574 :                 goto not_found;
    1285      424200 :         tend = tbno + tlen;
    1286      424200 :         if (tend < args->agbno + args->minlen)
    1287       13319 :                 goto not_found;
    1288             : 
    1289             :         /*
    1290             :          * End of extent will be smaller of the freespace end and the
    1291             :          * maximal requested end.
    1292             :          *
    1293             :          * Fix the length according to mod and prod if given.
    1294             :          */
    1295      410881 :         args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
    1296      410881 :                                                 - args->agbno;
    1297      410881 :         xfs_alloc_fix_len(args);
    1298      410853 :         ASSERT(args->agbno + args->len <= tend);
    1299             : 
    1300             :         /*
    1301             :          * We are allocating agbno for args->len
    1302             :          * Allocate/initialize a cursor for the by-size btree.
    1303             :          */
    1304      410853 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1305             :                                         args->pag, XFS_BTNUM_CNT);
    1306      410953 :         ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
    1307      410953 :         error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
    1308             :                                       args->len, XFSA_FIXUP_BNO_OK);
    1309      410944 :         if (error) {
    1310           0 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1311           0 :                 goto error0;
    1312             :         }
    1313             : 
    1314      410944 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1315      410961 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1316             : 
    1317      410980 :         args->wasfromfl = 0;
    1318      410980 :         trace_xfs_alloc_exact_done(args);
    1319      410980 :         return 0;
    1320             : 
    1321      701112 : not_found:
    1322             :         /* Didn't find it, return null. */
    1323      701112 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1324      701121 :         args->agbno = NULLAGBLOCK;
    1325      701121 :         trace_xfs_alloc_exact_notfound(args);
    1326      701121 :         return 0;
    1327             : 
    1328           1 : error0:
    1329           1 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1330           1 :         trace_xfs_alloc_exact_error(args);
    1331           1 :         return error;
    1332             : }
    1333             : 
    1334             : /*
    1335             :  * Search a given number of btree records in a given direction. Check each
    1336             :  * record against the good extent we've already found.
    1337             :  */
    1338             : STATIC int
    1339   440229966 : xfs_alloc_walk_iter(
    1340             :         struct xfs_alloc_arg    *args,
    1341             :         struct xfs_alloc_cur    *acur,
    1342             :         struct xfs_btree_cur    *cur,
    1343             :         bool                    increment,
    1344             :         bool                    find_one, /* quit on first candidate */
    1345             :         int                     count,    /* rec count (-1 for infinite) */
    1346             :         int                     *stat)
    1347             : {
    1348   440229966 :         int                     error;
    1349   440229966 :         int                     i;
    1350             : 
    1351   440229966 :         *stat = 0;
    1352             : 
    1353             :         /*
    1354             :          * Search so long as the cursor is active or we find a better extent.
    1355             :          * The cursor is deactivated if it extends beyond the range of the
    1356             :          * current allocation candidate.
    1357             :          */
    1358  5745118571 :         while (xfs_alloc_cur_active(cur) && count) {
    1359  5373414296 :                 error = xfs_alloc_cur_check(args, acur, cur, &i);
    1360  5382991975 :                 if (error)
    1361           0 :                         return error;
    1362  5382991975 :                 if (i == 1) {
    1363   608243540 :                         *stat = 1;
    1364   608243540 :                         if (find_one)
    1365             :                                 break;
    1366             :                 }
    1367  5329658757 :                 if (!xfs_alloc_cur_active(cur))
    1368             :                         break;
    1369             : 
    1370  5306318748 :                 if (increment)
    1371  3948982949 :                         error = xfs_btree_increment(cur, 0, &i);
    1372             :                 else
    1373  1357335799 :                         error = xfs_btree_decrement(cur, 0, &i);
    1374  5304888613 :                 if (error)
    1375           8 :                         return error;
    1376  5304888605 :                 if (i == 0)
    1377    35810625 :                         cur->bc_ag.abt.active = false;
    1378             : 
    1379  5304888605 :                 if (count > 0)
    1380   287145767 :                         count--;
    1381             :         }
    1382             : 
    1383             :         return 0;
    1384             : }
    1385             : 
    1386             : /*
    1387             :  * Search the by-bno and by-size btrees in parallel in search of an extent with
    1388             :  * ideal locality based on the NEAR mode ->agbno locality hint.
    1389             :  */
    1390             : STATIC int
    1391    52920175 : xfs_alloc_ag_vextent_locality(
    1392             :         struct xfs_alloc_arg    *args,
    1393             :         struct xfs_alloc_cur    *acur,
    1394             :         int                     *stat)
    1395             : {
    1396    52920175 :         struct xfs_btree_cur    *fbcur = NULL;
    1397    52920175 :         int                     error;
    1398    52920175 :         int                     i;
    1399    52920175 :         bool                    fbinc;
    1400             : 
    1401    52920175 :         ASSERT(acur->len == 0);
    1402             : 
    1403    52920175 :         *stat = 0;
    1404             : 
    1405    52920175 :         error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
    1406    52935222 :         if (error)
    1407             :                 return error;
    1408    52932474 :         error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
    1409    52937268 :         if (error)
    1410             :                 return error;
    1411    52936517 :         error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
    1412    52936492 :         if (error)
    1413             :                 return error;
    1414             : 
    1415             :         /*
    1416             :          * Search the bnobt and cntbt in parallel. Search the bnobt left and
    1417             :          * right and lookup the closest extent to the locality hint for each
    1418             :          * extent size key in the cntbt. The entire search terminates
    1419             :          * immediately on a bnobt hit because that means we've found best case
    1420             :          * locality. Otherwise the search continues until the cntbt cursor runs
    1421             :          * off the end of the tree. If no allocation candidate is found at this
    1422             :          * point, give up on locality, walk backwards from the end of the cntbt
    1423             :          * and take the first available extent.
    1424             :          *
    1425             :          * The parallel tree searches balance each other out to provide fairly
    1426             :          * consistent performance for various situations. The bnobt search can
    1427             :          * have pathological behavior in the worst case scenario of larger
    1428             :          * allocation requests and fragmented free space. On the other hand, the
    1429             :          * bnobt is able to satisfy most smaller allocation requests much more
    1430             :          * quickly than the cntbt. The cntbt search can sift through fragmented
    1431             :          * free space and sets of free extents for larger allocation requests
    1432             :          * more quickly than the bnobt. Since the locality hint is just a hint
    1433             :          * and we don't want to scan the entire bnobt for perfect locality, the
    1434             :          * cntbt search essentially bounds the bnobt search such that we can
    1435             :          * find good enough locality at reasonable performance in most cases.
    1436             :          */
    1437    38707819 :         while (xfs_alloc_cur_active(acur->bnolt) ||
    1438   194266011 :                xfs_alloc_cur_active(acur->bnogt) ||
    1439       22243 :                xfs_alloc_cur_active(acur->cnt)) {
    1440             : 
    1441   194242962 :                 trace_xfs_alloc_cur_lookup(args);
    1442             : 
    1443             :                 /*
    1444             :                  * Search the bnobt left and right. In the case of a hit, finish
    1445             :                  * the search in the opposite direction and we're done.
    1446             :                  */
    1447   194243953 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
    1448             :                                             true, 1, &i);
    1449   194222128 :                 if (error)
    1450           2 :                         return error;
    1451   194222126 :                 if (i == 1) {
    1452    24210675 :                         trace_xfs_alloc_cur_left(args);
    1453    24208430 :                         fbcur = acur->bnogt;
    1454    24208430 :                         fbinc = true;
    1455    24208430 :                         break;
    1456             :                 }
    1457   170011451 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
    1458             :                                             1, &i);
    1459   170058248 :                 if (error)
    1460           0 :                         return error;
    1461   170058248 :                 if (i == 1) {
    1462    13475514 :                         trace_xfs_alloc_cur_right(args);
    1463    13475604 :                         fbcur = acur->bnolt;
    1464    13475604 :                         fbinc = false;
    1465    13475604 :                         break;
    1466             :                 }
    1467             : 
    1468             :                 /*
    1469             :                  * Check the extent with best locality based on the current
    1470             :                  * extent size search key and keep track of the best candidate.
    1471             :                  */
    1472   156582734 :                 error = xfs_alloc_cntbt_iter(args, acur);
    1473   156569156 :                 if (error)
    1474          18 :                         return error;
    1475   156569138 :                 if (!xfs_alloc_cur_active(acur->cnt)) {
    1476    15247559 :                         trace_xfs_alloc_cur_lookup_done(args);
    1477    15247559 :                         break;
    1478             :                 }
    1479             :         }
    1480             : 
    1481             :         /*
    1482             :          * If we failed to find anything due to busy extents, return empty
    1483             :          * handed so the caller can flush and retry. If no busy extents were
    1484             :          * found, walk backwards from the end of the cntbt as a last resort.
    1485             :          */
    1486    52931489 :         if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
    1487      308960 :                 error = xfs_btree_decrement(acur->cnt, 0, &i);
    1488      308961 :                 if (error)
    1489             :                         return error;
    1490      308961 :                 if (i) {
    1491      308961 :                         acur->cnt->bc_ag.abt.active = true;
    1492      308961 :                         fbcur = acur->cnt;
    1493      308961 :                         fbinc = false;
    1494             :                 }
    1495             :         }
    1496             : 
    1497             :         /*
    1498             :          * Search in the opposite direction for a better entry in the case of
    1499             :          * a bnobt hit or walk backwards from the end of the cntbt.
    1500             :          */
    1501    52931793 :         if (fbcur) {
    1502    37994006 :                 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
    1503             :                                             &i);
    1504    37998681 :                 if (error)
    1505             :                         return error;
    1506             :         }
    1507             : 
    1508    52936462 :         if (acur->len)
    1509    52816123 :                 *stat = 1;
    1510             : 
    1511             :         return 0;
    1512             : }
    1513             : 
    1514             : /* Check the last block of the cnt btree for allocations. */
    1515             : static int
    1516    76140357 : xfs_alloc_ag_vextent_lastblock(
    1517             :         struct xfs_alloc_arg    *args,
    1518             :         struct xfs_alloc_cur    *acur,
    1519             :         xfs_agblock_t           *bno,
    1520             :         xfs_extlen_t            *len,
    1521             :         bool                    *allocated)
    1522             : {
    1523    76140357 :         int                     error;
    1524    76140357 :         int                     i;
    1525             : 
    1526             : #ifdef DEBUG
    1527             :         /* Randomly don't execute the first algorithm. */
    1528    76140357 :         if (get_random_u32_below(2))
    1529             :                 return 0;
    1530             : #endif
    1531             : 
    1532             :         /*
    1533             :          * Start from the entry that lookup found, sequence through all larger
    1534             :          * free blocks.  If we're actually pointing at a record smaller than
    1535             :          * maxlen, go to the start of this block, and skip all those smaller
    1536             :          * than minlen.
    1537             :          */
    1538    38069220 :         if (*len || args->alignment > 1) {
    1539     1165510 :                 acur->cnt->bc_levels[0].ptr = 1;
    1540   129746576 :                 do {
    1541   129746576 :                         error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
    1542   129746795 :                         if (error)
    1543           0 :                                 return error;
    1544   129746795 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1545           0 :                                 xfs_btree_mark_sick(acur->cnt);
    1546           0 :                                 return -EFSCORRUPTED;
    1547             :                         }
    1548   129746795 :                         if (*len >= args->minlen)
    1549             :                                 break;
    1550   128581260 :                         error = xfs_btree_increment(acur->cnt, 0, &i);
    1551   128581066 :                         if (error)
    1552           0 :                                 return error;
    1553   128581066 :                 } while (i);
    1554     1165535 :                 ASSERT(*len >= args->minlen);
    1555     1165535 :                 if (!i)
    1556             :                         return 0;
    1557             :         }
    1558             : 
    1559    38069245 :         error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
    1560    38096037 :         if (error)
    1561             :                 return error;
    1562             : 
    1563             :         /*
    1564             :          * It didn't work.  We COULD be in a case where there's a good record
    1565             :          * somewhere, so try again.
    1566             :          */
    1567    38096037 :         if (acur->len == 0)
    1568             :                 return 0;
    1569             : 
    1570    37572525 :         trace_xfs_alloc_near_first(args);
    1571    37571959 :         *allocated = true;
    1572    37571959 :         return 0;
    1573             : }
    1574             : 
    1575             : /*
    1576             :  * Allocate a variable extent near bno in the allocation group agno.
    1577             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1578             :  * and of the form k * prod + mod unless there's nothing that large.
    1579             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1580             :  */
    1581             : STATIC int
    1582    90333155 : xfs_alloc_ag_vextent_near(
    1583             :         struct xfs_alloc_arg    *args,
    1584             :         uint32_t                alloc_flags)
    1585             : {
    1586    90333155 :         struct xfs_alloc_cur    acur = {};
    1587    90333155 :         int                     error;          /* error code */
    1588    90333155 :         int                     i;              /* result code, temporary */
    1589    90333155 :         xfs_agblock_t           bno;
    1590    90333155 :         xfs_extlen_t            len;
    1591             : 
    1592             :         /* handle uninitialized agbno range so caller doesn't have to */
    1593    90333155 :         if (!args->min_agbno && !args->max_agbno)
    1594    89546809 :                 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
    1595    90333155 :         ASSERT(args->min_agbno <= args->max_agbno);
    1596             : 
    1597             :         /* clamp agbno to the range if it's outside */
    1598    90333155 :         if (args->agbno < args->min_agbno)
    1599      340595 :                 args->agbno = args->min_agbno;
    1600    90333155 :         if (args->agbno > args->max_agbno)
    1601           0 :                 args->agbno = args->max_agbno;
    1602             : 
    1603             :         /* Retry once quickly if we find busy extents before blocking. */
    1604    90333155 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1605    90450147 : restart:
    1606    90450147 :         len = 0;
    1607             : 
    1608             :         /*
    1609             :          * Set up cursors and see if there are any free extents as big as
    1610             :          * maxlen. If not, pick the last entry in the tree unless the tree is
    1611             :          * empty.
    1612             :          */
    1613    90450147 :         error = xfs_alloc_cur_setup(args, &acur);
    1614    90501316 :         if (error == -ENOSPC) {
    1615      976561 :                 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
    1616             :                                 &len, &i);
    1617      976557 :                 if (error)
    1618           0 :                         goto out;
    1619      976557 :                 if (i == 0 || len == 0) {
    1620           0 :                         trace_xfs_alloc_near_noentry(args);
    1621           0 :                         goto out;
    1622             :                 }
    1623      976557 :                 ASSERT(i == 1);
    1624    89524755 :         } else if (error) {
    1625        1644 :                 goto out;
    1626             :         }
    1627             : 
    1628             :         /*
    1629             :          * First algorithm.
    1630             :          * If the requested extent is large wrt the freespaces available
    1631             :          * in this a.g., then the cursor will be pointing to a btree entry
    1632             :          * near the right edge of the tree.  If it's in the last btree leaf
    1633             :          * block, then we just examine all the entries in that block
    1634             :          * that are big enough, and pick the best one.
    1635             :          */
    1636    90499668 :         if (xfs_btree_islastblock(acur.cnt, 0)) {
    1637    76148789 :                 bool            allocated = false;
    1638             : 
    1639    76148789 :                 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
    1640             :                                 &allocated);
    1641    76159678 :                 if (error)
    1642           0 :                         goto out;
    1643    76159678 :                 if (allocated)
    1644    37572006 :                         goto alloc_finish;
    1645             :         }
    1646             : 
    1647             :         /*
    1648             :          * Second algorithm. Combined cntbt and bnobt search to find ideal
    1649             :          * locality.
    1650             :          */
    1651    52904527 :         error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
    1652    52937126 :         if (error)
    1653         750 :                 goto out;
    1654             : 
    1655             :         /*
    1656             :          * If we couldn't get anything, give up.
    1657             :          */
    1658    52936376 :         if (!acur.len) {
    1659      120339 :                 if (acur.busy) {
    1660             :                         /*
    1661             :                          * Our only valid extents must have been busy. Flush and
    1662             :                          * retry the allocation again. If we get an -EAGAIN
    1663             :                          * error, we're being told that a deadlock was avoided
    1664             :                          * and the current transaction needs committing before
    1665             :                          * the allocation can be retried.
    1666             :                          */
    1667      116996 :                         trace_xfs_alloc_near_busy(args);
    1668      116996 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1669             :                                         acur.busy_gen, alloc_flags);
    1670      116996 :                         if (error)
    1671           4 :                                 goto out;
    1672             : 
    1673      116992 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1674      116992 :                         goto restart;
    1675             :                 }
    1676        3343 :                 trace_xfs_alloc_size_neither(args);
    1677        3343 :                 args->agbno = NULLAGBLOCK;
    1678        3343 :                 goto out;
    1679             :         }
    1680             : 
    1681    52816037 : alloc_finish:
    1682             :         /* fix up btrees on a successful allocation */
    1683    90388043 :         error = xfs_alloc_cur_finish(args, &acur);
    1684             : 
    1685    90376703 : out:
    1686    90376703 :         xfs_alloc_cur_close(&acur, error);
    1687    90391453 :         return error;
    1688             : }
    1689             : 
    1690             : /*
    1691             :  * Allocate a variable extent anywhere in the allocation group agno.
    1692             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1693             :  * and of the form k * prod + mod unless there's nothing that large.
    1694             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1695             :  */
    1696             : static int
    1697     8001729 : xfs_alloc_ag_vextent_size(
    1698             :         struct xfs_alloc_arg    *args,
    1699             :         uint32_t                alloc_flags)
    1700             : {
    1701     8001729 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1702     8001729 :         struct xfs_btree_cur    *bno_cur;
    1703     8001729 :         struct xfs_btree_cur    *cnt_cur;
    1704     8001729 :         xfs_agblock_t           fbno;           /* start of found freespace */
    1705     8001729 :         xfs_extlen_t            flen;           /* length of found freespace */
    1706     8001729 :         xfs_agblock_t           rbno;           /* returned block number */
    1707     8001729 :         xfs_extlen_t            rlen;           /* length of returned extent */
    1708     8001729 :         bool                    busy;
    1709     8001729 :         unsigned                busy_gen;
    1710     8001729 :         int                     error;
    1711     8001729 :         int                     i;
    1712             : 
    1713             :         /* Retry once quickly if we find busy extents before blocking. */
    1714     8001729 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1715             : restart:
    1716             :         /*
    1717             :          * Allocate and initialize a cursor for the by-size btree.
    1718             :          */
    1719     8049622 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1720             :                                         args->pag, XFS_BTNUM_CNT);
    1721     8050357 :         bno_cur = NULL;
    1722             : 
    1723             :         /*
    1724             :          * Look for an entry >= maxlen+alignment-1 blocks.
    1725             :          */
    1726     8049871 :         if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
    1727     8050357 :                         args->maxlen + args->alignment - 1, &i)))
    1728          47 :                 goto error0;
    1729             : 
    1730             :         /*
    1731             :          * If none then we have to settle for a smaller extent. In the case that
    1732             :          * there are no large extents, this will return the last entry in the
    1733             :          * tree unless the tree is empty. In the case that there are only busy
    1734             :          * large extents, this will return the largest small extent unless there
    1735             :          * are no smaller extents available.
    1736             :          */
    1737     8049824 :         if (!i) {
    1738      337891 :                 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
    1739             :                                                    &fbno, &flen, &i);
    1740      337891 :                 if (error)
    1741           0 :                         goto error0;
    1742      337891 :                 if (i == 0 || flen == 0) {
    1743         223 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1744         223 :                         trace_xfs_alloc_size_noentry(args);
    1745         223 :                         return 0;
    1746             :                 }
    1747      337668 :                 ASSERT(i == 1);
    1748      337668 :                 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
    1749             :                                 &rlen, &busy_gen);
    1750             :         } else {
    1751             :                 /*
    1752             :                  * Search for a non-busy extent that is large enough.
    1753             :                  */
    1754  1560465937 :                 for (;;) {
    1755   784088935 :                         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
    1756   784000032 :                         if (error)
    1757           0 :                                 goto error0;
    1758   784000032 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1759           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1760           0 :                                 error = -EFSCORRUPTED;
    1761           0 :                                 goto error0;
    1762             :                         }
    1763             : 
    1764   784000032 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1765             :                                         &rbno, &rlen, &busy_gen);
    1766             : 
    1767   784235271 :                         if (rlen >= args->maxlen)
    1768             :                                 break;
    1769             : 
    1770   776568944 :                         error = xfs_btree_increment(cnt_cur, 0, &i);
    1771   776423281 :                         if (error)
    1772           0 :                                 goto error0;
    1773   776423281 :                         if (i)
    1774   776377002 :                                 continue;
    1775             : 
    1776             :                         /*
    1777             :                          * Our only valid extents must have been busy. Flush and
    1778             :                          * retry the allocation again. If we get an -EAGAIN
    1779             :                          * error, we're being told that a deadlock was avoided
    1780             :                          * and the current transaction needs committing before
    1781             :                          * the allocation can be retried.
    1782             :                          */
    1783       46279 :                         trace_xfs_alloc_size_busy(args);
    1784       46279 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1785             :                                         busy_gen, alloc_flags);
    1786       46278 :                         if (error)
    1787           0 :                                 goto error0;
    1788             : 
    1789       46278 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1790       46278 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1791       46279 :                         goto restart;
    1792             :                 }
    1793             :         }
    1794             : 
    1795             :         /*
    1796             :          * In the first case above, we got the last entry in the
    1797             :          * by-size btree.  Now we check to see if the space hits maxlen
    1798             :          * once aligned; if not, we search left for something better.
    1799             :          * This can't happen in the second case above.
    1800             :          */
    1801     8003520 :         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1802     8003520 :         if (XFS_IS_CORRUPT(args->mp,
    1803             :                            rlen != 0 &&
    1804             :                            (rlen > flen ||
    1805             :                             rbno + rlen > fbno + flen))) {
    1806           0 :                 xfs_btree_mark_sick(cnt_cur);
    1807           0 :                 error = -EFSCORRUPTED;
    1808           0 :                 goto error0;
    1809             :         }
    1810     8003520 :         if (rlen < args->maxlen) {
    1811      337301 :                 xfs_agblock_t   bestfbno;
    1812      337301 :                 xfs_extlen_t    bestflen;
    1813      337301 :                 xfs_agblock_t   bestrbno;
    1814      337301 :                 xfs_extlen_t    bestrlen;
    1815             : 
    1816      337301 :                 bestrlen = rlen;
    1817      337301 :                 bestrbno = rbno;
    1818      337301 :                 bestflen = flen;
    1819      337301 :                 bestfbno = fbno;
    1820    82807747 :                 for (;;) {
    1821    82807747 :                         if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
    1822           0 :                                 goto error0;
    1823    82807819 :                         if (i == 0)
    1824             :                                 break;
    1825    82805086 :                         if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
    1826             :                                         &i)))
    1827           0 :                                 goto error0;
    1828    82804858 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1829           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1830           0 :                                 error = -EFSCORRUPTED;
    1831           0 :                                 goto error0;
    1832             :                         }
    1833    82804858 :                         if (flen < bestrlen)
    1834             :                                 break;
    1835    82470289 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1836             :                                         &rbno, &rlen, &busy_gen);
    1837    82470446 :                         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1838    82470446 :                         if (XFS_IS_CORRUPT(args->mp,
    1839             :                                            rlen != 0 &&
    1840             :                                            (rlen > flen ||
    1841             :                                             rbno + rlen > fbno + flen))) {
    1842           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1843           0 :                                 error = -EFSCORRUPTED;
    1844           0 :                                 goto error0;
    1845             :                         }
    1846    82470446 :                         if (rlen > bestrlen) {
    1847      180356 :                                 bestrlen = rlen;
    1848      180356 :                                 bestrbno = rbno;
    1849      180356 :                                 bestflen = flen;
    1850      180356 :                                 bestfbno = fbno;
    1851      180356 :                                 if (rlen == args->maxlen)
    1852             :                                         break;
    1853             :                         }
    1854             :                 }
    1855      337302 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
    1856             :                                 &i)))
    1857           0 :                         goto error0;
    1858      337302 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1859           0 :                         xfs_btree_mark_sick(cnt_cur);
    1860           0 :                         error = -EFSCORRUPTED;
    1861           0 :                         goto error0;
    1862             :                 }
    1863      337302 :                 rlen = bestrlen;
    1864      337302 :                 rbno = bestrbno;
    1865      337302 :                 flen = bestflen;
    1866      337302 :                 fbno = bestfbno;
    1867             :         }
    1868     8003521 :         args->wasfromfl = 0;
    1869             :         /*
    1870             :          * Fix up the length.
    1871             :          */
    1872     8003521 :         args->len = rlen;
    1873     8003521 :         if (rlen < args->minlen) {
    1874      177517 :                 if (busy) {
    1875             :                         /*
    1876             :                          * Our only valid extents must have been busy. Flush and
    1877             :                          * retry the allocation again. If we get an -EAGAIN
    1878             :                          * error, we're being told that a deadlock was avoided
    1879             :                          * and the current transaction needs committing before
    1880             :                          * the allocation can be retried.
    1881             :                          */
    1882        1614 :                         trace_xfs_alloc_size_busy(args);
    1883        1614 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1884             :                                         busy_gen, alloc_flags);
    1885        1614 :                         if (error)
    1886           0 :                                 goto error0;
    1887             : 
    1888        1614 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1889        1614 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1890        1614 :                         goto restart;
    1891             :                 }
    1892      175903 :                 goto out_nominleft;
    1893             :         }
    1894     7826004 :         xfs_alloc_fix_len(args);
    1895             : 
    1896     7825601 :         rlen = args->len;
    1897     7825601 :         if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
    1898           0 :                 xfs_btree_mark_sick(cnt_cur);
    1899           0 :                 error = -EFSCORRUPTED;
    1900           0 :                 goto error0;
    1901             :         }
    1902             :         /*
    1903             :          * Allocate and initialize a cursor for the by-block tree.
    1904             :          */
    1905     7825601 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1906             :                                         args->pag, XFS_BTNUM_BNO);
    1907     7826510 :         if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
    1908             :                         rbno, rlen, XFSA_FIXUP_CNT_OK)))
    1909          37 :                 goto error0;
    1910     7825547 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1911     7826367 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1912     7826752 :         cnt_cur = bno_cur = NULL;
    1913     7826752 :         args->len = rlen;
    1914     7826752 :         args->agbno = rbno;
    1915     7826752 :         if (XFS_IS_CORRUPT(args->mp,
    1916             :                            args->agbno + args->len >
    1917             :                            be32_to_cpu(agf->agf_length))) {
    1918           0 :                 xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
    1919           0 :                 error = -EFSCORRUPTED;
    1920           0 :                 goto error0;
    1921             :         }
    1922     7826752 :         trace_xfs_alloc_size_done(args);
    1923     7826752 :         return 0;
    1924             : 
    1925          84 : error0:
    1926          84 :         trace_xfs_alloc_size_error(args);
    1927          84 :         if (cnt_cur)
    1928          84 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1929          84 :         if (bno_cur)
    1930          37 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1931             :         return error;
    1932             : 
    1933             : out_nominleft:
    1934      175903 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1935      175903 :         trace_xfs_alloc_size_nominleft(args);
    1936      175903 :         args->agbno = NULLAGBLOCK;
    1937      175903 :         return 0;
    1938             : }
    1939             : 
    1940             : /*
    1941             :  * Free the extent starting at agno/bno for length.
    1942             :  */
    1943             : STATIC int
    1944   105901561 : xfs_free_ag_extent(
    1945             :         struct xfs_trans                *tp,
    1946             :         struct xfs_buf                  *agbp,
    1947             :         xfs_agnumber_t                  agno,
    1948             :         xfs_agblock_t                   bno,
    1949             :         xfs_extlen_t                    len,
    1950             :         const struct xfs_owner_info     *oinfo,
    1951             :         enum xfs_ag_resv_type           type)
    1952             : {
    1953   105901561 :         struct xfs_mount                *mp;
    1954   105901561 :         struct xfs_btree_cur            *bno_cur;
    1955   105901561 :         struct xfs_btree_cur            *cnt_cur;
    1956   105901561 :         xfs_agblock_t                   gtbno; /* start of right neighbor */
    1957   105901561 :         xfs_extlen_t                    gtlen; /* length of right neighbor */
    1958   105901561 :         xfs_agblock_t                   ltbno; /* start of left neighbor */
    1959   105901561 :         xfs_extlen_t                    ltlen; /* length of left neighbor */
    1960   105901561 :         xfs_agblock_t                   nbno; /* new starting block of freesp */
    1961   105901561 :         xfs_extlen_t                    nlen; /* new length of freespace */
    1962   105901561 :         int                             haveleft; /* have a left neighbor */
    1963   105901561 :         int                             haveright; /* have a right neighbor */
    1964   105901561 :         int                             i;
    1965   105901561 :         int                             error;
    1966   105901561 :         struct xfs_perag                *pag = agbp->b_pag;
    1967             : 
    1968   105901561 :         bno_cur = cnt_cur = NULL;
    1969   105901561 :         mp = tp->t_mountp;
    1970             : 
    1971   105901561 :         if (!xfs_rmap_should_skip_owner_update(oinfo)) {
    1972     4182818 :                 error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
    1973     4183496 :                 if (error)
    1974          35 :                         goto error0;
    1975             :         }
    1976             : 
    1977             :         /*
    1978             :          * Allocate and initialize a cursor for the by-block btree.
    1979             :          */
    1980   105902204 :         bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
    1981             :         /*
    1982             :          * Look for a neighboring block on the left (lower block numbers)
    1983             :          * that is contiguous with this space.
    1984             :          */
    1985   105904619 :         if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
    1986         111 :                 goto error0;
    1987   105902952 :         if (haveleft) {
    1988             :                 /*
    1989             :                  * There is a block to our left.
    1990             :                  */
    1991   103477356 :                 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
    1992           0 :                         goto error0;
    1993   103471545 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1994           0 :                         xfs_btree_mark_sick(bno_cur);
    1995           0 :                         error = -EFSCORRUPTED;
    1996           0 :                         goto error0;
    1997             :                 }
    1998             :                 /*
    1999             :                  * It's not contiguous, though.
    2000             :                  */
    2001   103471545 :                 if (ltbno + ltlen < bno)
    2002    77153106 :                         haveleft = 0;
    2003             :                 else {
    2004             :                         /*
    2005             :                          * If this failure happens the request to free this
    2006             :                          * space was invalid, it's (partly) already free.
    2007             :                          * Very bad.
    2008             :                          */
    2009    26318439 :                         if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
    2010           0 :                                 xfs_btree_mark_sick(bno_cur);
    2011           0 :                                 error = -EFSCORRUPTED;
    2012           0 :                                 goto error0;
    2013             :                         }
    2014             :                 }
    2015             :         }
    2016             :         /*
    2017             :          * Look for a neighboring block on the right (higher block numbers)
    2018             :          * that is contiguous with this space.
    2019             :          */
    2020   105897141 :         if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
    2021           0 :                 goto error0;
    2022   105898683 :         if (haveright) {
    2023             :                 /*
    2024             :                  * There is a block to our right.
    2025             :                  */
    2026   104771196 :                 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
    2027           0 :                         goto error0;
    2028   104773730 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2029           0 :                         xfs_btree_mark_sick(bno_cur);
    2030           0 :                         error = -EFSCORRUPTED;
    2031           0 :                         goto error0;
    2032             :                 }
    2033             :                 /*
    2034             :                  * It's not contiguous, though.
    2035             :                  */
    2036   104773730 :                 if (bno + len < gtbno)
    2037    73026554 :                         haveright = 0;
    2038             :                 else {
    2039             :                         /*
    2040             :                          * If this failure happens the request to free this
    2041             :                          * space was invalid, it's (partly) already free.
    2042             :                          * Very bad.
    2043             :                          */
    2044    31747176 :                         if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
    2045           0 :                                 xfs_btree_mark_sick(bno_cur);
    2046           0 :                                 error = -EFSCORRUPTED;
    2047           0 :                                 goto error0;
    2048             :                         }
    2049             :                 }
    2050             :         }
    2051             :         /*
    2052             :          * Now allocate and initialize a cursor for the by-size tree.
    2053             :          */
    2054   105901217 :         cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
    2055             :         /*
    2056             :          * Have both left and right contiguous neighbors.
    2057             :          * Merge all three into a single free block.
    2058             :          */
    2059   105903294 :         if (haveleft && haveright) {
    2060             :                 /*
    2061             :                  * Delete the old by-size entry on the left.
    2062             :                  */
    2063    13890665 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2064          15 :                         goto error0;
    2065    13890869 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2066           0 :                         xfs_btree_mark_sick(cnt_cur);
    2067           0 :                         error = -EFSCORRUPTED;
    2068           0 :                         goto error0;
    2069             :                 }
    2070    13890869 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2071           0 :                         goto error0;
    2072    13889932 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2073           0 :                         xfs_btree_mark_sick(cnt_cur);
    2074           0 :                         error = -EFSCORRUPTED;
    2075           0 :                         goto error0;
    2076             :                 }
    2077             :                 /*
    2078             :                  * Delete the old by-size entry on the right.
    2079             :                  */
    2080    13889932 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2081           7 :                         goto error0;
    2082    13890929 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2083           0 :                         xfs_btree_mark_sick(cnt_cur);
    2084           0 :                         error = -EFSCORRUPTED;
    2085           0 :                         goto error0;
    2086             :                 }
    2087    13890929 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2088           0 :                         goto error0;
    2089    13890974 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2090           0 :                         xfs_btree_mark_sick(cnt_cur);
    2091           0 :                         error = -EFSCORRUPTED;
    2092           0 :                         goto error0;
    2093             :                 }
    2094             :                 /*
    2095             :                  * Delete the old by-block entry for the right block.
    2096             :                  */
    2097    13890974 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
    2098           1 :                         goto error0;
    2099    13890964 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2100           0 :                         xfs_btree_mark_sick(bno_cur);
    2101           0 :                         error = -EFSCORRUPTED;
    2102           0 :                         goto error0;
    2103             :                 }
    2104             :                 /*
    2105             :                  * Move the by-block cursor back to the left neighbor.
    2106             :                  */
    2107    13890964 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2108           0 :                         goto error0;
    2109    13890575 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2110           0 :                         xfs_btree_mark_sick(bno_cur);
    2111           0 :                         error = -EFSCORRUPTED;
    2112           0 :                         goto error0;
    2113             :                 }
    2114             : #ifdef DEBUG
    2115             :                 /*
    2116             :                  * Check that this is the right record: delete didn't
    2117             :                  * mangle the cursor.
    2118             :                  */
    2119             :                 {
    2120    13890575 :                         xfs_agblock_t   xxbno;
    2121    13890575 :                         xfs_extlen_t    xxlen;
    2122             : 
    2123    13890575 :                         if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
    2124             :                                         &i)))
    2125           0 :                                 goto error0;
    2126    13890744 :                         if (XFS_IS_CORRUPT(mp,
    2127             :                                            i != 1 ||
    2128             :                                            xxbno != ltbno ||
    2129             :                                            xxlen != ltlen)) {
    2130           0 :                                 xfs_btree_mark_sick(bno_cur);
    2131           0 :                                 error = -EFSCORRUPTED;
    2132           0 :                                 goto error0;
    2133             :                         }
    2134             :                 }
    2135             : #endif
    2136             :                 /*
    2137             :                  * Update remaining by-block entry to the new, joined block.
    2138             :                  */
    2139    13890744 :                 nbno = ltbno;
    2140    13890744 :                 nlen = len + ltlen + gtlen;
    2141    13890744 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2142           0 :                         goto error0;
    2143             :         }
    2144             :         /*
    2145             :          * Have only a left contiguous neighbor.
    2146             :          * Merge it together with the new freespace.
    2147             :          */
    2148    92012629 :         else if (haveleft) {
    2149             :                 /*
    2150             :                  * Delete the old by-size entry on the left.
    2151             :                  */
    2152    12429367 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2153           6 :                         goto error0;
    2154    12429870 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2155           0 :                         xfs_btree_mark_sick(cnt_cur);
    2156           0 :                         error = -EFSCORRUPTED;
    2157           0 :                         goto error0;
    2158             :                 }
    2159    12429870 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2160           0 :                         goto error0;
    2161    12428892 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2162           0 :                         xfs_btree_mark_sick(cnt_cur);
    2163           0 :                         error = -EFSCORRUPTED;
    2164           0 :                         goto error0;
    2165             :                 }
    2166             :                 /*
    2167             :                  * Back up the by-block cursor to the left neighbor, and
    2168             :                  * update its length.
    2169             :                  */
    2170    12428892 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2171           0 :                         goto error0;
    2172    12429028 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2173           0 :                         xfs_btree_mark_sick(bno_cur);
    2174           0 :                         error = -EFSCORRUPTED;
    2175           0 :                         goto error0;
    2176             :                 }
    2177    12429028 :                 nbno = ltbno;
    2178    12429028 :                 nlen = len + ltlen;
    2179    12429028 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2180           0 :                         goto error0;
    2181             :         }
    2182             :         /*
    2183             :          * Have only a right contiguous neighbor.
    2184             :          * Merge it together with the new freespace.
    2185             :          */
    2186    79583262 :         else if (haveright) {
    2187             :                 /*
    2188             :                  * Delete the old by-size entry on the right.
    2189             :                  */
    2190    17856818 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2191          27 :                         goto error0;
    2192    17856846 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2193           0 :                         xfs_btree_mark_sick(cnt_cur);
    2194           0 :                         error = -EFSCORRUPTED;
    2195           0 :                         goto error0;
    2196             :                 }
    2197    17856846 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2198           0 :                         goto error0;
    2199    17856156 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2200           0 :                         xfs_btree_mark_sick(cnt_cur);
    2201           0 :                         error = -EFSCORRUPTED;
    2202           0 :                         goto error0;
    2203             :                 }
    2204             :                 /*
    2205             :                  * Update the starting block and length of the right
    2206             :                  * neighbor in the by-block tree.
    2207             :                  */
    2208    17856156 :                 nbno = bno;
    2209    17856156 :                 nlen = len + gtlen;
    2210    17856156 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2211           0 :                         goto error0;
    2212             :         }
    2213             :         /*
    2214             :          * No contiguous neighbors.
    2215             :          * Insert the new freespace into the by-block tree.
    2216             :          */
    2217             :         else {
    2218    61726444 :                 nbno = bno;
    2219    61726444 :                 nlen = len;
    2220    61726444 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
    2221           0 :                         goto error0;
    2222    61722493 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2223           0 :                         xfs_btree_mark_sick(bno_cur);
    2224           0 :                         error = -EFSCORRUPTED;
    2225           0 :                         goto error0;
    2226             :                 }
    2227             :         }
    2228   105898409 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    2229   105902510 :         bno_cur = NULL;
    2230             :         /*
    2231             :          * In all cases we need to insert the new freespace in the by-size tree.
    2232             :          */
    2233   105902510 :         if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
    2234          39 :                 goto error0;
    2235   105902880 :         if (XFS_IS_CORRUPT(mp, i != 0)) {
    2236           0 :                 xfs_btree_mark_sick(cnt_cur);
    2237           0 :                 error = -EFSCORRUPTED;
    2238           0 :                 goto error0;
    2239             :         }
    2240   105902880 :         if ((error = xfs_btree_insert(cnt_cur, &i)))
    2241           0 :                 goto error0;
    2242   105893135 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    2243           0 :                 xfs_btree_mark_sick(cnt_cur);
    2244           0 :                 error = -EFSCORRUPTED;
    2245           0 :                 goto error0;
    2246             :         }
    2247   105893135 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    2248   105903024 :         cnt_cur = NULL;
    2249             : 
    2250             :         /*
    2251             :          * Update the freespace totals in the ag and superblock.
    2252             :          */
    2253   105903024 :         error = xfs_alloc_update_counters(tp, agbp, len);
    2254   105892986 :         xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
    2255   105867019 :         if (error)
    2256           0 :                 goto error0;
    2257             : 
    2258   105867019 :         XFS_STATS_INC(mp, xs_freex);
    2259   105881825 :         XFS_STATS_ADD(mp, xs_freeb, len);
    2260             : 
    2261   105881943 :         trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
    2262             : 
    2263   105881943 :         return 0;
    2264             : 
    2265         241 :  error0:
    2266         241 :         trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
    2267         241 :         if (bno_cur)
    2268         167 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    2269         241 :         if (cnt_cur)
    2270          95 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    2271             :         return error;
    2272             : }
    2273             : 
    2274             : /*
    2275             :  * Visible (exported) allocation/free functions.
    2276             :  * Some of these are used just by xfs_alloc_btree.c and this file.
    2277             :  */
    2278             : 
    2279             : /*
    2280             :  * Compute and fill in value of m_alloc_maxlevels.
    2281             :  */
    2282             : void
    2283       66972 : xfs_alloc_compute_maxlevels(
    2284             :         xfs_mount_t     *mp)    /* file system mount structure */
    2285             : {
    2286      133944 :         mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
    2287       66972 :                         (mp->m_sb.sb_agblocks + 1) / 2);
    2288       66972 :         ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
    2289       66972 : }
    2290             : 
    2291             : /*
    2292             :  * Find the length of the longest extent in an AG.  The 'need' parameter
    2293             :  * specifies how much space we're going to need for the AGFL and the
    2294             :  * 'reserved' parameter tells us how many blocks in this AG are reserved for
    2295             :  * other callers.
    2296             :  */
    2297             : xfs_extlen_t
    2298   480540823 : xfs_alloc_longest_free_extent(
    2299             :         struct xfs_perag        *pag,
    2300             :         xfs_extlen_t            need,
    2301             :         xfs_extlen_t            reserved)
    2302             : {
    2303   480540823 :         xfs_extlen_t            delta = 0;
    2304             : 
    2305             :         /*
    2306             :          * If the AGFL needs a recharge, we'll have to subtract that from the
    2307             :          * longest extent.
    2308             :          */
    2309   480540823 :         if (need > pag->pagf_flcount)
    2310      798862 :                 delta = need - pag->pagf_flcount;
    2311             : 
    2312             :         /*
    2313             :          * If we cannot maintain others' reservations with space from the
    2314             :          * not-longest freesp extents, we'll have to subtract /that/ from
    2315             :          * the longest extent too.
    2316             :          */
    2317   480540823 :         if (pag->pagf_freeblks - pag->pagf_longest < reserved)
    2318   373023198 :                 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
    2319             : 
    2320             :         /*
    2321             :          * If the longest extent is long enough to satisfy all the
    2322             :          * reservations and AGFL rules in place, we can return this extent.
    2323             :          */
    2324   480540823 :         if (pag->pagf_longest > delta)
    2325   480041791 :                 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
    2326             :                                 pag->pagf_longest - delta);
    2327             : 
    2328             :         /* Otherwise, let the caller try for 1 block if there's space. */
    2329      499032 :         return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
    2330             : }
    2331             : 
    2332             : /*
    2333             :  * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
    2334             :  * return the largest possible minimum length.
    2335             :  */
    2336             : unsigned int
    2337  1580612266 : xfs_alloc_min_freelist(
    2338             :         struct xfs_mount        *mp,
    2339             :         struct xfs_perag        *pag)
    2340             : {
    2341             :         /* AG btrees have at least 1 level. */
    2342  1580612266 :         static const uint8_t    fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
    2343  1580612266 :         const uint8_t           *levels = pag ? pag->pagf_levels : fake_levels;
    2344  1580612266 :         unsigned int            min_free;
    2345             : 
    2346  1580612266 :         ASSERT(mp->m_alloc_maxlevels > 0);
    2347             : 
    2348             :         /* space needed by-bno freespace btree */
    2349  1580612266 :         min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
    2350             :                                        mp->m_alloc_maxlevels);
    2351             :         /* space needed by-size freespace btree */
    2352  1580612266 :         min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
    2353             :                                        mp->m_alloc_maxlevels);
    2354             :         /* space needed reverse mapping used space btree */
    2355  1580612266 :         if (xfs_has_rmapbt(mp))
    2356  1570176017 :                 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
    2357             :                                                 mp->m_rmap_maxlevels);
    2358             : 
    2359  1580612266 :         return min_free;
    2360             : }
    2361             : 
    2362             : /*
    2363             :  * Check if the operation we are fixing up the freelist for should go ahead or
    2364             :  * not. If we are freeing blocks, we always allow it, otherwise the allocation
    2365             :  * is dependent on whether the size and shape of free space available will
    2366             :  * permit the requested allocation to take place.
    2367             :  */
    2368             : static bool
    2369  1489281510 : xfs_alloc_space_available(
    2370             :         struct xfs_alloc_arg    *args,
    2371             :         xfs_extlen_t            min_free,
    2372             :         int                     flags)
    2373             : {
    2374  1489281510 :         struct xfs_perag        *pag = args->pag;
    2375  1489281510 :         xfs_extlen_t            alloc_len, longest;
    2376  1489281510 :         xfs_extlen_t            reservation; /* blocks that are still reserved */
    2377  1489281510 :         int                     available;
    2378  1489281510 :         xfs_extlen_t            agflcount;
    2379             : 
    2380  1489281510 :         if (flags & XFS_ALLOC_FLAG_FREEING)
    2381             :                 return true;
    2382             : 
    2383   389181141 :         reservation = xfs_ag_resv_needed(pag, args->resv);
    2384             : 
    2385             :         /* do we have enough contiguous free space for the allocation? */
    2386   389200880 :         alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
    2387   389200880 :         longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
    2388   389200880 :         if (longest < alloc_len)
    2389             :                 return false;
    2390             : 
    2391             :         /*
    2392             :          * Do we have enough free space remaining for the allocation? Don't
    2393             :          * account extra agfl blocks because we are about to defer free them,
    2394             :          * making them unavailable until the current transaction commits.
    2395             :          */
    2396   313400171 :         agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
    2397   313400171 :         available = (int)(pag->pagf_freeblks + agflcount -
    2398   313400171 :                           reservation - min_free - args->minleft);
    2399   313400171 :         if (available < (int)max(args->total, alloc_len))
    2400             :                 return false;
    2401             : 
    2402             :         /*
    2403             :          * Clamp maxlen to the amount of free space available for the actual
    2404             :          * extent allocation.
    2405             :          */
    2406   205292149 :         if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
    2407      453118 :                 args->maxlen = available;
    2408      453118 :                 ASSERT(args->maxlen > 0);
    2409      453118 :                 ASSERT(args->maxlen >= args->minlen);
    2410             :         }
    2411             : 
    2412             :         return true;
    2413             : }
    2414             : 
    2415             : int
    2416      328618 : xfs_free_agfl_block(
    2417             :         struct xfs_trans        *tp,
    2418             :         xfs_agnumber_t          agno,
    2419             :         xfs_agblock_t           agbno,
    2420             :         struct xfs_buf          *agbp,
    2421             :         struct xfs_owner_info   *oinfo)
    2422             : {
    2423      328618 :         int                     error;
    2424      328618 :         struct xfs_buf          *bp;
    2425             : 
    2426      328618 :         error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
    2427             :                                    XFS_AG_RESV_AGFL);
    2428      328619 :         if (error)
    2429             :                 return error;
    2430             : 
    2431      657222 :         error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
    2432      328611 :                         XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
    2433      328611 :                         tp->t_mountp->m_bsize, 0, &bp);
    2434      328610 :         if (error)
    2435             :                 return error;
    2436      328610 :         xfs_trans_binval(tp, bp);
    2437             : 
    2438      328610 :         return 0;
    2439             : }
    2440             : 
    2441             : /*
    2442             :  * Check the agfl fields of the agf for inconsistency or corruption.
    2443             :  *
    2444             :  * The original purpose was to detect an agfl header padding mismatch between
    2445             :  * current and early v5 kernels. This problem manifests as a 1-slot size
    2446             :  * difference between the on-disk flcount and the active [first, last] range of
    2447             :  * a wrapped agfl.
    2448             :  *
    2449             :  * However, we need to use these same checks to catch agfl count corruptions
    2450             :  * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
    2451             :  * way, we need to reset the agfl and warn the user.
    2452             :  *
    2453             :  * Return true if a reset is required before the agfl can be used, false
    2454             :  * otherwise.
    2455             :  */
    2456             : static bool
    2457      779899 : xfs_agfl_needs_reset(
    2458             :         struct xfs_mount        *mp,
    2459             :         struct xfs_agf          *agf)
    2460             : {
    2461      779899 :         uint32_t                f = be32_to_cpu(agf->agf_flfirst);
    2462      779899 :         uint32_t                l = be32_to_cpu(agf->agf_fllast);
    2463      779899 :         uint32_t                c = be32_to_cpu(agf->agf_flcount);
    2464      779899 :         int                     agfl_size = xfs_agfl_size(mp);
    2465      779899 :         int                     active;
    2466             : 
    2467             :         /*
    2468             :          * The agf read verifier catches severe corruption of these fields.
    2469             :          * Repeat some sanity checks to cover a packed -> unpacked mismatch if
    2470             :          * the verifier allows it.
    2471             :          */
    2472      779899 :         if (f >= agfl_size || l >= agfl_size)
    2473             :                 return true;
    2474      779899 :         if (c > agfl_size)
    2475             :                 return true;
    2476             : 
    2477             :         /*
    2478             :          * Check consistency between the on-disk count and the active range. An
    2479             :          * agfl padding mismatch manifests as an inconsistent flcount.
    2480             :          */
    2481      779899 :         if (c && l >= f)
    2482      727816 :                 active = l - f + 1;
    2483       52083 :         else if (c)
    2484         201 :                 active = agfl_size - f + l + 1;
    2485             :         else
    2486             :                 active = 0;
    2487             : 
    2488      779899 :         return active != c;
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
    2493             :  * agfl content cannot be trusted. Warn the user that a repair is required to
    2494             :  * recover leaked blocks.
    2495             :  *
    2496             :  * The purpose of this mechanism is to handle filesystems affected by the agfl
    2497             :  * header padding mismatch problem. A reset keeps the filesystem online with a
    2498             :  * relatively minor free space accounting inconsistency rather than suffer the
    2499             :  * inevitable crash from use of an invalid agfl block.
    2500             :  */
    2501             : static void
    2502          20 : xfs_agfl_reset(
    2503             :         struct xfs_trans        *tp,
    2504             :         struct xfs_buf          *agbp,
    2505             :         struct xfs_perag        *pag)
    2506             : {
    2507          20 :         struct xfs_mount        *mp = tp->t_mountp;
    2508          20 :         struct xfs_agf          *agf = agbp->b_addr;
    2509             : 
    2510          40 :         ASSERT(xfs_perag_agfl_needs_reset(pag));
    2511          20 :         trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
    2512             : 
    2513          20 :         xfs_warn(mp,
    2514             :                "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
    2515             :                "Please unmount and run xfs_repair.",
    2516             :                  pag->pag_agno, pag->pagf_flcount);
    2517             : 
    2518          20 :         agf->agf_flfirst = 0;
    2519          20 :         agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
    2520          20 :         agf->agf_flcount = 0;
    2521          20 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
    2522             :                                     XFS_AGF_FLCOUNT);
    2523             : 
    2524          20 :         pag->pagf_flcount = 0;
    2525          20 :         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    2526          20 : }
    2527             : 
    2528             : /*
    2529             :  * Defer an AGFL block free. This is effectively equivalent to
    2530             :  * xfs_free_extent_later() with some special handling particular to AGFL blocks.
    2531             :  *
    2532             :  * Deferring AGFL frees helps prevent log reservation overruns due to too many
    2533             :  * allocation operations in a transaction. AGFL frees are prone to this problem
    2534             :  * because for one they are always freed one at a time. Further, an immediate
    2535             :  * AGFL block free can cause a btree join and require another block free before
    2536             :  * the real allocation can proceed. Deferring the free disconnects freeing up
    2537             :  * the AGFL slot from freeing the block.
    2538             :  */
    2539             : static int
    2540      328633 : xfs_defer_agfl_block(
    2541             :         struct xfs_trans                *tp,
    2542             :         xfs_agnumber_t                  agno,
    2543             :         xfs_agblock_t                   agbno,
    2544             :         struct xfs_owner_info           *oinfo)
    2545             : {
    2546      328633 :         struct xfs_mount                *mp = tp->t_mountp;
    2547      328633 :         struct xfs_extent_free_item     *xefi;
    2548      328633 :         xfs_fsblock_t                   fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
    2549             : 
    2550      328633 :         ASSERT(xfs_extfree_item_cache != NULL);
    2551      328633 :         ASSERT(oinfo != NULL);
    2552             : 
    2553      328633 :         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
    2554           0 :                 return -EFSCORRUPTED;
    2555             : 
    2556      328633 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2557             :                                GFP_KERNEL | __GFP_NOFAIL);
    2558      328632 :         xefi->xefi_startblock = fsbno;
    2559      328632 :         xefi->xefi_blockcount = 1;
    2560      328632 :         xefi->xefi_owner = oinfo->oi_owner;
    2561      328632 :         xefi->xefi_agresv = XFS_AG_RESV_AGFL;
    2562             : 
    2563      328632 :         trace_xfs_agfl_free_defer(mp, xefi);
    2564             : 
    2565      328633 :         xfs_extent_free_get_group(mp, xefi);
    2566      328634 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
    2567      328634 :         return 0;
    2568             : }
    2569             : 
    2570             : /*
    2571             :  * Add the extent to the list of extents to be free at transaction end.
    2572             :  * The list is maintained sorted (by block number).
    2573             :  */
    2574             : int
    2575   149346695 : xfs_free_extent_later(
    2576             :         struct xfs_trans                *tp,
    2577             :         xfs_fsblock_t                   bno,
    2578             :         xfs_filblks_t                   len,
    2579             :         const struct xfs_owner_info     *oinfo,
    2580             :         enum xfs_ag_resv_type           type,
    2581             :         unsigned int                    flags)
    2582             : {
    2583   149346695 :         struct xfs_extent_free_item     *xefi;
    2584   149346695 :         struct xfs_mount                *mp = tp->t_mountp;
    2585   149346695 :         enum xfs_defer_ops_type         optype;
    2586             : #ifdef DEBUG
    2587   149346695 :         xfs_agnumber_t                  agno;
    2588   149346695 :         xfs_agblock_t                   agbno;
    2589             : 
    2590   149346695 :         ASSERT(bno != NULLFSBLOCK);
    2591   149346695 :         ASSERT(len > 0);
    2592   149346695 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
    2593   149346695 :         ASSERT(!isnullstartblock(bno));
    2594   149346695 :         if (flags & XFS_FREE_EXTENT_REALTIME) {
    2595    43898021 :                 ASSERT(bno < mp->m_sb.sb_rblocks);
    2596    43898021 :                 ASSERT(len <= mp->m_sb.sb_rblocks);
    2597    43898021 :                 ASSERT(bno + len <= mp->m_sb.sb_rblocks);
    2598             :         } else {
    2599   105448674 :                 agno = XFS_FSB_TO_AGNO(mp, bno);
    2600   105448674 :                 agbno = XFS_FSB_TO_AGBNO(mp, bno);
    2601             : 
    2602   105364507 :                 ASSERT(agno < mp->m_sb.sb_agcount);
    2603   105364507 :                 ASSERT(agbno < mp->m_sb.sb_agblocks);
    2604   105364507 :                 ASSERT(len < mp->m_sb.sb_agblocks);
    2605   105364507 :                 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
    2606             :         }
    2607             : #endif
    2608   149262528 :         ASSERT(!(flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
    2609   149262528 :         ASSERT(xfs_extfree_item_cache != NULL);
    2610   149262528 :         ASSERT(type != XFS_AG_RESV_AGFL);
    2611             : 
    2612   149262528 :         if (flags & XFS_FREE_EXTENT_REALTIME) {
    2613    43897839 :                 if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
    2614           0 :                         return -EFSCORRUPTED;
    2615             :         } else {
    2616   105364689 :                 if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
    2617           0 :                         return -EFSCORRUPTED;
    2618             :         }
    2619             : 
    2620   149343050 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2621             :                                GFP_KERNEL | __GFP_NOFAIL);
    2622   149367418 :         xefi->xefi_startblock = bno;
    2623   149367418 :         xefi->xefi_blockcount = (xfs_extlen_t)len;
    2624   149367418 :         xefi->xefi_agresv = type;
    2625   149367418 :         if (flags & XFS_FREE_EXTENT_SKIP_DISCARD)
    2626    15976699 :                 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
    2627   149367418 :         if (flags & XFS_FREE_EXTENT_REALTIME) {
    2628             :                 /*
    2629             :                  * Realtime and data section EFIs must use separate
    2630             :                  * transactions to finish deferred work because updates to
    2631             :                  * realtime metadata files can lock AGFs to allocate btree
    2632             :                  * blocks and we don't want that mixing with the AGF locks
    2633             :                  * taken to finish data section EFIs.
    2634             :                  */
    2635    43897817 :                 optype = XFS_DEFER_OPS_TYPE_FREE_RT;
    2636    43897817 :                 xefi->xefi_flags |= XFS_EFI_REALTIME;
    2637             :         } else {
    2638             :                 optype = XFS_DEFER_OPS_TYPE_FREE;
    2639             :         }
    2640   149367418 :         if (oinfo) {
    2641     3854360 :                 ASSERT(oinfo->oi_offset == 0);
    2642             : 
    2643     3854360 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
    2644         268 :                         xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
    2645     3854360 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
    2646     2838240 :                         xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
    2647     3854360 :                 xefi->xefi_owner = oinfo->oi_owner;
    2648             :         } else {
    2649   145513058 :                 xefi->xefi_owner = XFS_RMAP_OWN_NULL;
    2650             :         }
    2651             : 
    2652   149367418 :         trace_xfs_extent_free_defer(mp, xefi);
    2653             : 
    2654   149277078 :         xfs_extent_free_get_group(mp, xefi);
    2655   149439875 :         xfs_defer_add(tp, optype, &xefi->xefi_list);
    2656   149439875 :         return 0;
    2657             : }
    2658             : 
    2659             : #ifdef DEBUG
    2660             : /*
    2661             :  * Check if an AGF has a free extent record whose length is equal to
    2662             :  * args->minlen.
    2663             :  */
    2664             : STATIC int
    2665     2674380 : xfs_exact_minlen_extent_available(
    2666             :         struct xfs_alloc_arg    *args,
    2667             :         struct xfs_buf          *agbp,
    2668             :         int                     *stat)
    2669             : {
    2670     2674380 :         struct xfs_btree_cur    *cnt_cur;
    2671     2674380 :         xfs_agblock_t           fbno;
    2672     2674380 :         xfs_extlen_t            flen;
    2673     2674380 :         int                     error = 0;
    2674             : 
    2675     2674380 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
    2676             :                                         args->pag, XFS_BTNUM_CNT);
    2677     2674482 :         error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
    2678     2673908 :         if (error)
    2679           0 :                 goto out;
    2680             : 
    2681     2673908 :         if (*stat == 0) {
    2682           0 :                 xfs_btree_mark_sick(cnt_cur);
    2683           0 :                 error = -EFSCORRUPTED;
    2684           0 :                 goto out;
    2685             :         }
    2686             : 
    2687     2673908 :         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
    2688     2671932 :         if (error)
    2689           0 :                 goto out;
    2690             : 
    2691     2671932 :         if (*stat == 1 && flen != args->minlen)
    2692       34219 :                 *stat = 0;
    2693             : 
    2694     2637713 : out:
    2695     2671932 :         xfs_btree_del_cursor(cnt_cur, error);
    2696             : 
    2697     2673714 :         return error;
    2698             : }
    2699             : #endif
    2700             : 
    2701             : /*
    2702             :  * Decide whether to use this allocation group for this allocation.
    2703             :  * If so, fix up the btree freelist's size.
    2704             :  */
    2705             : int                     /* error */
    2706   840631665 : xfs_alloc_fix_freelist(
    2707             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    2708             :         uint32_t                alloc_flags)
    2709             : {
    2710   840631665 :         struct xfs_mount        *mp = args->mp;
    2711   840631665 :         struct xfs_perag        *pag = args->pag;
    2712   840631665 :         struct xfs_trans        *tp = args->tp;
    2713   840631665 :         struct xfs_buf          *agbp = NULL;
    2714   840631665 :         struct xfs_buf          *agflbp = NULL;
    2715   840631665 :         struct xfs_alloc_arg    targs;  /* local allocation arguments */
    2716   840631665 :         xfs_agblock_t           bno;    /* freelist block */
    2717   840631665 :         xfs_extlen_t            need;   /* total blocks needed in freelist */
    2718   840631665 :         int                     error = 0;
    2719             : 
    2720             :         /* deferred ops (AGFL block frees) require permanent transactions */
    2721   840631665 :         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    2722             : 
    2723  1681263330 :         if (!xfs_perag_initialised_agf(pag)) {
    2724        6462 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2725        6462 :                 if (error) {
    2726             :                         /* Couldn't lock the AGF so skip this AG. */
    2727        2755 :                         if (error == -EAGAIN)
    2728        2755 :                                 error = 0;
    2729        2755 :                         goto out_no_agbp;
    2730             :                 }
    2731             :         }
    2732             : 
    2733             :         /*
    2734             :          * If this is a metadata preferred pag and we are user data then try
    2735             :          * somewhere else if we are not being asked to try harder at this
    2736             :          * point
    2737             :          */
    2738  1681257820 :         if (xfs_perag_prefers_metadata(pag) &&
    2739           0 :             (args->datatype & XFS_ALLOC_USERDATA) &&
    2740           0 :             (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
    2741           0 :                 ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
    2742           0 :                 goto out_agbp_relse;
    2743             :         }
    2744             : 
    2745   840628910 :         need = xfs_alloc_min_freelist(mp, pag);
    2746   840583157 :         if (!xfs_alloc_space_available(args, need, alloc_flags |
    2747             :                         XFS_ALLOC_FLAG_CHECK))
    2748   183901941 :                 goto out_agbp_relse;
    2749             : 
    2750             :         /*
    2751             :          * Get the a.g. freespace buffer.
    2752             :          * Can fail if we're not blocking on locks, and it's held.
    2753             :          */
    2754   656719850 :         if (!agbp) {
    2755   656707246 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2756   656755817 :                 if (error) {
    2757             :                         /* Couldn't lock the AGF so skip this AG. */
    2758     8007289 :                         if (error == -EAGAIN)
    2759     8006026 :                                 error = 0;
    2760     8007289 :                         goto out_no_agbp;
    2761             :                 }
    2762             :         }
    2763             : 
    2764             :         /* reset a padding mismatched agfl before final free space check */
    2765  1297522264 :         if (xfs_perag_agfl_needs_reset(pag))
    2766          20 :                 xfs_agfl_reset(tp, agbp, pag);
    2767             : 
    2768             :         /* If there isn't enough total space or single-extent, reject it. */
    2769   648761132 :         need = xfs_alloc_min_freelist(mp, pag);
    2770   648755663 :         if (!xfs_alloc_space_available(args, need, alloc_flags))
    2771        6762 :                 goto out_agbp_relse;
    2772             : 
    2773             : #ifdef DEBUG
    2774   648748927 :         if (args->alloc_minlen_only) {
    2775     2674511 :                 int stat;
    2776             : 
    2777     2674511 :                 error = xfs_exact_minlen_extent_available(args, agbp, &stat);
    2778     2673927 :                 if (error || !stat)
    2779       34219 :                         goto out_agbp_relse;
    2780             :         }
    2781             : #endif
    2782             :         /*
    2783             :          * Make the freelist shorter if it's too long.
    2784             :          *
    2785             :          * Note that from this point onwards, we will always release the agf and
    2786             :          * agfl buffers on error. This handles the case where we error out and
    2787             :          * the buffers are clean or may not have been joined to the transaction
    2788             :          * and hence need to be released manually. If they have been joined to
    2789             :          * the transaction, then xfs_trans_brelse() will handle them
    2790             :          * appropriately based on the recursion count and dirty state of the
    2791             :          * buffer.
    2792             :          *
    2793             :          * XXX (dgc): When we have lots of free space, does this buy us
    2794             :          * anything other than extra overhead when we need to put more blocks
    2795             :          * back on the free list? Maybe we should only do this when space is
    2796             :          * getting low or the AGFL is more than half full?
    2797             :          *
    2798             :          * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
    2799             :          * big; the NORMAP flag prevents AGFL expand/shrink operations from
    2800             :          * updating the rmapbt.  Both flags are used in xfs_repair while we're
    2801             :          * rebuilding the rmapbt, and neither are used by the kernel.  They're
    2802             :          * both required to ensure that rmaps are correctly recorded for the
    2803             :          * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
    2804             :          * repair/rmap.c in xfsprogs for details.
    2805             :          */
    2806   648714124 :         memset(&targs, 0, sizeof(targs));
    2807             :         /* struct copy below */
    2808   648714124 :         if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
    2809       38096 :                 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
    2810             :         else
    2811   648676028 :                 targs.oinfo = XFS_RMAP_OINFO_AG;
    2812   649042758 :         while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
    2813   649042758 :                         pag->pagf_flcount > need) {
    2814      338111 :                 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
    2815      328634 :                 if (error)
    2816           0 :                         goto out_agbp_relse;
    2817             : 
    2818             :                 /* defer agfl frees */
    2819      328634 :                 error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
    2820      328634 :                 if (error)
    2821           0 :                         goto out_agbp_relse;
    2822             :         }
    2823             : 
    2824   648704647 :         targs.tp = tp;
    2825   648704647 :         targs.mp = mp;
    2826   648704647 :         targs.agbp = agbp;
    2827   648704647 :         targs.agno = args->agno;
    2828   648704647 :         targs.alignment = targs.minlen = targs.prod = 1;
    2829   648704647 :         targs.pag = pag;
    2830   648704647 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2831   648731189 :         if (error)
    2832        1400 :                 goto out_agbp_relse;
    2833             : 
    2834             :         /* Make the freelist longer if it's too short. */
    2835   649608419 :         while (pag->pagf_flcount < need) {
    2836      880688 :                 targs.agbno = 0;
    2837      880688 :                 targs.maxlen = need - pag->pagf_flcount;
    2838      880688 :                 targs.resv = XFS_AG_RESV_AGFL;
    2839             : 
    2840             :                 /* Allocate as many blocks as possible at once. */
    2841      880688 :                 error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
    2842      878866 :                 if (error)
    2843           1 :                         goto out_agflbp_relse;
    2844             : 
    2845             :                 /*
    2846             :                  * Stop if we run out.  Won't happen if callers are obeying
    2847             :                  * the restrictions correctly.  Can happen for free calls
    2848             :                  * on a completely full ag.
    2849             :                  */
    2850      878865 :                 if (targs.agbno == NULLAGBLOCK) {
    2851         223 :                         if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
    2852             :                                 break;
    2853           0 :                         goto out_agflbp_relse;
    2854             :                 }
    2855             : 
    2856      878642 :                 if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
    2857      878631 :                         error = xfs_rmap_alloc(tp, agbp, pag,
    2858             :                                        targs.agbno, targs.len, &targs.oinfo);
    2859      878631 :                         if (error)
    2860          12 :                                 goto out_agflbp_relse;
    2861             :                 }
    2862      878630 :                 error = xfs_alloc_update_counters(tp, agbp,
    2863      878630 :                                                   -((long)(targs.len)));
    2864      878629 :                 if (error)
    2865           0 :                         goto out_agflbp_relse;
    2866             : 
    2867             :                 /*
    2868             :                  * Put each allocated block on the list.
    2869             :                  */
    2870     1911215 :                 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
    2871     1032585 :                         error = xfs_alloc_put_freelist(pag, tp, agbp,
    2872             :                                                         agflbp, bno, 0);
    2873     1032586 :                         if (error)
    2874           0 :                                 goto out_agflbp_relse;
    2875             :                 }
    2876             :         }
    2877   648727954 :         xfs_trans_brelse(tp, agflbp);
    2878   648717224 :         args->agbp = agbp;
    2879   648717224 :         return 0;
    2880             : 
    2881          13 : out_agflbp_relse:
    2882          13 :         xfs_trans_brelse(tp, agflbp);
    2883   183944335 : out_agbp_relse:
    2884   183944335 :         if (agbp)
    2885       42368 :                 xfs_trans_brelse(tp, agbp);
    2886   183901967 : out_no_agbp:
    2887   191954405 :         args->agbp = NULL;
    2888   191954405 :         return error;
    2889             : }
    2890             : 
    2891             : /*
    2892             :  * Get a block from the freelist.
    2893             :  * Returns with the buffer for the block gotten.
    2894             :  */
    2895             : int
    2896     1310531 : xfs_alloc_get_freelist(
    2897             :         struct xfs_perag        *pag,
    2898             :         struct xfs_trans        *tp,
    2899             :         struct xfs_buf          *agbp,
    2900             :         xfs_agblock_t           *bnop,
    2901             :         int                     btreeblk)
    2902             : {
    2903     1310531 :         struct xfs_agf          *agf = agbp->b_addr;
    2904     1310531 :         struct xfs_buf          *agflbp;
    2905     1310531 :         xfs_agblock_t           bno;
    2906     1310531 :         __be32                  *agfl_bno;
    2907     1310531 :         int                     error;
    2908     1310531 :         uint32_t                logflags;
    2909     1310531 :         struct xfs_mount        *mp = tp->t_mountp;
    2910             : 
    2911             :         /*
    2912             :          * Freelist is empty, give up.
    2913             :          */
    2914     1310531 :         if (!agf->agf_flcount) {
    2915           0 :                 *bnop = NULLAGBLOCK;
    2916           0 :                 return 0;
    2917             :         }
    2918             :         /*
    2919             :          * Read the array of free blocks.
    2920             :          */
    2921     1310531 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2922     1310531 :         if (error)
    2923             :                 return error;
    2924             : 
    2925             : 
    2926             :         /*
    2927             :          * Get the block number and update the data structures.
    2928             :          */
    2929     1310531 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    2930     1310531 :         bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
    2931     2621062 :         if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
    2932           0 :                 return -EFSCORRUPTED;
    2933             : 
    2934     1310531 :         be32_add_cpu(&agf->agf_flfirst, 1);
    2935     1310531 :         xfs_trans_brelse(tp, agflbp);
    2936     2621064 :         if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
    2937         805 :                 agf->agf_flfirst = 0;
    2938             : 
    2939     2621064 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    2940     1310532 :         be32_add_cpu(&agf->agf_flcount, -1);
    2941     1310532 :         pag->pagf_flcount--;
    2942             : 
    2943     1310532 :         logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
    2944     1310532 :         if (btreeblk) {
    2945      981898 :                 be32_add_cpu(&agf->agf_btreeblks, 1);
    2946      981898 :                 pag->pagf_btreeblks++;
    2947      981898 :                 logflags |= XFS_AGF_BTREEBLKS;
    2948             :         }
    2949             : 
    2950     1310532 :         xfs_alloc_log_agf(tp, agbp, logflags);
    2951     1310532 :         *bnop = bno;
    2952             : 
    2953     1310532 :         return 0;
    2954             : }
    2955             : 
    2956             : /*
    2957             :  * Log the given fields from the agf structure.
    2958             :  */
    2959             : void
    2960   263263918 : xfs_alloc_log_agf(
    2961             :         struct xfs_trans        *tp,
    2962             :         struct xfs_buf          *bp,
    2963             :         uint32_t                fields)
    2964             : {
    2965   263263918 :         int     first;          /* first byte offset */
    2966   263263918 :         int     last;           /* last byte offset */
    2967   263263918 :         static const short      offsets[] = {
    2968             :                 offsetof(xfs_agf_t, agf_magicnum),
    2969             :                 offsetof(xfs_agf_t, agf_versionnum),
    2970             :                 offsetof(xfs_agf_t, agf_seqno),
    2971             :                 offsetof(xfs_agf_t, agf_length),
    2972             :                 offsetof(xfs_agf_t, agf_roots[0]),
    2973             :                 offsetof(xfs_agf_t, agf_levels[0]),
    2974             :                 offsetof(xfs_agf_t, agf_flfirst),
    2975             :                 offsetof(xfs_agf_t, agf_fllast),
    2976             :                 offsetof(xfs_agf_t, agf_flcount),
    2977             :                 offsetof(xfs_agf_t, agf_freeblks),
    2978             :                 offsetof(xfs_agf_t, agf_longest),
    2979             :                 offsetof(xfs_agf_t, agf_btreeblks),
    2980             :                 offsetof(xfs_agf_t, agf_uuid),
    2981             :                 offsetof(xfs_agf_t, agf_rmap_blocks),
    2982             :                 offsetof(xfs_agf_t, agf_refcount_blocks),
    2983             :                 offsetof(xfs_agf_t, agf_refcount_root),
    2984             :                 offsetof(xfs_agf_t, agf_refcount_level),
    2985             :                 /* needed so that we don't log the whole rest of the structure: */
    2986             :                 offsetof(xfs_agf_t, agf_spare64),
    2987             :                 sizeof(xfs_agf_t)
    2988             :         };
    2989             : 
    2990   263263918 :         trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
    2991             : 
    2992   263244740 :         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
    2993             : 
    2994   263260038 :         xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
    2995   263275223 :         xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
    2996   263317664 : }
    2997             : 
    2998             : /*
    2999             :  * Put the block on the freelist for the allocation group.
    3000             :  */
    3001             : int
    3002     1320966 : xfs_alloc_put_freelist(
    3003             :         struct xfs_perag        *pag,
    3004             :         struct xfs_trans        *tp,
    3005             :         struct xfs_buf          *agbp,
    3006             :         struct xfs_buf          *agflbp,
    3007             :         xfs_agblock_t           bno,
    3008             :         int                     btreeblk)
    3009             : {
    3010     1320966 :         struct xfs_mount        *mp = tp->t_mountp;
    3011     1320966 :         struct xfs_agf          *agf = agbp->b_addr;
    3012     1320966 :         __be32                  *blockp;
    3013     1320966 :         int                     error;
    3014     1320966 :         uint32_t                logflags;
    3015     1320966 :         __be32                  *agfl_bno;
    3016     1320966 :         int                     startoff;
    3017             : 
    3018     1320966 :         if (!agflbp) {
    3019      288383 :                 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    3020      288383 :                 if (error)
    3021             :                         return error;
    3022             :         }
    3023             : 
    3024     1320966 :         be32_add_cpu(&agf->agf_fllast, 1);
    3025     2641932 :         if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
    3026         815 :                 agf->agf_fllast = 0;
    3027             : 
    3028     2641932 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    3029     1320966 :         be32_add_cpu(&agf->agf_flcount, 1);
    3030     1320966 :         pag->pagf_flcount++;
    3031             : 
    3032     1320966 :         logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
    3033     1320966 :         if (btreeblk) {
    3034      288383 :                 be32_add_cpu(&agf->agf_btreeblks, -1);
    3035      288383 :                 pag->pagf_btreeblks--;
    3036      288383 :                 logflags |= XFS_AGF_BTREEBLKS;
    3037             :         }
    3038             : 
    3039     1320966 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3040             : 
    3041     2641938 :         ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
    3042             : 
    3043     1320969 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    3044     1320969 :         blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
    3045     1320969 :         *blockp = cpu_to_be32(bno);
    3046     1320969 :         startoff = (char *)blockp - (char *)agflbp->b_addr;
    3047             : 
    3048     1320969 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3049             : 
    3050     1320969 :         xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
    3051     1320969 :         xfs_trans_log_buf(tp, agflbp, startoff,
    3052             :                           startoff + sizeof(xfs_agblock_t) - 1);
    3053     1320969 :         return 0;
    3054             : }
    3055             : 
    3056             : /*
    3057             :  * Check that this AGF/AGI header's sequence number and length matches the AG
    3058             :  * number and size in fsblocks.
    3059             :  */
    3060             : xfs_failaddr_t
    3061     5761510 : xfs_validate_ag_length(
    3062             :         struct xfs_buf          *bp,
    3063             :         uint32_t                seqno,
    3064             :         uint32_t                length)
    3065             : {
    3066     5761510 :         struct xfs_mount        *mp = bp->b_mount;
    3067             :         /*
    3068             :          * During growfs operations, the perag is not fully initialised,
    3069             :          * so we can't use it for any useful checking. growfs ensures we can't
    3070             :          * use it by using uncached buffers that don't have the perag attached
    3071             :          * so we can detect and avoid this problem.
    3072             :          */
    3073     5761510 :         if (bp->b_pag && seqno != bp->b_pag->pag_agno)
    3074           0 :                 return __this_address;
    3075             : 
    3076             :         /*
    3077             :          * Only the last AG in the filesystem is allowed to be shorter
    3078             :          * than the AG size recorded in the superblock.
    3079             :          */
    3080     5761510 :         if (length != mp->m_sb.sb_agblocks) {
    3081             :                 /*
    3082             :                  * During growfs, the new last AG can get here before we
    3083             :                  * have updated the superblock. Give it a pass on the seqno
    3084             :                  * check.
    3085             :                  */
    3086       25545 :                 if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
    3087           0 :                         return __this_address;
    3088       25545 :                 if (length < XFS_MIN_AG_BLOCKS)
    3089           0 :                         return __this_address;
    3090       25545 :                 if (length > mp->m_sb.sb_agblocks)
    3091          11 :                         return __this_address;
    3092             :         }
    3093             : 
    3094             :         return NULL;
    3095             : }
    3096             : 
    3097             : /*
    3098             :  * Verify the AGF is consistent.
    3099             :  *
    3100             :  * We do not verify the AGFL indexes in the AGF are fully consistent here
    3101             :  * because of issues with variable on-disk structure sizes. Instead, we check
    3102             :  * the agfl indexes for consistency when we initialise the perag from the AGF
    3103             :  * information after a read completes.
    3104             :  *
    3105             :  * If the index is inconsistent, then we mark the perag as needing an AGFL
    3106             :  * reset. The first AGFL update performed then resets the AGFL indexes and
    3107             :  * refills the AGFL with known good free blocks, allowing the filesystem to
    3108             :  * continue operating normally at the cost of a few leaked free space blocks.
    3109             :  */
    3110             : static xfs_failaddr_t
    3111     3075366 : xfs_agf_verify(
    3112             :         struct xfs_buf          *bp)
    3113             : {
    3114     3075366 :         struct xfs_mount        *mp = bp->b_mount;
    3115     3075366 :         struct xfs_agf          *agf = bp->b_addr;
    3116     3075366 :         xfs_failaddr_t          fa;
    3117     3075366 :         uint32_t                agf_seqno = be32_to_cpu(agf->agf_seqno);
    3118     3075366 :         uint32_t                agf_length = be32_to_cpu(agf->agf_length);
    3119             : 
    3120     3075366 :         if (xfs_has_crc(mp)) {
    3121     3060870 :                 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
    3122           0 :                         return __this_address;
    3123     3061291 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
    3124           0 :                         return __this_address;
    3125             :         }
    3126             : 
    3127     3075965 :         if (!xfs_verify_magic(bp, agf->agf_magicnum))
    3128           0 :                 return __this_address;
    3129             : 
    3130     3075021 :         if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
    3131           0 :                 return __this_address;
    3132             : 
    3133             :         /*
    3134             :          * Both agf_seqno and agf_length need to validated before anything else
    3135             :          * block number related in the AGF or AGFL can be checked.
    3136             :          */
    3137     3075021 :         fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
    3138     3074790 :         if (fa)
    3139             :                 return fa;
    3140             : 
    3141     6135736 :         if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
    3142          10 :                 return __this_address;
    3143     3074727 :         if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
    3144           0 :                 return __this_address;
    3145     3074727 :         if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
    3146           0 :                 return __this_address;
    3147             : 
    3148     3074727 :         if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
    3149             :             be32_to_cpu(agf->agf_freeblks) > agf_length)
    3150          22 :                 return __this_address;
    3151             : 
    3152     3074705 :         if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
    3153     3074705 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
    3154             :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
    3155     3074705 :                                                 mp->m_alloc_maxlevels ||
    3156             :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
    3157             :                                                 mp->m_alloc_maxlevels)
    3158           0 :                 return __this_address;
    3159             : 
    3160     3074705 :         if (xfs_has_lazysbcount(mp) &&
    3161     3075021 :             be32_to_cpu(agf->agf_btreeblks) > agf_length)
    3162           0 :                 return __this_address;
    3163             : 
    3164     3074705 :         if (xfs_has_rmapbt(mp)) {
    3165     2989989 :                 if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
    3166          11 :                         return __this_address;
    3167             : 
    3168     2989978 :                 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
    3169             :                     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
    3170     2989978 :                                                         mp->m_rmap_maxlevels)
    3171           0 :                         return __this_address;
    3172             :         }
    3173             : 
    3174     3074694 :         if (xfs_has_reflink(mp)) {
    3175     2989001 :                 if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
    3176          11 :                         return __this_address;
    3177             : 
    3178     2988990 :                 if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
    3179     2988990 :                     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
    3180           0 :                         return __this_address;
    3181             :         }
    3182             : 
    3183             :         return NULL;
    3184             : }
    3185             : 
    3186             : static void
    3187      963998 : xfs_agf_read_verify(
    3188             :         struct xfs_buf  *bp)
    3189             : {
    3190      963998 :         struct xfs_mount *mp = bp->b_mount;
    3191      963998 :         xfs_failaddr_t  fa;
    3192             : 
    3193     1927899 :         if (xfs_has_crc(mp) &&
    3194             :             !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
    3195          52 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    3196             :         else {
    3197      963946 :                 fa = xfs_agf_verify(bp);
    3198      963946 :                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
    3199          65 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3200             :         }
    3201      963998 : }
    3202             : 
    3203             : static void
    3204     1267446 : xfs_agf_write_verify(
    3205             :         struct xfs_buf  *bp)
    3206             : {
    3207     1267446 :         struct xfs_mount        *mp = bp->b_mount;
    3208     1267446 :         struct xfs_buf_log_item *bip = bp->b_log_item;
    3209     1267446 :         struct xfs_agf          *agf = bp->b_addr;
    3210     1267446 :         xfs_failaddr_t          fa;
    3211             : 
    3212     1267446 :         fa = xfs_agf_verify(bp);
    3213     1267446 :         if (fa) {
    3214           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3215           0 :                 return;
    3216             :         }
    3217             : 
    3218     1267446 :         if (!xfs_has_crc(mp))
    3219             :                 return;
    3220             : 
    3221     1253785 :         if (bip)
    3222     1226261 :                 agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    3223             : 
    3224     1253785 :         xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
    3225             : }
    3226             : 
    3227             : const struct xfs_buf_ops xfs_agf_buf_ops = {
    3228             :         .name = "xfs_agf",
    3229             :         .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
    3230             :         .verify_read = xfs_agf_read_verify,
    3231             :         .verify_write = xfs_agf_write_verify,
    3232             :         .verify_struct = xfs_agf_verify,
    3233             : };
    3234             : 
    3235             : /*
    3236             :  * Read in the allocation group header (free/alloc section).
    3237             :  */
    3238             : int
    3239  2394889712 : xfs_read_agf(
    3240             :         struct xfs_perag        *pag,
    3241             :         struct xfs_trans        *tp,
    3242             :         int                     flags,
    3243             :         struct xfs_buf          **agfbpp)
    3244             : {
    3245  2394889712 :         struct xfs_mount        *mp = pag->pag_mount;
    3246  2394889712 :         int                     error;
    3247             : 
    3248  2394889712 :         trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
    3249             : 
    3250  9578614740 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
    3251  2394653685 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
    3252  2394653685 :                         XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
    3253  2395507438 :         if (xfs_metadata_is_sick(error))
    3254         117 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
    3255  2395507438 :         if (error)
    3256             :                 return error;
    3257             : 
    3258  2387448820 :         xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
    3259  2387448820 :         return 0;
    3260             : }
    3261             : 
    3262             : /*
    3263             :  * Read in the allocation group header (free/alloc section) and initialise the
    3264             :  * perag structure if necessary. If the caller provides @agfbpp, then return the
    3265             :  * locked buffer to the caller, otherwise free it.
    3266             :  */
    3267             : int
    3268  2395298148 : xfs_alloc_read_agf(
    3269             :         struct xfs_perag        *pag,
    3270             :         struct xfs_trans        *tp,
    3271             :         int                     flags,
    3272             :         struct xfs_buf          **agfbpp)
    3273             : {
    3274  2395298148 :         struct xfs_buf          *agfbp;
    3275  2395298148 :         struct xfs_agf          *agf;
    3276  2395298148 :         int                     error;
    3277  2395298148 :         int                     allocbt_blks;
    3278             : 
    3279  2395298148 :         trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
    3280             : 
    3281             :         /* We don't support trylock when freeing. */
    3282  2394575776 :         ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
    3283             :                         (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
    3284  2394575776 :         error = xfs_read_agf(pag, tp,
    3285  2394575776 :                         (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
    3286             :                         &agfbp);
    3287  2395522987 :         if (error)
    3288             :                 return error;
    3289             : 
    3290  2387509713 :         agf = agfbp->b_addr;
    3291  4775019426 :         if (!xfs_perag_initialised_agf(pag)) {
    3292      779862 :                 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
    3293      779862 :                 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
    3294      779862 :                 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
    3295      779862 :                 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
    3296      779862 :                 pag->pagf_levels[XFS_BTNUM_BNOi] =
    3297      779862 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
    3298      779862 :                 pag->pagf_levels[XFS_BTNUM_CNTi] =
    3299      779862 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
    3300      779862 :                 pag->pagf_levels[XFS_BTNUM_RMAPi] =
    3301      779862 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
    3302      779862 :                 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
    3303      779862 :                 if (xfs_agfl_needs_reset(pag->pag_mount, agf))
    3304          20 :                         set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3305             :                 else
    3306      779842 :                         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3307             : 
    3308             :                 /*
    3309             :                  * Update the in-core allocbt counter. Filter out the rmapbt
    3310             :                  * subset of the btreeblks counter because the rmapbt is managed
    3311             :                  * by perag reservation. Subtract one for the rmapbt root block
    3312             :                  * because the rmap counter includes it while the btreeblks
    3313             :                  * counter only tracks non-root blocks.
    3314             :                  */
    3315      780288 :                 allocbt_blks = pag->pagf_btreeblks;
    3316      780288 :                 if (xfs_has_rmapbt(pag->pag_mount))
    3317      760177 :                         allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
    3318      780288 :                 if (allocbt_blks > 0)
    3319       64361 :                         atomic64_add(allocbt_blks,
    3320             :                                         &pag->pag_mount->m_allocbt_blks);
    3321             : 
    3322      780288 :                 set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
    3323             :         }
    3324             : #ifdef DEBUG
    3325  4773459702 :         else if (!xfs_is_shutdown(pag->pag_mount)) {
    3326  2386715474 :                 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
    3327  2386715474 :                 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
    3328  2386715474 :                 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
    3329  2386715474 :                 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
    3330  2386715474 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
    3331             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
    3332  2386715474 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
    3333             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
    3334             :         }
    3335             : #endif
    3336  2387510030 :         if (agfbpp)
    3337  2386190283 :                 *agfbpp = agfbp;
    3338             :         else
    3339     1319747 :                 xfs_trans_brelse(tp, agfbp);
    3340             :         return 0;
    3341             : }
    3342             : 
    3343             : /*
    3344             :  * Pre-proces allocation arguments to set initial state that we don't require
    3345             :  * callers to set up correctly, as well as bounds check the allocation args
    3346             :  * that are set up.
    3347             :  */
    3348             : static int
    3349   102237992 : xfs_alloc_vextent_check_args(
    3350             :         struct xfs_alloc_arg    *args,
    3351             :         xfs_fsblock_t           target,
    3352             :         xfs_agnumber_t          *minimum_agno)
    3353             : {
    3354   102237992 :         struct xfs_mount        *mp = args->mp;
    3355   102237992 :         xfs_agblock_t           agsize;
    3356             : 
    3357   102237992 :         args->fsbno = NULLFSBLOCK;
    3358             : 
    3359   102237992 :         *minimum_agno = 0;
    3360   102237992 :         if (args->tp->t_highest_agno != NULLAGNUMBER)
    3361     1793657 :                 *minimum_agno = args->tp->t_highest_agno;
    3362             : 
    3363             :         /*
    3364             :          * Just fix this up, for the case where the last a.g. is shorter
    3365             :          * (or there's only one a.g.) and the caller couldn't easily figure
    3366             :          * that out (xfs_bmap_alloc).
    3367             :          */
    3368   102237992 :         agsize = mp->m_sb.sb_agblocks;
    3369   102237992 :         if (args->maxlen > agsize)
    3370           0 :                 args->maxlen = agsize;
    3371   102237992 :         if (args->alignment == 0)
    3372     5884920 :                 args->alignment = 1;
    3373             : 
    3374   102237992 :         ASSERT(args->minlen > 0);
    3375   102237992 :         ASSERT(args->maxlen > 0);
    3376   102237992 :         ASSERT(args->alignment > 0);
    3377   102237992 :         ASSERT(args->resv != XFS_AG_RESV_AGFL);
    3378             : 
    3379   102237992 :         ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
    3380   102237992 :         ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
    3381   102222078 :         ASSERT(args->minlen <= args->maxlen);
    3382   102222078 :         ASSERT(args->minlen <= agsize);
    3383   102222078 :         ASSERT(args->mod < args->prod);
    3384             : 
    3385   102222078 :         if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
    3386   102222078 :             XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
    3387   102212947 :             args->minlen > args->maxlen || args->minlen > agsize ||
    3388   102212947 :             args->mod >= args->prod) {
    3389           0 :                 trace_xfs_alloc_vextent_badargs(args);
    3390           0 :                 return -ENOSPC;
    3391             :         }
    3392             : 
    3393   102212947 :         if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
    3394        9380 :                 trace_xfs_alloc_vextent_skip_deadlock(args);
    3395        9380 :                 return -ENOSPC;
    3396             :         }
    3397             :         return 0;
    3398             : 
    3399             : }
    3400             : 
    3401             : /*
    3402             :  * Prepare an AG for allocation. If the AG is not prepared to accept the
    3403             :  * allocation, return failure.
    3404             :  *
    3405             :  * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
    3406             :  * modified to hold their own perag references.
    3407             :  */
    3408             : static int
    3409   290535973 : xfs_alloc_vextent_prepare_ag(
    3410             :         struct xfs_alloc_arg    *args,
    3411             :         uint32_t                alloc_flags)
    3412             : {
    3413   290535973 :         bool                    need_pag = !args->pag;
    3414   290535973 :         int                     error;
    3415             : 
    3416   290535973 :         if (need_pag)
    3417           0 :                 args->pag = xfs_perag_get(args->mp, args->agno);
    3418             : 
    3419   290535973 :         args->agbp = NULL;
    3420   290535973 :         error = xfs_alloc_fix_freelist(args, alloc_flags);
    3421   290549236 :         if (error) {
    3422        1381 :                 trace_xfs_alloc_vextent_nofix(args);
    3423        1381 :                 if (need_pag)
    3424           0 :                         xfs_perag_put(args->pag);
    3425        1381 :                 args->agbno = NULLAGBLOCK;
    3426        1381 :                 return error;
    3427             :         }
    3428   290547855 :         if (!args->agbp) {
    3429             :                 /* cannot allocate in this AG at all */
    3430   191950798 :                 trace_xfs_alloc_vextent_noagbp(args);
    3431   191948731 :                 args->agbno = NULLAGBLOCK;
    3432   191948731 :                 return 0;
    3433             :         }
    3434    98597057 :         args->wasfromfl = 0;
    3435    98597057 :         return 0;
    3436             : }
    3437             : 
    3438             : /*
    3439             :  * Post-process allocation results to account for the allocation if it succeed
    3440             :  * and set the allocated block number correctly for the caller.
    3441             :  *
    3442             :  * XXX: we should really be returning ENOSPC for ENOSPC, not
    3443             :  * hiding it behind a "successful" NULLFSBLOCK allocation.
    3444             :  */
    3445             : static int
    3446   102319060 : xfs_alloc_vextent_finish(
    3447             :         struct xfs_alloc_arg    *args,
    3448             :         xfs_agnumber_t          minimum_agno,
    3449             :         int                     alloc_error,
    3450             :         bool                    drop_perag)
    3451             : {
    3452   102319060 :         struct xfs_mount        *mp = args->mp;
    3453   102319060 :         int                     error = 0;
    3454             : 
    3455             :         /*
    3456             :          * We can end up here with a locked AGF. If we failed, the caller is
    3457             :          * likely going to try to allocate again with different parameters, and
    3458             :          * that can widen the AGs that are searched for free space. If we have
    3459             :          * to do BMBT block allocation, we have to do a new allocation.
    3460             :          *
    3461             :          * Hence leaving this function with the AGF locked opens up potential
    3462             :          * ABBA AGF deadlocks because a future allocation attempt in this
    3463             :          * transaction may attempt to lock a lower number AGF.
    3464             :          *
    3465             :          * We can't release the AGF until the transaction is commited, so at
    3466             :          * this point we must update the "first allocation" tracker to point at
    3467             :          * this AG if the tracker is empty or points to a lower AG. This allows
    3468             :          * the next allocation attempt to be modified appropriately to avoid
    3469             :          * deadlocks.
    3470             :          */
    3471   102319060 :         if (args->agbp &&
    3472    98603766 :             (args->tp->t_highest_agno == NULLAGNUMBER ||
    3473     1782723 :              args->agno > minimum_agno))
    3474    97057299 :                 args->tp->t_highest_agno = args->agno;
    3475             : 
    3476             :         /*
    3477             :          * If the allocation failed with an error or we had an ENOSPC result,
    3478             :          * preserve the returned error whilst also marking the allocation result
    3479             :          * as "no extent allocated". This ensures that callers that fail to
    3480             :          * capture the error will still treat it as a failed allocation.
    3481             :          */
    3482   102319060 :         if (alloc_error || args->agbno == NULLAGBLOCK) {
    3483     4602394 :                 args->fsbno = NULLFSBLOCK;
    3484     4602394 :                 error = alloc_error;
    3485     4602394 :                 goto out_drop_perag;
    3486             :         }
    3487             : 
    3488    97716666 :         args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
    3489             : 
    3490    97716666 :         ASSERT(args->len >= args->minlen);
    3491    97716666 :         ASSERT(args->len <= args->maxlen);
    3492    97716666 :         ASSERT(args->agbno % args->alignment == 0);
    3493    97716666 :         XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
    3494             : 
    3495             :         /* if not file data, insert new block into the reverse map btree */
    3496    97711984 :         if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
    3497     7290699 :                 error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
    3498     7290699 :                                        args->agbno, args->len, &args->oinfo);
    3499     7291329 :                 if (error)
    3500          40 :                         goto out_drop_perag;
    3501             :         }
    3502             : 
    3503    97712574 :         if (!args->wasfromfl) {
    3504    97720018 :                 error = xfs_alloc_update_counters(args->tp, args->agbp,
    3505    97720018 :                                                   -((long)(args->len)));
    3506    97734320 :                 if (error)
    3507           0 :                         goto out_drop_perag;
    3508             : 
    3509    97734320 :                 ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
    3510             :                                 args->len));
    3511             :         }
    3512             : 
    3513    97719409 :         xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
    3514             : 
    3515    97694531 :         XFS_STATS_INC(mp, xs_allocx);
    3516    97724004 :         XFS_STATS_ADD(mp, xs_allocb, args->len);
    3517             : 
    3518    97726955 :         trace_xfs_alloc_vextent_finish(args);
    3519             : 
    3520   102324079 : out_drop_perag:
    3521   102324079 :         if (drop_perag && args->pag) {
    3522    96124694 :                 xfs_perag_rele(args->pag);
    3523    96138204 :                 args->pag = NULL;
    3524             :         }
    3525   102337589 :         return error;
    3526             : }
    3527             : 
    3528             : /*
    3529             :  * Allocate within a single AG only. This uses a best-fit length algorithm so if
    3530             :  * you need an exact sized allocation without locality constraints, this is the
    3531             :  * fastest way to do it.
    3532             :  *
    3533             :  * Caller is expected to hold a perag reference in args->pag.
    3534             :  */
    3535             : int
    3536           0 : xfs_alloc_vextent_this_ag(
    3537             :         struct xfs_alloc_arg    *args,
    3538             :         xfs_agnumber_t          agno)
    3539             : {
    3540           0 :         struct xfs_mount        *mp = args->mp;
    3541           0 :         xfs_agnumber_t          minimum_agno;
    3542           0 :         uint32_t                alloc_flags = 0;
    3543           0 :         int                     error;
    3544             : 
    3545           0 :         ASSERT(args->pag != NULL);
    3546           0 :         ASSERT(args->pag->pag_agno == agno);
    3547             : 
    3548           0 :         args->agno = agno;
    3549           0 :         args->agbno = 0;
    3550             : 
    3551           0 :         trace_xfs_alloc_vextent_this_ag(args);
    3552             : 
    3553           0 :         error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
    3554             :                         &minimum_agno);
    3555           0 :         if (error) {
    3556           0 :                 if (error == -ENOSPC)
    3557             :                         return 0;
    3558           0 :                 return error;
    3559             :         }
    3560             : 
    3561           0 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3562           0 :         if (!error && args->agbp)
    3563           0 :                 error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3564             : 
    3565           0 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3566             : }
    3567             : 
    3568             : /*
    3569             :  * Iterate all AGs trying to allocate an extent starting from @start_ag.
    3570             :  *
    3571             :  * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
    3572             :  * allocation attempts in @start_agno have locality information. If we fail to
    3573             :  * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
    3574             :  * we attempt to allocation in as there is no locality optimisation possible for
    3575             :  * those allocations.
    3576             :  *
    3577             :  * On return, args->pag may be left referenced if we finish before the "all
    3578             :  * failed" return point. The allocation finish still needs the perag, and
    3579             :  * so the caller will release it once they've finished the allocation.
    3580             :  *
    3581             :  * When we wrap the AG iteration at the end of the filesystem, we have to be
    3582             :  * careful not to wrap into AGs below ones we already have locked in the
    3583             :  * transaction if we are doing a blocking iteration. This will result in an
    3584             :  * out-of-order locking of AGFs and hence can cause deadlocks.
    3585             :  */
    3586             : static int
    3587    96042285 : xfs_alloc_vextent_iterate_ags(
    3588             :         struct xfs_alloc_arg    *args,
    3589             :         xfs_agnumber_t          minimum_agno,
    3590             :         xfs_agnumber_t          start_agno,
    3591             :         xfs_agblock_t           target_agbno,
    3592             :         uint32_t                alloc_flags)
    3593             : {
    3594    96042285 :         struct xfs_mount        *mp = args->mp;
    3595    96042285 :         xfs_agnumber_t          restart_agno = minimum_agno;
    3596    96042285 :         xfs_agnumber_t          agno;
    3597    96042285 :         int                     error = 0;
    3598             : 
    3599    96042285 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
    3600    96042285 :                 restart_agno = 0;
    3601           0 : restart:
    3602   285343032 :         for_each_perag_wrap_range(mp, start_agno, restart_agno,
    3603             :                         mp->m_sb.sb_agcount, agno, args->pag) {
    3604   284357624 :                 args->agno = agno;
    3605   284357624 :                 error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3606   284340032 :                 if (error)
    3607             :                         break;
    3608   284338651 :                 if (!args->agbp) {
    3609   188667880 :                         trace_xfs_alloc_vextent_loopfailed(args);
    3610   188667256 :                         continue;
    3611             :                 }
    3612             : 
    3613             :                 /*
    3614             :                  * Allocation is supposed to succeed now, so break out of the
    3615             :                  * loop regardless of whether we succeed or not.
    3616             :                  */
    3617    95670771 :                 if (args->agno == start_agno && target_agbno) {
    3618    88541538 :                         args->agbno = target_agbno;
    3619    88541538 :                         error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3620             :                 } else {
    3621     7129233 :                         args->agbno = 0;
    3622     7129233 :                         error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3623             :                 }
    3624             :                 break;
    3625             :         }
    3626    96770376 :         if (error) {
    3627        4599 :                 xfs_perag_rele(args->pag);
    3628        4599 :                 args->pag = NULL;
    3629        4599 :                 return error;
    3630             :         }
    3631    96765777 :         if (args->agbp)
    3632             :                 return 0;
    3633             : 
    3634             :         /*
    3635             :          * We didn't find an AG we can alloation from. If we were given
    3636             :          * constraining flags by the caller, drop them and retry the allocation
    3637             :          * without any constraints being set.
    3638             :          */
    3639     1066219 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
    3640      630058 :                 alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
    3641      630058 :                 restart_agno = minimum_agno;
    3642      630058 :                 goto restart;
    3643             :         }
    3644             : 
    3645      436161 :         ASSERT(args->pag == NULL);
    3646      436161 :         trace_xfs_alloc_vextent_allfailed(args);
    3647      436161 :         return 0;
    3648             : }
    3649             : 
    3650             : /*
    3651             :  * Iterate from the AGs from the start AG to the end of the filesystem, trying
    3652             :  * to allocate blocks. It starts with a near allocation attempt in the initial
    3653             :  * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
    3654             :  * back to zero if allowed by previous allocations in this transaction,
    3655             :  * otherwise will wrap back to the start AG and run a second blocking pass to
    3656             :  * the end of the filesystem.
    3657             :  */
    3658             : int
    3659    93417888 : xfs_alloc_vextent_start_ag(
    3660             :         struct xfs_alloc_arg    *args,
    3661             :         xfs_fsblock_t           target)
    3662             : {
    3663    93417888 :         struct xfs_mount        *mp = args->mp;
    3664    93417888 :         xfs_agnumber_t          minimum_agno;
    3665    93417888 :         xfs_agnumber_t          start_agno;
    3666    93417888 :         xfs_agnumber_t          rotorstep = xfs_rotorstep;
    3667    93417888 :         bool                    bump_rotor = false;
    3668    93417888 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3669    93417888 :         int                     error;
    3670             : 
    3671    93417888 :         ASSERT(args->pag == NULL);
    3672             : 
    3673    93417888 :         args->agno = NULLAGNUMBER;
    3674    93417888 :         args->agbno = NULLAGBLOCK;
    3675             : 
    3676    93417888 :         trace_xfs_alloc_vextent_start_ag(args);
    3677             : 
    3678    93391256 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3679    93392790 :         if (error) {
    3680           0 :                 if (error == -ENOSPC)
    3681             :                         return 0;
    3682           0 :                 return error;
    3683             :         }
    3684             : 
    3685    96647594 :         if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
    3686             :             xfs_is_inode32(mp)) {
    3687           0 :                 target = XFS_AGB_TO_FSB(mp,
    3688             :                                 ((mp->m_agfrotor / rotorstep) %
    3689             :                                 mp->m_sb.sb_agcount), 0);
    3690           0 :                 bump_rotor = 1;
    3691             :         }
    3692             : 
    3693    93392790 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3694    93397674 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3695    93392790 :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3696             : 
    3697    93505362 :         if (bump_rotor) {
    3698           0 :                 if (args->agno == start_agno)
    3699           0 :                         mp->m_agfrotor = (mp->m_agfrotor + 1) %
    3700           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3701             :                 else
    3702           0 :                         mp->m_agfrotor = (args->agno * rotorstep + 1) %
    3703           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3704             :         }
    3705             : 
    3706    93505362 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3707             : }
    3708             : 
    3709             : /*
    3710             :  * Iterate from the agno indicated via @target through to the end of the
    3711             :  * filesystem attempting blocking allocation. This does not wrap or try a second
    3712             :  * pass, so will not recurse into AGs lower than indicated by the target.
    3713             :  */
    3714             : int
    3715     2637248 : xfs_alloc_vextent_first_ag(
    3716             :         struct xfs_alloc_arg    *args,
    3717             :         xfs_fsblock_t           target)
    3718             :  {
    3719     2637248 :         struct xfs_mount        *mp = args->mp;
    3720     2637248 :         xfs_agnumber_t          minimum_agno;
    3721     2637248 :         xfs_agnumber_t          start_agno;
    3722     2637248 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3723     2637248 :         int                     error;
    3724             : 
    3725     2637248 :         ASSERT(args->pag == NULL);
    3726             : 
    3727     2637248 :         args->agno = NULLAGNUMBER;
    3728     2637248 :         args->agbno = NULLAGBLOCK;
    3729             : 
    3730     2637248 :         trace_xfs_alloc_vextent_first_ag(args);
    3731             : 
    3732     2634545 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3733     2633963 :         if (error) {
    3734           0 :                 if (error == -ENOSPC)
    3735             :                         return 0;
    3736           0 :                 return error;
    3737             :         }
    3738             : 
    3739     2633963 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3740     2634047 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3741     2633963 :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3742     2639823 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3743             : }
    3744             : 
    3745             : /*
    3746             :  * Allocate at the exact block target or fail. Caller is expected to hold a
    3747             :  * perag reference in args->pag.
    3748             :  */
    3749             : int
    3750     1921884 : xfs_alloc_vextent_exact_bno(
    3751             :         struct xfs_alloc_arg    *args,
    3752             :         xfs_fsblock_t           target)
    3753             : {
    3754     1921884 :         struct xfs_mount        *mp = args->mp;
    3755     1921884 :         xfs_agnumber_t          minimum_agno;
    3756     1921884 :         int                     error;
    3757             : 
    3758     1921884 :         ASSERT(args->pag != NULL);
    3759     1921884 :         ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3760             : 
    3761     1921884 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3762     1921884 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3763             : 
    3764     1921834 :         trace_xfs_alloc_vextent_exact_bno(args);
    3765             : 
    3766     1921845 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3767     1921847 :         if (error) {
    3768        1304 :                 if (error == -ENOSPC)
    3769             :                         return 0;
    3770           0 :                 return error;
    3771             :         }
    3772             : 
    3773     1920543 :         error = xfs_alloc_vextent_prepare_ag(args, 0);
    3774     1920673 :         if (!error && args->agbp)
    3775     1112073 :                 error = xfs_alloc_ag_vextent_exact(args);
    3776             : 
    3777     1920674 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3778             : }
    3779             : 
    3780             : /*
    3781             :  * Allocate an extent as close to the target as possible. If there are not
    3782             :  * viable candidates in the AG, then fail the allocation.
    3783             :  *
    3784             :  * Caller may or may not have a per-ag reference in args->pag.
    3785             :  */
    3786             : int
    3787     4281674 : xfs_alloc_vextent_near_bno(
    3788             :         struct xfs_alloc_arg    *args,
    3789             :         xfs_fsblock_t           target)
    3790             : {
    3791     4281674 :         struct xfs_mount        *mp = args->mp;
    3792     4281674 :         xfs_agnumber_t          minimum_agno;
    3793     4281674 :         bool                    needs_perag = args->pag == NULL;
    3794     4281674 :         uint32_t                alloc_flags = 0;
    3795     4281674 :         int                     error;
    3796             : 
    3797     4281674 :         if (!needs_perag)
    3798     3848415 :                 ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3799             : 
    3800     4281674 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3801     4281674 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3802             : 
    3803     4280864 :         trace_xfs_alloc_vextent_near_bno(args);
    3804             : 
    3805     4280995 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3806     4281520 :         if (error) {
    3807        8076 :                 if (error == -ENOSPC)
    3808             :                         return 0;
    3809           0 :                 return error;
    3810             :         }
    3811             : 
    3812     4273444 :         if (needs_perag)
    3813      433144 :                 args->pag = xfs_perag_grab(mp, args->agno);
    3814             : 
    3815     4274525 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3816     4274716 :         if (!error && args->agbp)
    3817     1801987 :                 error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3818             : 
    3819     4274666 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
    3820             : }
    3821             : 
    3822             : /* Ensure that the freelist is at full capacity. */
    3823             : int
    3824   550100988 : xfs_free_extent_fix_freelist(
    3825             :         struct xfs_trans        *tp,
    3826             :         struct xfs_perag        *pag,
    3827             :         struct xfs_buf          **agbp)
    3828             : {
    3829   550100988 :         struct xfs_alloc_arg    args;
    3830   550100988 :         int                     error;
    3831             : 
    3832   550100988 :         memset(&args, 0, sizeof(struct xfs_alloc_arg));
    3833   550100988 :         args.tp = tp;
    3834   550100988 :         args.mp = tp->t_mountp;
    3835   550100988 :         args.agno = pag->pag_agno;
    3836   550100988 :         args.pag = pag;
    3837             : 
    3838             :         /*
    3839             :          * validate that the block number is legal - the enables us to detect
    3840             :          * and handle a silent filesystem corruption rather than crashing.
    3841             :          */
    3842   550100988 :         if (args.agno >= args.mp->m_sb.sb_agcount)
    3843             :                 return -EFSCORRUPTED;
    3844             : 
    3845   550100988 :         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
    3846   550109780 :         if (error)
    3847             :                 return error;
    3848             : 
    3849   550108468 :         *agbp = args.agbp;
    3850   550108468 :         return 0;
    3851             : }
    3852             : 
    3853             : /*
    3854             :  * Free an extent.
    3855             :  * Just break up the extent address and hand off to xfs_free_ag_extent
    3856             :  * after fixing up the freelist.
    3857             :  */
    3858             : int
    3859   105571524 : __xfs_free_extent(
    3860             :         struct xfs_trans                *tp,
    3861             :         struct xfs_perag                *pag,
    3862             :         xfs_agblock_t                   agbno,
    3863             :         xfs_extlen_t                    len,
    3864             :         const struct xfs_owner_info     *oinfo,
    3865             :         enum xfs_ag_resv_type           type,
    3866             :         bool                            skip_discard)
    3867             : {
    3868   105571524 :         struct xfs_mount                *mp = tp->t_mountp;
    3869   105571524 :         struct xfs_buf                  *agbp;
    3870   105571524 :         struct xfs_agf                  *agf;
    3871   105571524 :         int                             error;
    3872   105571524 :         unsigned int                    busy_flags = 0;
    3873             : 
    3874   105571524 :         ASSERT(len != 0);
    3875   105571524 :         ASSERT(type != XFS_AG_RESV_AGFL);
    3876             : 
    3877   105571524 :         if (XFS_TEST_ERROR(false, mp,
    3878             :                         XFS_ERRTAG_FREE_EXTENT))
    3879             :                 return -EIO;
    3880             : 
    3881   105563824 :         error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
    3882   105575407 :         if (error) {
    3883          32 :                 if (xfs_metadata_is_sick(error))
    3884           0 :                         xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3885          32 :                 return error;
    3886             :         }
    3887             : 
    3888   105575375 :         agf = agbp->b_addr;
    3889             : 
    3890   105575375 :         if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
    3891           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3892           0 :                 error = -EFSCORRUPTED;
    3893           0 :                 goto err_release;
    3894             :         }
    3895             : 
    3896             :         /* validate the extent size is legal now we have the agf locked */
    3897   105575375 :         if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
    3898           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3899           0 :                 error = -EFSCORRUPTED;
    3900           0 :                 goto err_release;
    3901             :         }
    3902             : 
    3903   105575375 :         error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
    3904             :                         type);
    3905   105552612 :         if (error)
    3906         233 :                 goto err_release;
    3907             : 
    3908   105552379 :         if (skip_discard)
    3909     8336647 :                 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
    3910   105552379 :         xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
    3911   105552379 :         return 0;
    3912             : 
    3913         233 : err_release:
    3914         233 :         xfs_trans_brelse(tp, agbp);
    3915         233 :         return error;
    3916             : }
    3917             : 
    3918             : struct xfs_alloc_query_range_info {
    3919             :         xfs_alloc_query_range_fn        fn;
    3920             :         void                            *priv;
    3921             : };
    3922             : 
    3923             : /* Format btree record and pass to our callback. */
    3924             : STATIC int
    3925  2102369203 : xfs_alloc_query_range_helper(
    3926             :         struct xfs_btree_cur            *cur,
    3927             :         const union xfs_btree_rec       *rec,
    3928             :         void                            *priv)
    3929             : {
    3930  2102369203 :         struct xfs_alloc_query_range_info       *query = priv;
    3931  2102369203 :         struct xfs_alloc_rec_incore             irec;
    3932  2102369203 :         xfs_failaddr_t                          fa;
    3933             : 
    3934  2102369203 :         xfs_alloc_btrec_to_irec(rec, &irec);
    3935  2102369203 :         fa = xfs_alloc_check_irec(cur, &irec);
    3936  2102332940 :         if (fa)
    3937           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
    3938             : 
    3939  2102332940 :         return query->fn(cur, &irec, query->priv);
    3940             : }
    3941             : 
    3942             : /* Find all free space within a given range of blocks. */
    3943             : int
    3944       22088 : xfs_alloc_query_range(
    3945             :         struct xfs_btree_cur                    *cur,
    3946             :         const struct xfs_alloc_rec_incore       *low_rec,
    3947             :         const struct xfs_alloc_rec_incore       *high_rec,
    3948             :         xfs_alloc_query_range_fn                fn,
    3949             :         void                                    *priv)
    3950             : {
    3951       22088 :         union xfs_btree_irec                    low_brec = { .a = *low_rec };
    3952       22088 :         union xfs_btree_irec                    high_brec = { .a = *high_rec };
    3953       22088 :         struct xfs_alloc_query_range_info       query = { .priv = priv, .fn = fn };
    3954             : 
    3955       22088 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3956       22088 :         return xfs_btree_query_range(cur, &low_brec, &high_brec,
    3957             :                         xfs_alloc_query_range_helper, &query);
    3958             : }
    3959             : 
    3960             : /* Find all free space records. */
    3961             : int
    3962     1087573 : xfs_alloc_query_all(
    3963             :         struct xfs_btree_cur                    *cur,
    3964             :         xfs_alloc_query_range_fn                fn,
    3965             :         void                                    *priv)
    3966             : {
    3967     1087573 :         struct xfs_alloc_query_range_info       query;
    3968             : 
    3969     1087573 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3970     1087573 :         query.priv = priv;
    3971     1087573 :         query.fn = fn;
    3972     1087573 :         return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
    3973             : }
    3974             : 
    3975             : /*
    3976             :  * Scan part of the keyspace of the free space and tell us if the area has no
    3977             :  * records, is fully mapped by records, or is partially filled.
    3978             :  */
    3979             : int
    3980  2180179492 : xfs_alloc_has_records(
    3981             :         struct xfs_btree_cur    *cur,
    3982             :         xfs_agblock_t           bno,
    3983             :         xfs_extlen_t            len,
    3984             :         enum xbtree_recpacking  *outcome)
    3985             : {
    3986  2180179492 :         union xfs_btree_irec    low;
    3987  2180179492 :         union xfs_btree_irec    high;
    3988             : 
    3989  2180179492 :         memset(&low, 0, sizeof(low));
    3990  2180179492 :         low.a.ar_startblock = bno;
    3991  2180179492 :         memset(&high, 0xFF, sizeof(high));
    3992  2180179492 :         high.a.ar_startblock = bno + len - 1;
    3993             : 
    3994  2180179492 :         return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
    3995             : }
    3996             : 
    3997             : /*
    3998             :  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
    3999             :  * error code or XFS_ITER_*.
    4000             :  */
    4001             : int
    4002    82943458 : xfs_agfl_walk(
    4003             :         struct xfs_mount        *mp,
    4004             :         struct xfs_agf          *agf,
    4005             :         struct xfs_buf          *agflbp,
    4006             :         xfs_agfl_walk_fn        walk_fn,
    4007             :         void                    *priv)
    4008             : {
    4009    82943458 :         __be32                  *agfl_bno;
    4010    82943458 :         unsigned int            i;
    4011    82943458 :         int                     error;
    4012             : 
    4013    82943458 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    4014    82943458 :         i = be32_to_cpu(agf->agf_flfirst);
    4015             : 
    4016             :         /* Nothing to walk in an empty AGFL. */
    4017    82943458 :         if (agf->agf_flcount == cpu_to_be32(0))
    4018             :                 return 0;
    4019             : 
    4020             :         /* Otherwise, walk from first to last, wrapping as needed. */
    4021   746613262 :         for (;;) {
    4022   746613262 :                 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
    4023   746614067 :                 if (error)
    4024     4828372 :                         return error;
    4025   741785695 :                 if (i == be32_to_cpu(agf->agf_fllast))
    4026             :                         break;
    4027  1327436332 :                 if (++i == xfs_agfl_size(mp))
    4028      748751 :                         i = 0;
    4029             :         }
    4030             : 
    4031             :         return 0;
    4032             : }
    4033             : 
    4034             : int __init
    4035          59 : xfs_extfree_intent_init_cache(void)
    4036             : {
    4037          59 :         xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
    4038             :                         sizeof(struct xfs_extent_free_item),
    4039             :                         0, 0, NULL);
    4040             : 
    4041          59 :         return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
    4042             : }
    4043             : 
    4044             : void
    4045          58 : xfs_extfree_intent_destroy_cache(void)
    4046             : {
    4047          58 :         kmem_cache_destroy(xfs_extfree_item_cache);
    4048          58 :         xfs_extfree_item_cache = NULL;
    4049          58 : }
    4050             : 
    4051             : /*
    4052             :  * Find the next chunk of free space in @pag starting at @agbno and going no
    4053             :  * higher than @end_agbno.  Set @agbno and @len to whatever free space we find,
    4054             :  * or to @end_agbno if we find no space.
    4055             :  */
    4056             : int
    4057         709 : xfs_alloc_find_freesp(
    4058             :         struct xfs_trans        *tp,
    4059             :         struct xfs_perag        *pag,
    4060             :         xfs_agblock_t           *agbno,
    4061             :         xfs_agblock_t           end_agbno,
    4062             :         xfs_extlen_t            *len)
    4063             : {
    4064         709 :         struct xfs_mount        *mp = pag->pag_mount;
    4065         709 :         struct xfs_btree_cur    *cur;
    4066         709 :         struct xfs_buf          *agf_bp = NULL;
    4067         709 :         xfs_agblock_t           found_agbno;
    4068         709 :         xfs_extlen_t            found_len;
    4069         709 :         int                     found;
    4070         709 :         int                     error;
    4071             : 
    4072         709 :         trace_xfs_alloc_find_freesp(mp, pag->pag_agno, *agbno,
    4073         709 :                         end_agbno - *agbno);
    4074             : 
    4075         709 :         error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
    4076         709 :         if (error)
    4077             :                 return error;
    4078             : 
    4079         709 :         cur = xfs_allocbt_init_cursor(mp, tp, agf_bp, pag, XFS_BTNUM_BNO);
    4080             : 
    4081             :         /* Try to find a free extent that starts before here. */
    4082         709 :         error = xfs_alloc_lookup_le(cur, *agbno, 0, &found);
    4083         709 :         if (error)
    4084           0 :                 goto out_cur;
    4085         709 :         if (found) {
    4086         146 :                 error = xfs_alloc_get_rec(cur, &found_agbno, &found_len,
    4087             :                                 &found);
    4088         146 :                 if (error)
    4089           0 :                         goto out_cur;
    4090         146 :                 if (XFS_IS_CORRUPT(mp, !found)) {
    4091           0 :                         xfs_btree_mark_sick(cur);
    4092           0 :                         error = -EFSCORRUPTED;
    4093           0 :                         goto out_cur;
    4094             :                 }
    4095             : 
    4096         146 :                 if (found_agbno + found_len > *agbno)
    4097         106 :                         goto found;
    4098             :         }
    4099             : 
    4100             :         /* Examine the next record if free extent not in range. */
    4101         603 :         error = xfs_btree_increment(cur, 0, &found);
    4102         603 :         if (error)
    4103           0 :                 goto out_cur;
    4104         603 :         if (!found)
    4105          40 :                 goto next_ag;
    4106             : 
    4107         563 :         error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, &found);
    4108         563 :         if (error)
    4109           0 :                 goto out_cur;
    4110         563 :         if (XFS_IS_CORRUPT(mp, !found)) {
    4111           0 :                 xfs_btree_mark_sick(cur);
    4112           0 :                 error = -EFSCORRUPTED;
    4113           0 :                 goto out_cur;
    4114             :         }
    4115             : 
    4116         563 :         if (found_agbno >= end_agbno)
    4117         506 :                 goto next_ag;
    4118             : 
    4119          57 : found:
    4120             :         /* Found something, so update the mapping. */
    4121         163 :         trace_xfs_alloc_find_freesp_done(mp, pag->pag_agno, found_agbno,
    4122             :                         found_len);
    4123         163 :         if (found_agbno < *agbno) {
    4124           8 :                 found_len -= *agbno - found_agbno;
    4125           8 :                 found_agbno = *agbno;
    4126             :         }
    4127         163 :         *len = found_len;
    4128         163 :         *agbno = found_agbno;
    4129         163 :         goto out_cur;
    4130         546 : next_ag:
    4131             :         /* Found nothing, so advance the cursor beyond the end of the range. */
    4132         546 :         *agbno = end_agbno;
    4133         546 :         *len = 0;
    4134         709 : out_cur:
    4135         709 :         xfs_btree_del_cursor(cur, error);
    4136         709 :         return error;
    4137             : }

Generated by: LCOV version 1.14