LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_alloc.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 1540 1869 82.4 %
Date: 2023-07-31 20:08:27 Functions: 73 75 97.3 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_format.h"
       9             : #include "xfs_log_format.h"
      10             : #include "xfs_shared.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_defer.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_rmap.h"
      17             : #include "xfs_alloc_btree.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_extent_busy.h"
      20             : #include "xfs_errortag.h"
      21             : #include "xfs_error.h"
      22             : #include "xfs_trace.h"
      23             : #include "xfs_trans.h"
      24             : #include "xfs_buf_item.h"
      25             : #include "xfs_log.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_bmap.h"
      29             : #include "xfs_health.h"
      30             : 
      31             : struct kmem_cache       *xfs_extfree_item_cache;
      32             : 
      33             : struct workqueue_struct *xfs_alloc_wq;
      34             : 
      35             : #define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
      36             : 
      37             : #define XFSA_FIXUP_BNO_OK       1
      38             : #define XFSA_FIXUP_CNT_OK       2
      39             : 
      40             : /*
      41             :  * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
      42             :  * the beginning of the block for a proper header with the location information
      43             :  * and CRC.
      44             :  */
      45             : unsigned int
      46     8048250 : xfs_agfl_size(
      47             :         struct xfs_mount        *mp)
      48             : {
      49  1106912166 :         unsigned int            size = mp->m_sb.sb_sectsize;
      50             : 
      51    11532890 :         if (xfs_has_crc(mp))
      52  1104710383 :                 size -= sizeof(struct xfs_agfl);
      53             : 
      54  1106912166 :         return size / sizeof(xfs_agblock_t);
      55             : }
      56             : 
      57             : unsigned int
      58      191942 : xfs_refc_block(
      59             :         struct xfs_mount        *mp)
      60             : {
      61      191942 :         if (xfs_has_rmapbt(mp))
      62      189660 :                 return XFS_RMAP_BLOCK(mp) + 1;
      63        2282 :         if (xfs_has_finobt(mp))
      64           0 :                 return XFS_FIBT_BLOCK(mp) + 1;
      65        2282 :         return XFS_IBT_BLOCK(mp) + 1;
      66             : }
      67             : 
      68             : xfs_extlen_t
      69       35788 : xfs_prealloc_blocks(
      70             :         struct xfs_mount        *mp)
      71             : {
      72       35788 :         if (xfs_has_reflink(mp))
      73       35707 :                 return xfs_refc_block(mp) + 1;
      74          81 :         if (xfs_has_rmapbt(mp))
      75           5 :                 return XFS_RMAP_BLOCK(mp) + 1;
      76          76 :         if (xfs_has_finobt(mp))
      77          28 :                 return XFS_FIBT_BLOCK(mp) + 1;
      78          48 :         return XFS_IBT_BLOCK(mp) + 1;
      79             : }
      80             : 
      81             : /*
      82             :  * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
      83             :  * guarantee that we can refill the AGFL prior to allocating space in a nearly
      84             :  * full AG.  Although the space described by the free space btrees, the
      85             :  * blocks used by the freesp btrees themselves, and the blocks owned by the
      86             :  * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
      87             :  * free space in the AG drop so low that the free space btrees cannot refill an
      88             :  * empty AGFL up to the minimum level.  Rather than grind through empty AGs
      89             :  * until the fs goes down, we subtract this many AG blocks from the incore
      90             :  * fdblocks to ensure user allocation does not overcommit the space the
      91             :  * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
      92             :  * withhold space from xfs_mod_fdblocks, so we do not account for that here.
      93             :  */
      94             : #define XFS_ALLOCBT_AGFL_RESERVE        4
      95             : 
      96             : /*
      97             :  * Compute the number of blocks that we set aside to guarantee the ability to
      98             :  * refill the AGFL and handle a full bmap btree split.
      99             :  *
     100             :  * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
     101             :  * AGF buffer (PV 947395), we place constraints on the relationship among
     102             :  * actual allocations for data blocks, freelist blocks, and potential file data
     103             :  * bmap btree blocks. However, these restrictions may result in no actual space
     104             :  * allocated for a delayed extent, for example, a data block in a certain AG is
     105             :  * allocated but there is no additional block for the additional bmap btree
     106             :  * block due to a split of the bmap btree of the file. The result of this may
     107             :  * lead to an infinite loop when the file gets flushed to disk and all delayed
     108             :  * extents need to be actually allocated. To get around this, we explicitly set
     109             :  * aside a few blocks which will not be reserved in delayed allocation.
     110             :  *
     111             :  * For each AG, we need to reserve enough blocks to replenish a totally empty
     112             :  * AGFL and 4 more to handle a potential split of the file's bmap btree.
     113             :  */
     114             : unsigned int
     115       35839 : xfs_alloc_set_aside(
     116             :         struct xfs_mount        *mp)
     117             : {
     118       35839 :         return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
     119             : }
     120             : 
     121             : /*
     122             :  * When deciding how much space to allocate out of an AG, we limit the
     123             :  * allocation maximum size to the size the AG. However, we cannot use all the
     124             :  * blocks in the AG - some are permanently used by metadata. These
     125             :  * blocks are generally:
     126             :  *      - the AG superblock, AGF, AGI and AGFL
     127             :  *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
     128             :  *        the AGI free inode and rmap btree root blocks.
     129             :  *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
     130             :  *      - the rmapbt root block
     131             :  *
     132             :  * The AG headers are sector sized, so the amount of space they take up is
     133             :  * dependent on filesystem geometry. The others are all single blocks.
     134             :  */
     135             : unsigned int
     136       24343 : xfs_alloc_ag_max_usable(
     137             :         struct xfs_mount        *mp)
     138             : {
     139       24343 :         unsigned int            blocks;
     140             : 
     141       24343 :         blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
     142       24343 :         blocks += XFS_ALLOCBT_AGFL_RESERVE;
     143       24343 :         blocks += 3;                    /* AGF, AGI btree root blocks */
     144       24343 :         if (xfs_has_finobt(mp))
     145       24293 :                 blocks++;               /* finobt root block */
     146       24343 :         if (xfs_has_rmapbt(mp))
     147       24269 :                 blocks++;               /* rmap root block */
     148       24343 :         if (xfs_has_reflink(mp))
     149       24264 :                 blocks++;               /* refcount root block */
     150             : 
     151       24343 :         return mp->m_sb.sb_agblocks - blocks;
     152             : }
     153             : 
     154             : /*
     155             :  * Lookup the record equal to [bno, len] in the btree given by cur.
     156             :  */
     157             : STATIC int                              /* error */
     158   255299483 : xfs_alloc_lookup_eq(
     159             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     160             :         xfs_agblock_t           bno,    /* starting block of extent */
     161             :         xfs_extlen_t            len,    /* length of extent */
     162             :         int                     *stat)  /* success/failure */
     163             : {
     164   255299483 :         int                     error;
     165             : 
     166   255299483 :         cur->bc_rec.a.ar_startblock = bno;
     167   255299483 :         cur->bc_rec.a.ar_blockcount = len;
     168   255299483 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
     169   255318586 :         cur->bc_ag.abt.active = (*stat == 1);
     170   255318586 :         return error;
     171             : }
     172             : 
     173             : /*
     174             :  * Lookup the first record greater than or equal to [bno, len]
     175             :  * in the btree given by cur.
     176             :  */
     177             : int                             /* error */
     178        6243 : xfs_alloc_lookup_ge(
     179             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     180             :         xfs_agblock_t           bno,    /* starting block of extent */
     181             :         xfs_extlen_t            len,    /* length of extent */
     182             :         int                     *stat)  /* success/failure */
     183             : {
     184   272144852 :         int                     error;
     185             : 
     186   272144852 :         cur->bc_rec.a.ar_startblock = bno;
     187   272144852 :         cur->bc_rec.a.ar_blockcount = len;
     188        6243 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
     189   272139298 :         cur->bc_ag.abt.active = (*stat == 1);
     190   272139298 :         return error;
     191             : }
     192             : 
     193             : /*
     194             :  * Lookup the first record less than or equal to [bno, len]
     195             :  * in the btree given by cur.
     196             :  */
     197             : int                                     /* error */
     198   387924045 : xfs_alloc_lookup_le(
     199             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     200             :         xfs_agblock_t           bno,    /* starting block of extent */
     201             :         xfs_extlen_t            len,    /* length of extent */
     202             :         int                     *stat)  /* success/failure */
     203             : {
     204   476489664 :         int                     error;
     205   476489664 :         cur->bc_rec.a.ar_startblock = bno;
     206   476489664 :         cur->bc_rec.a.ar_blockcount = len;
     207   387924045 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
     208   476490522 :         cur->bc_ag.abt.active = (*stat == 1);
     209   476490522 :         return error;
     210             : }
     211             : 
     212             : static inline bool
     213             : xfs_alloc_cur_active(
     214             :         struct xfs_btree_cur    *cur)
     215             : {
     216  8161164209 :         return cur && cur->bc_ag.abt.active;
     217             : }
     218             : 
     219             : /*
     220             :  * Update the record referred to by cur to the value given
     221             :  * by [bno, len].
     222             :  * This either works (return 0) or gets an EFSCORRUPTED error.
     223             :  */
     224             : STATIC int                              /* error */
     225    65824400 : xfs_alloc_update(
     226             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     227             :         xfs_agblock_t           bno,    /* starting block of extent */
     228             :         xfs_extlen_t            len)    /* length of extent */
     229             : {
     230    65824400 :         union xfs_btree_rec     rec;
     231             : 
     232    65824400 :         rec.alloc.ar_startblock = cpu_to_be32(bno);
     233    65824400 :         rec.alloc.ar_blockcount = cpu_to_be32(len);
     234    65824400 :         return xfs_btree_update(cur, &rec);
     235             : }
     236             : 
     237             : /* Convert the ondisk btree record to its incore representation. */
     238             : void
     239  6112348677 : xfs_alloc_btrec_to_irec(
     240             :         const union xfs_btree_rec       *rec,
     241             :         struct xfs_alloc_rec_incore     *irec)
     242             : {
     243  6112348677 :         irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
     244  6112348677 :         irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
     245  6112348677 : }
     246             : 
     247             : inline xfs_failaddr_t
     248  6157385767 : xfs_alloc_check_perag_irec(
     249             :         struct xfs_perag                *pag,
     250             :         const struct xfs_alloc_rec_incore *irec)
     251             : {
     252  6157385767 :         if (irec->ar_blockcount == 0)
     253           0 :                 return __this_address;
     254             : 
     255             :         /* check for valid extent range, including overflow */
     256  6157385767 :         if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
     257           0 :                 return __this_address;
     258             : 
     259             :         return NULL;
     260             : }
     261             : 
     262             : /* Simple checks for free space records. */
     263             : xfs_failaddr_t
     264   387433508 : xfs_alloc_check_irec(
     265             :         struct xfs_btree_cur            *cur,
     266             :         const struct xfs_alloc_rec_incore *irec)
     267             : {
     268   387433508 :         return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
     269             : }
     270             : 
     271             : static inline int
     272           0 : xfs_alloc_complain_bad_rec(
     273             :         struct xfs_btree_cur            *cur,
     274             :         xfs_failaddr_t                  fa,
     275             :         const struct xfs_alloc_rec_incore *irec)
     276             : {
     277           0 :         struct xfs_mount                *mp = cur->bc_mp;
     278             : 
     279           0 :         xfs_warn(mp,
     280             :                 "%s Freespace BTree record corruption in AG %d detected at %pS!",
     281             :                 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
     282             :                 cur->bc_ag.pag->pag_agno, fa);
     283           0 :         xfs_warn(mp,
     284             :                 "start block 0x%x block count 0x%x", irec->ar_startblock,
     285             :                 irec->ar_blockcount);
     286           0 :         xfs_btree_mark_sick(cur);
     287           0 :         return -EFSCORRUPTED;
     288             : }
     289             : 
     290             : /*
     291             :  * Get the data from the pointed-to record.
     292             :  */
     293             : int                                     /* error */
     294  4783944381 : xfs_alloc_get_rec(
     295             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     296             :         xfs_agblock_t           *bno,   /* output: starting block of extent */
     297             :         xfs_extlen_t            *len,   /* output: length of extent */
     298             :         int                     *stat)  /* output: success/failure */
     299             : {
     300  4783944381 :         struct xfs_alloc_rec_incore irec;
     301  4783944381 :         union xfs_btree_rec     *rec;
     302  4783944381 :         xfs_failaddr_t          fa;
     303  4783944381 :         int                     error;
     304             : 
     305  4783944381 :         error = xfs_btree_get_rec(cur, &rec, stat);
     306  4775819117 :         if (error || !(*stat))
     307             :                 return error;
     308             : 
     309  4784117760 :         xfs_alloc_btrec_to_irec(rec, &irec);
     310  4785498248 :         fa = xfs_alloc_check_irec(cur, &irec);
     311  4786231856 :         if (fa)
     312           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
     313             : 
     314  4786231856 :         *bno = irec.ar_startblock;
     315  4786231856 :         *len = irec.ar_blockcount;
     316  4786231856 :         return 0;
     317             : }
     318             : 
     319             : /*
     320             :  * Compute aligned version of the found extent.
     321             :  * Takes alignment and min length into account.
     322             :  */
     323             : STATIC bool
     324  3746514146 : xfs_alloc_compute_aligned(
     325             :         xfs_alloc_arg_t *args,          /* allocation argument structure */
     326             :         xfs_agblock_t   foundbno,       /* starting block in found extent */
     327             :         xfs_extlen_t    foundlen,       /* length in found extent */
     328             :         xfs_agblock_t   *resbno,        /* result block number */
     329             :         xfs_extlen_t    *reslen,        /* result length */
     330             :         unsigned        *busy_gen)
     331             : {
     332  3746514146 :         xfs_agblock_t   bno = foundbno;
     333  3746514146 :         xfs_extlen_t    len = foundlen;
     334  3746514146 :         xfs_extlen_t    diff;
     335  3746514146 :         bool            busy;
     336             : 
     337             :         /* Trim busy sections out of found extent */
     338  3746514146 :         busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
     339             : 
     340             :         /*
     341             :          * If we have a largish extent that happens to start before min_agbno,
     342             :          * see if we can shift it into range...
     343             :          */
     344  3750380441 :         if (bno < args->min_agbno && bno + len > args->min_agbno) {
     345        1164 :                 diff = args->min_agbno - bno;
     346        1164 :                 if (len > diff) {
     347        1164 :                         bno += diff;
     348        1164 :                         len -= diff;
     349             :                 }
     350             :         }
     351             : 
     352  3750380441 :         if (args->alignment > 1 && len >= args->minlen) {
     353    12585563 :                 xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
     354             : 
     355    12585563 :                 diff = aligned_bno - bno;
     356             : 
     357    12585563 :                 *resbno = aligned_bno;
     358    12585563 :                 *reslen = diff >= len ? 0 : len - diff;
     359             :         } else {
     360  3737794878 :                 *resbno = bno;
     361  3737794878 :                 *reslen = len;
     362             :         }
     363             : 
     364  3750380441 :         return busy;
     365             : }
     366             : 
     367             : /*
     368             :  * Compute best start block and diff for "near" allocations.
     369             :  * freelen >= wantlen already checked by caller.
     370             :  */
     371             : STATIC xfs_extlen_t                     /* difference value (absolute) */
     372  2132079603 : xfs_alloc_compute_diff(
     373             :         xfs_agblock_t   wantbno,        /* target starting block */
     374             :         xfs_extlen_t    wantlen,        /* target length */
     375             :         xfs_extlen_t    alignment,      /* target alignment */
     376             :         int             datatype,       /* are we allocating data? */
     377             :         xfs_agblock_t   freebno,        /* freespace's starting block */
     378             :         xfs_extlen_t    freelen,        /* freespace's length */
     379             :         xfs_agblock_t   *newbnop)       /* result: best start block from free */
     380             : {
     381  2132079603 :         xfs_agblock_t   freeend;        /* end of freespace extent */
     382  2132079603 :         xfs_agblock_t   newbno1;        /* return block number */
     383  2132079603 :         xfs_agblock_t   newbno2;        /* other new block number */
     384  2132079603 :         xfs_extlen_t    newlen1=0;      /* length with newbno1 */
     385  2132079603 :         xfs_extlen_t    newlen2=0;      /* length with newbno2 */
     386  2132079603 :         xfs_agblock_t   wantend;        /* end of target extent */
     387  2132079603 :         bool            userdata = datatype & XFS_ALLOC_USERDATA;
     388             : 
     389  2132079603 :         ASSERT(freelen >= wantlen);
     390  2132079603 :         freeend = freebno + freelen;
     391  2132079603 :         wantend = wantbno + wantlen;
     392             :         /*
     393             :          * We want to allocate from the start of a free extent if it is past
     394             :          * the desired block or if we are allocating user data and the free
     395             :          * extent is before desired block. The second case is there to allow
     396             :          * for contiguous allocation from the remaining free space if the file
     397             :          * grows in the short term.
     398             :          */
     399  2132079603 :         if (freebno >= wantbno || (userdata && freeend < wantend)) {
     400  1499890583 :                 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
     401           0 :                         newbno1 = NULLAGBLOCK;
     402   632189020 :         } else if (freeend >= wantend && alignment > 1) {
     403           0 :                 newbno1 = roundup(wantbno, alignment);
     404           0 :                 newbno2 = newbno1 - alignment;
     405           0 :                 if (newbno1 >= freeend)
     406             :                         newbno1 = NULLAGBLOCK;
     407             :                 else
     408           0 :                         newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
     409           0 :                 if (newbno2 < freebno)
     410             :                         newbno2 = NULLAGBLOCK;
     411             :                 else
     412           0 :                         newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
     413           0 :                 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
     414           0 :                         if (newlen1 < newlen2 ||
     415           0 :                             (newlen1 == newlen2 &&
     416           0 :                              XFS_ABSDIFF(newbno1, wantbno) >
     417           0 :                              XFS_ABSDIFF(newbno2, wantbno)))
     418           0 :                                 newbno1 = newbno2;
     419           0 :                 } else if (newbno2 != NULLAGBLOCK)
     420           0 :                         newbno1 = newbno2;
     421   632189020 :         } else if (freeend >= wantend) {
     422             :                 newbno1 = wantbno;
     423   631660539 :         } else if (alignment > 1) {
     424       14086 :                 newbno1 = roundup(freeend - wantlen, alignment);
     425       14086 :                 if (newbno1 > freeend - wantlen &&
     426        8862 :                     newbno1 - alignment >= freebno)
     427             :                         newbno1 -= alignment;
     428        5224 :                 else if (newbno1 >= freeend)
     429           0 :                         newbno1 = NULLAGBLOCK;
     430             :         } else
     431   631646453 :                 newbno1 = freeend - wantlen;
     432  2132079603 :         *newbnop = newbno1;
     433  2132079603 :         return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
     434             : }
     435             : 
     436             : /*
     437             :  * Fix up the length, based on mod and prod.
     438             :  * len should be k * prod + mod for some k.
     439             :  * If len is too small it is returned unchanged.
     440             :  * If len hits maxlen it is left alone.
     441             :  */
     442             : STATIC void
     443  2135357631 : xfs_alloc_fix_len(
     444             :         xfs_alloc_arg_t *args)          /* allocation argument structure */
     445             : {
     446  2135357631 :         xfs_extlen_t    k;
     447  2135357631 :         xfs_extlen_t    rlen;
     448             : 
     449  2135357631 :         ASSERT(args->mod < args->prod);
     450  2135357631 :         rlen = args->len;
     451  2135357631 :         ASSERT(rlen >= args->minlen);
     452  2135357631 :         ASSERT(rlen <= args->maxlen);
     453  2135357631 :         if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
     454       21584 :             (args->mod == 0 && rlen < args->prod))
     455             :                 return;
     456       35016 :         k = rlen % args->prod;
     457       35016 :         if (k == args->mod)
     458             :                 return;
     459       28492 :         if (k > args->mod)
     460       13991 :                 rlen = rlen - (k - args->mod);
     461             :         else
     462       14501 :                 rlen = rlen - args->prod + (args->mod - k);
     463             :         /* casts to (int) catch length underflows */
     464       28492 :         if ((int)rlen < (int)args->minlen)
     465             :                 return;
     466        6008 :         ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
     467        6008 :         ASSERT(rlen % args->prod == args->mod);
     468        6008 :         ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
     469             :                 rlen + args->minleft);
     470        6008 :         args->len = rlen;
     471             : }
     472             : 
     473             : /*
     474             :  * Update the two btrees, logically removing from freespace the extent
     475             :  * starting at rbno, rlen blocks.  The extent is contained within the
     476             :  * actual (current) free extent fbno for flen blocks.
     477             :  * Flags are passed in indicating whether the cursors are set to the
     478             :  * relevant records.
     479             :  */
     480             : STATIC int                              /* error code */
     481    64163456 : xfs_alloc_fixup_trees(
     482             :         struct xfs_btree_cur *cnt_cur,  /* cursor for by-size btree */
     483             :         struct xfs_btree_cur *bno_cur,  /* cursor for by-block btree */
     484             :         xfs_agblock_t   fbno,           /* starting block of free extent */
     485             :         xfs_extlen_t    flen,           /* length of free extent */
     486             :         xfs_agblock_t   rbno,           /* starting block of returned extent */
     487             :         xfs_extlen_t    rlen,           /* length of returned extent */
     488             :         int             flags)          /* flags, XFSA_FIXUP_... */
     489             : {
     490    64163456 :         int             error;          /* error code */
     491    64163456 :         int             i;              /* operation results */
     492    64163456 :         xfs_agblock_t   nfbno1;         /* first new free startblock */
     493    64163456 :         xfs_agblock_t   nfbno2;         /* second new free startblock */
     494    64163456 :         xfs_extlen_t    nflen1=0;       /* first new free length */
     495    64163456 :         xfs_extlen_t    nflen2=0;       /* second new free length */
     496    64163456 :         struct xfs_mount *mp;
     497             : 
     498    64163456 :         mp = cnt_cur->bc_mp;
     499             : 
     500             :         /*
     501             :          * Look up the record in the by-size tree if necessary.
     502             :          */
     503    64163456 :         if (flags & XFSA_FIXUP_CNT_OK) {
     504             : #ifdef DEBUG
     505     3202724 :                 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
     506             :                         return error;
     507     3202885 :                 if (XFS_IS_CORRUPT(mp,
     508             :                                    i != 1 ||
     509             :                                    nfbno1 != fbno ||
     510             :                                    nflen1 != flen)) {
     511           0 :                         xfs_btree_mark_sick(cnt_cur);
     512           0 :                         return -EFSCORRUPTED;
     513             :                 }
     514             : #endif
     515             :         } else {
     516    60960732 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
     517             :                         return error;
     518    60961951 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     519           0 :                         xfs_btree_mark_sick(cnt_cur);
     520           0 :                         return -EFSCORRUPTED;
     521             :                 }
     522             :         }
     523             :         /*
     524             :          * Look up the record in the by-block tree if necessary.
     525             :          */
     526    64164836 :         if (flags & XFSA_FIXUP_BNO_OK) {
     527             : #ifdef DEBUG
     528       72177 :                 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
     529             :                         return error;
     530       72183 :                 if (XFS_IS_CORRUPT(mp,
     531             :                                    i != 1 ||
     532             :                                    nfbno1 != fbno ||
     533             :                                    nflen1 != flen)) {
     534           0 :                         xfs_btree_mark_sick(bno_cur);
     535           0 :                         return -EFSCORRUPTED;
     536             :                 }
     537             : #endif
     538             :         } else {
     539    64092659 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
     540             :                         return error;
     541    64093969 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     542           0 :                         xfs_btree_mark_sick(bno_cur);
     543           0 :                         return -EFSCORRUPTED;
     544             :                 }
     545             :         }
     546             : 
     547             : #ifdef DEBUG
     548    64166152 :         if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
     549    42896794 :                 struct xfs_btree_block  *bnoblock;
     550    42896794 :                 struct xfs_btree_block  *cntblock;
     551             : 
     552    42896794 :                 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
     553    42896794 :                 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
     554             : 
     555    42896794 :                 if (XFS_IS_CORRUPT(mp,
     556             :                                    bnoblock->bb_numrecs !=
     557             :                                    cntblock->bb_numrecs)) {
     558           0 :                         xfs_btree_mark_sick(bno_cur);
     559           0 :                         return -EFSCORRUPTED;
     560             :                 }
     561             :         }
     562             : #endif
     563             : 
     564             :         /*
     565             :          * Deal with all four cases: the allocated record is contained
     566             :          * within the freespace record, so we can have new freespace
     567             :          * at either (or both) end, or no freespace remaining.
     568             :          */
     569    64166152 :         if (rbno == fbno && rlen == flen)
     570    20179694 :                 nfbno1 = nfbno2 = NULLAGBLOCK;
     571    43986458 :         else if (rbno == fbno) {
     572    38027407 :                 nfbno1 = rbno + rlen;
     573    38027407 :                 nflen1 = flen - rlen;
     574    38027407 :                 nfbno2 = NULLAGBLOCK;
     575     5959051 :         } else if (rbno + rlen == fbno + flen) {
     576     3522870 :                 nfbno1 = fbno;
     577     3522870 :                 nflen1 = flen - rlen;
     578     3522870 :                 nfbno2 = NULLAGBLOCK;
     579             :         } else {
     580     2436181 :                 nfbno1 = fbno;
     581     2436181 :                 nflen1 = rbno - fbno;
     582     2436181 :                 nfbno2 = rbno + rlen;
     583     2436181 :                 nflen2 = (fbno + flen) - nfbno2;
     584             :         }
     585             :         /*
     586             :          * Delete the entry from the by-size btree.
     587             :          */
     588    64166152 :         if ((error = xfs_btree_delete(cnt_cur, &i)))
     589             :                 return error;
     590    64165080 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
     591           0 :                 xfs_btree_mark_sick(cnt_cur);
     592           0 :                 return -EFSCORRUPTED;
     593             :         }
     594             :         /*
     595             :          * Add new by-size btree entry(s).
     596             :          */
     597    64165080 :         if (nfbno1 != NULLAGBLOCK) {
     598    43985992 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
     599             :                         return error;
     600    43986063 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     601           0 :                         xfs_btree_mark_sick(cnt_cur);
     602           0 :                         return -EFSCORRUPTED;
     603             :                 }
     604    43986063 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     605             :                         return error;
     606    43985827 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     607           0 :                         xfs_btree_mark_sick(cnt_cur);
     608           0 :                         return -EFSCORRUPTED;
     609             :                 }
     610             :         }
     611    64164915 :         if (nfbno2 != NULLAGBLOCK) {
     612     2436173 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
     613             :                         return error;
     614     2436176 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     615           0 :                         xfs_btree_mark_sick(cnt_cur);
     616           0 :                         return -EFSCORRUPTED;
     617             :                 }
     618     2436176 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     619             :                         return error;
     620     2436183 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     621           0 :                         xfs_btree_mark_sick(cnt_cur);
     622           0 :                         return -EFSCORRUPTED;
     623             :                 }
     624             :         }
     625             :         /*
     626             :          * Fix up the by-block btree entry(s).
     627             :          */
     628    64164925 :         if (nfbno1 == NULLAGBLOCK) {
     629             :                 /*
     630             :                  * No remaining freespace, just delete the by-block tree entry.
     631             :                  */
     632    20179447 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
     633             :                         return error;
     634    20179759 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     635           0 :                         xfs_btree_mark_sick(bno_cur);
     636           0 :                         return -EFSCORRUPTED;
     637             :                 }
     638             :         } else {
     639             :                 /*
     640             :                  * Update the by-block entry to start later|be shorter.
     641             :                  */
     642    43985478 :                 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
     643             :                         return error;
     644             :         }
     645    64164566 :         if (nfbno2 != NULLAGBLOCK) {
     646             :                 /*
     647             :                  * 2 resulting free entries, need to add one.
     648             :                  */
     649     2436186 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
     650             :                         return error;
     651     2436184 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     652           0 :                         xfs_btree_mark_sick(bno_cur);
     653           0 :                         return -EFSCORRUPTED;
     654             :                 }
     655     2436184 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
     656             :                         return error;
     657     2436179 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     658           0 :                         xfs_btree_mark_sick(bno_cur);
     659           0 :                         return -EFSCORRUPTED;
     660             :                 }
     661             :         }
     662             :         return 0;
     663             : }
     664             : 
     665             : /*
     666             :  * We do not verify the AGFL contents against AGF-based index counters here,
     667             :  * even though we may have access to the perag that contains shadow copies. We
     668             :  * don't know if the AGF based counters have been checked, and if they have they
     669             :  * still may be inconsistent because they haven't yet been reset on the first
     670             :  * allocation after the AGF has been read in.
     671             :  *
     672             :  * This means we can only check that all agfl entries contain valid or null
     673             :  * values because we can't reliably determine the active range to exclude
     674             :  * NULLAGBNO as a valid value.
     675             :  *
     676             :  * However, we can't even do that for v4 format filesystems because there are
     677             :  * old versions of mkfs out there that does not initialise the AGFL to known,
     678             :  * verifiable values. HEnce we can't tell the difference between a AGFL block
     679             :  * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
     680             :  *
     681             :  * As a result, we can only fully validate AGFL block numbers when we pull them
     682             :  * from the freelist in xfs_alloc_get_freelist().
     683             :  */
     684             : static xfs_failaddr_t
     685      617548 : xfs_agfl_verify(
     686             :         struct xfs_buf  *bp)
     687             : {
     688      617548 :         struct xfs_mount *mp = bp->b_mount;
     689      617548 :         struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
     690      617548 :         __be32          *agfl_bno = xfs_buf_to_agfl_bno(bp);
     691      617548 :         int             i;
     692             : 
     693      617548 :         if (!xfs_has_crc(mp))
     694             :                 return NULL;
     695             : 
     696      617544 :         if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
     697           0 :                 return __this_address;
     698      617544 :         if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
     699           0 :                 return __this_address;
     700             :         /*
     701             :          * during growfs operations, the perag is not fully initialised,
     702             :          * so we can't use it for any useful checking. growfs ensures we can't
     703             :          * use it by using uncached buffers that don't have the perag attached
     704             :          * so we can detect and avoid this problem.
     705             :          */
     706     1229680 :         if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
     707           0 :                 return __this_address;
     708             : 
     709  1243433178 :         for (i = 0; i < xfs_agfl_size(mp); i++) {
     710   621099166 :                 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
     711   212495030 :                     be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
     712           0 :                         return __this_address;
     713             :         }
     714             : 
     715      617544 :         if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
     716           0 :                 return __this_address;
     717             :         return NULL;
     718             : }
     719             : 
     720             : static void
     721       64487 : xfs_agfl_read_verify(
     722             :         struct xfs_buf  *bp)
     723             : {
     724       64487 :         struct xfs_mount *mp = bp->b_mount;
     725       64487 :         xfs_failaddr_t  fa;
     726             : 
     727             :         /*
     728             :          * There is no verification of non-crc AGFLs because mkfs does not
     729             :          * initialise the AGFL to zero or NULL. Hence the only valid part of the
     730             :          * AGFL is what the AGF says is active. We can't get to the AGF, so we
     731             :          * can't verify just those entries are valid.
     732             :          */
     733       64487 :         if (!xfs_has_crc(mp))
     734             :                 return;
     735             : 
     736       64469 :         if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
     737         128 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
     738             :         else {
     739       64341 :                 fa = xfs_agfl_verify(bp);
     740       64341 :                 if (fa)
     741           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     742             :         }
     743             : }
     744             : 
     745             : static void
     746       94338 : xfs_agfl_write_verify(
     747             :         struct xfs_buf  *bp)
     748             : {
     749       94338 :         struct xfs_mount        *mp = bp->b_mount;
     750       94338 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     751       94338 :         xfs_failaddr_t          fa;
     752             : 
     753             :         /* no verification of non-crc AGFLs */
     754       94338 :         if (!xfs_has_crc(mp))
     755             :                 return;
     756             : 
     757       92056 :         fa = xfs_agfl_verify(bp);
     758       92056 :         if (fa) {
     759           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     760           0 :                 return;
     761             :         }
     762             : 
     763       92056 :         if (bip)
     764       86648 :                 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     765             : 
     766       92056 :         xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
     767             : }
     768             : 
     769             : const struct xfs_buf_ops xfs_agfl_buf_ops = {
     770             :         .name = "xfs_agfl",
     771             :         .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
     772             :         .verify_read = xfs_agfl_read_verify,
     773             :         .verify_write = xfs_agfl_write_verify,
     774             :         .verify_struct = xfs_agfl_verify,
     775             : };
     776             : 
     777             : /*
     778             :  * Read in the allocation group free block array.
     779             :  */
     780             : int
     781   374432574 : xfs_alloc_read_agfl(
     782             :         struct xfs_perag        *pag,
     783             :         struct xfs_trans        *tp,
     784             :         struct xfs_buf          **bpp)
     785             : {
     786   374432574 :         struct xfs_mount        *mp = pag->pag_mount;
     787   374432574 :         struct xfs_buf          *bp;
     788   374432574 :         int                     error;
     789             : 
     790   374432574 :         error = xfs_trans_read_buf(
     791   374432574 :                         mp, tp, mp->m_ddev_targp,
     792   374432574 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
     793             :                         XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
     794   374439529 :         if (xfs_metadata_is_sick(error))
     795         128 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
     796   374439529 :         if (error)
     797             :                 return error;
     798   374429019 :         xfs_buf_set_ref(bp, XFS_AGFL_REF);
     799   374438653 :         *bpp = bp;
     800   374438653 :         return 0;
     801             : }
     802             : 
     803             : STATIC int
     804   117177052 : xfs_alloc_update_counters(
     805             :         struct xfs_trans        *tp,
     806             :         struct xfs_buf          *agbp,
     807             :         long                    len)
     808             : {
     809   117177052 :         struct xfs_agf          *agf = agbp->b_addr;
     810             : 
     811   117177052 :         agbp->b_pag->pagf_freeblks += len;
     812   117177052 :         be32_add_cpu(&agf->agf_freeblks, len);
     813             : 
     814   351533367 :         if (unlikely(be32_to_cpu(agf->agf_freeblks) >
     815             :                      be32_to_cpu(agf->agf_length))) {
     816           0 :                 xfs_buf_mark_corrupt(agbp);
     817           0 :                 xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
     818           0 :                 return -EFSCORRUPTED;
     819             :         }
     820             : 
     821   117177789 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
     822   117177789 :         return 0;
     823             : }
     824             : 
     825             : /*
     826             :  * Block allocation algorithm and data structures.
     827             :  */
     828             : struct xfs_alloc_cur {
     829             :         struct xfs_btree_cur            *cnt;   /* btree cursors */
     830             :         struct xfs_btree_cur            *bnolt;
     831             :         struct xfs_btree_cur            *bnogt;
     832             :         xfs_extlen_t                    cur_len;/* current search length */
     833             :         xfs_agblock_t                   rec_bno;/* extent startblock */
     834             :         xfs_extlen_t                    rec_len;/* extent length */
     835             :         xfs_agblock_t                   bno;    /* alloc bno */
     836             :         xfs_extlen_t                    len;    /* alloc len */
     837             :         xfs_extlen_t                    diff;   /* diff from search bno */
     838             :         unsigned int                    busy_gen;/* busy state */
     839             :         bool                            busy;
     840             : };
     841             : 
     842             : /*
     843             :  * Set up cursors, etc. in the extent allocation cursor. This function can be
     844             :  * called multiple times to reset an initialized structure without having to
     845             :  * reallocate cursors.
     846             :  */
     847             : static int
     848    60926324 : xfs_alloc_cur_setup(
     849             :         struct xfs_alloc_arg    *args,
     850             :         struct xfs_alloc_cur    *acur)
     851             : {
     852    60926324 :         int                     error;
     853    60926324 :         int                     i;
     854             : 
     855    60926324 :         acur->cur_len = args->maxlen;
     856    60926324 :         acur->rec_bno = 0;
     857    60926324 :         acur->rec_len = 0;
     858    60926324 :         acur->bno = 0;
     859    60926324 :         acur->len = 0;
     860    60926324 :         acur->diff = -1;
     861    60926324 :         acur->busy = false;
     862    60926324 :         acur->busy_gen = 0;
     863             : 
     864             :         /*
     865             :          * Perform an initial cntbt lookup to check for availability of maxlen
     866             :          * extents. If this fails, we'll return -ENOSPC to signal the caller to
     867             :          * attempt a small allocation.
     868             :          */
     869    60926324 :         if (!acur->cnt)
     870    60892421 :                 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
     871             :                                         args->agbp, args->pag, XFS_BTNUM_CNT);
     872    60924809 :         error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
     873    60927179 :         if (error)
     874             :                 return error;
     875             : 
     876             :         /*
     877             :          * Allocate the bnobt left and right search cursors.
     878             :          */
     879    60926697 :         if (!acur->bnolt)
     880    60892086 :                 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
     881             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     882    60926287 :         if (!acur->bnogt)
     883    60891787 :                 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
     884             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     885    60926153 :         return i == 1 ? 0 : -ENOSPC;
     886             : }
     887             : 
     888             : static void
     889    60891527 : xfs_alloc_cur_close(
     890             :         struct xfs_alloc_cur    *acur,
     891             :         bool                    error)
     892             : {
     893    60891527 :         int                     cur_error = XFS_BTREE_NOERROR;
     894             : 
     895    60891527 :         if (error)
     896        1028 :                 cur_error = XFS_BTREE_ERROR;
     897             : 
     898    60891527 :         if (acur->cnt)
     899    60891527 :                 xfs_btree_del_cursor(acur->cnt, cur_error);
     900    60891933 :         if (acur->bnolt)
     901    60891451 :                 xfs_btree_del_cursor(acur->bnolt, cur_error);
     902    60891443 :         if (acur->bnogt)
     903    60890961 :                 xfs_btree_del_cursor(acur->bnogt, cur_error);
     904    60891529 :         acur->cnt = acur->bnolt = acur->bnogt = NULL;
     905    60891529 : }
     906             : 
     907             : /*
     908             :  * Check an extent for allocation and track the best available candidate in the
     909             :  * allocation structure. The cursor is deactivated if it has entered an out of
     910             :  * range state based on allocation arguments. Optionally return the extent
     911             :  * extent geometry and allocation status if requested by the caller.
     912             :  */
     913             : static int
     914  3848274183 : xfs_alloc_cur_check(
     915             :         struct xfs_alloc_arg    *args,
     916             :         struct xfs_alloc_cur    *acur,
     917             :         struct xfs_btree_cur    *cur,
     918             :         int                     *new)
     919             : {
     920  3848274183 :         int                     error, i;
     921  3848274183 :         xfs_agblock_t           bno, bnoa, bnew;
     922  3848274183 :         xfs_extlen_t            len, lena, diff = -1;
     923  3848274183 :         bool                    busy;
     924  3848274183 :         unsigned                busy_gen = 0;
     925  3848274183 :         bool                    deactivate = false;
     926  3848274183 :         bool                    isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
     927             : 
     928  3848274183 :         *new = 0;
     929             : 
     930  3848274183 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
     931  3846344827 :         if (error)
     932             :                 return error;
     933  3846344827 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
     934           0 :                 xfs_btree_mark_sick(cur);
     935           0 :                 return -EFSCORRUPTED;
     936             :         }
     937             : 
     938             :         /*
     939             :          * Check minlen and deactivate a cntbt cursor if out of acceptable size
     940             :          * range (i.e., walking backwards looking for a minlen extent).
     941             :          */
     942  3846344827 :         if (len < args->minlen) {
     943   249792773 :                 deactivate = !isbnobt;
     944   249792773 :                 goto out;
     945             :         }
     946             : 
     947  3596552054 :         busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
     948             :                                          &busy_gen);
     949  3605995058 :         acur->busy |= busy;
     950  3605995058 :         if (busy)
     951  1614749424 :                 acur->busy_gen = busy_gen;
     952             :         /* deactivate a bnobt cursor outside of locality range */
     953  3605995058 :         if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
     954        3558 :                 deactivate = isbnobt;
     955        3558 :                 goto out;
     956             :         }
     957  3605991500 :         if (lena < args->minlen)
     958  1473994669 :                 goto out;
     959             : 
     960  2131996831 :         args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
     961  2131996831 :         xfs_alloc_fix_len(args);
     962  2131920015 :         ASSERT(args->len >= args->minlen);
     963  2131920015 :         if (args->len < acur->len)
     964       51223 :                 goto out;
     965             : 
     966             :         /*
     967             :          * We have an aligned record that satisfies minlen and beats or matches
     968             :          * the candidate extent size. Compare locality for near allocation mode.
     969             :          */
     970  2131868792 :         diff = xfs_alloc_compute_diff(args->agbno, args->len,
     971             :                                       args->alignment, args->datatype,
     972             :                                       bnoa, lena, &bnew);
     973  2132353973 :         if (bnew == NULLAGBLOCK)
     974           0 :                 goto out;
     975             : 
     976             :         /*
     977             :          * Deactivate a bnobt cursor with worse locality than the current best.
     978             :          */
     979  2132353973 :         if (diff > acur->diff) {
     980  1555439004 :                 deactivate = isbnobt;
     981  1555439004 :                 goto out;
     982             :         }
     983             : 
     984   576914969 :         ASSERT(args->len > acur->len ||
     985             :                (args->len == acur->len && diff <= acur->diff));
     986   576914969 :         acur->rec_bno = bno;
     987   576914969 :         acur->rec_len = len;
     988   576914969 :         acur->bno = bnew;
     989   576914969 :         acur->len = args->len;
     990   576914969 :         acur->diff = diff;
     991   576914969 :         *new = 1;
     992             : 
     993             :         /*
     994             :          * We're done if we found a perfect allocation. This only deactivates
     995             :          * the current cursor, but this is just an optimization to terminate a
     996             :          * cntbt search that otherwise runs to the edge of the tree.
     997             :          */
     998   576914969 :         if (acur->diff == 0 && acur->len == args->maxlen)
     999             :                 deactivate = true;
    1000   569585621 : out:
    1001  3279281227 :         if (deactivate)
    1002    16567864 :                 cur->bc_ag.abt.active = false;
    1003  3856196196 :         trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
    1004  3856196196 :                                   *new);
    1005  3856196196 :         return 0;
    1006             : }
    1007             : 
    1008             : /*
    1009             :  * Complete an allocation of a candidate extent. Remove the extent from both
    1010             :  * trees and update the args structure.
    1011             :  */
    1012             : STATIC int
    1013    60889456 : xfs_alloc_cur_finish(
    1014             :         struct xfs_alloc_arg    *args,
    1015             :         struct xfs_alloc_cur    *acur)
    1016             : {
    1017    60889456 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1018    60889456 :         int                     error;
    1019             : 
    1020    60889456 :         ASSERT(acur->cnt && acur->bnolt);
    1021    60889456 :         ASSERT(acur->bno >= acur->rec_bno);
    1022    60889456 :         ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
    1023   121778912 :         ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
    1024             : 
    1025    60889456 :         error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
    1026             :                                       acur->rec_len, acur->bno, acur->len, 0);
    1027    60890824 :         if (error)
    1028             :                 return error;
    1029             : 
    1030    60890718 :         args->agbno = acur->bno;
    1031    60890718 :         args->len = acur->len;
    1032    60890718 :         args->wasfromfl = 0;
    1033             : 
    1034    60890718 :         trace_xfs_alloc_cur(args);
    1035    60890718 :         return 0;
    1036             : }
    1037             : 
    1038             : /*
    1039             :  * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
    1040             :  * bno optimized lookup to search for extents with ideal size and locality.
    1041             :  */
    1042             : STATIC int
    1043   137183350 : xfs_alloc_cntbt_iter(
    1044             :         struct xfs_alloc_arg            *args,
    1045             :         struct xfs_alloc_cur            *acur)
    1046             : {
    1047   137183350 :         struct xfs_btree_cur    *cur = acur->cnt;
    1048   137183350 :         xfs_agblock_t           bno;
    1049   137183350 :         xfs_extlen_t            len, cur_len;
    1050   137183350 :         int                     error;
    1051   137183350 :         int                     i;
    1052             : 
    1053   274366700 :         if (!xfs_alloc_cur_active(cur))
    1054             :                 return 0;
    1055             : 
    1056             :         /* locality optimized lookup */
    1057   137153148 :         cur_len = acur->cur_len;
    1058   137153148 :         error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
    1059   137144766 :         if (error)
    1060             :                 return error;
    1061   137144766 :         if (i == 0)
    1062             :                 return 0;
    1063   124777805 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1064   124773073 :         if (error)
    1065             :                 return error;
    1066             : 
    1067             :         /* check the current record and update search length from it */
    1068   124773001 :         error = xfs_alloc_cur_check(args, acur, cur, &i);
    1069   124773853 :         if (error)
    1070             :                 return error;
    1071   124773853 :         ASSERT(len >= acur->cur_len);
    1072   124773853 :         acur->cur_len = len;
    1073             : 
    1074             :         /*
    1075             :          * We looked up the first record >= [agbno, len] above. The agbno is a
    1076             :          * secondary key and so the current record may lie just before or after
    1077             :          * agbno. If it is past agbno, check the previous record too so long as
    1078             :          * the length matches as it may be closer. Don't check a smaller record
    1079             :          * because that could deactivate our cursor.
    1080             :          */
    1081   124773853 :         if (bno > args->agbno) {
    1082   121373533 :                 error = xfs_btree_decrement(cur, 0, &i);
    1083   121381447 :                 if (!error && i) {
    1084   120373543 :                         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1085   120362356 :                         if (!error && i && len == acur->cur_len)
    1086    33582048 :                                 error = xfs_alloc_cur_check(args, acur, cur,
    1087             :                                                             &i);
    1088             :                 }
    1089   121370370 :                 if (error)
    1090             :                         return error;
    1091             :         }
    1092             : 
    1093             :         /*
    1094             :          * Increment the search key until we find at least one allocation
    1095             :          * candidate or if the extent we found was larger. Otherwise, double the
    1096             :          * search key to optimize the search. Efficiency is more important here
    1097             :          * than absolute best locality.
    1098             :          */
    1099   124770690 :         cur_len <<= 1;
    1100   124770690 :         if (!acur->len || acur->cur_len >= cur_len)
    1101    83427264 :                 acur->cur_len++;
    1102             :         else
    1103    41343426 :                 acur->cur_len = cur_len;
    1104             : 
    1105             :         return error;
    1106             : }
    1107             : 
    1108             : /*
    1109             :  * Deal with the case where only small freespaces remain. Either return the
    1110             :  * contents of the last freespace record, or allocate space from the freelist if
    1111             :  * there is nothing in the tree.
    1112             :  */
    1113             : STATIC int                      /* error */
    1114       94772 : xfs_alloc_ag_vextent_small(
    1115             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    1116             :         struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
    1117             :         xfs_agblock_t           *fbnop, /* result block number */
    1118             :         xfs_extlen_t            *flenp, /* result length */
    1119             :         int                     *stat)  /* status: 0-freelist, 1-normal/none */
    1120             : {
    1121       94772 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1122       94772 :         int                     error = 0;
    1123       94772 :         xfs_agblock_t           fbno = NULLAGBLOCK;
    1124       94772 :         xfs_extlen_t            flen = 0;
    1125       94772 :         int                     i = 0;
    1126             : 
    1127             :         /*
    1128             :          * If a cntbt cursor is provided, try to allocate the largest record in
    1129             :          * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
    1130             :          * allocation. Make sure to respect minleft even when pulling from the
    1131             :          * freelist.
    1132             :          */
    1133       94772 :         if (ccur)
    1134       94772 :                 error = xfs_btree_decrement(ccur, 0, &i);
    1135       94771 :         if (error)
    1136           0 :                 goto error;
    1137       94771 :         if (i) {
    1138       94771 :                 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
    1139       94772 :                 if (error)
    1140           0 :                         goto error;
    1141       94772 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1142           0 :                         xfs_btree_mark_sick(ccur);
    1143           0 :                         error = -EFSCORRUPTED;
    1144           0 :                         goto error;
    1145             :                 }
    1146       94772 :                 goto out;
    1147             :         }
    1148             : 
    1149           0 :         if (args->minlen != 1 || args->alignment != 1 ||
    1150           0 :             args->resv == XFS_AG_RESV_AGFL ||
    1151           0 :             be32_to_cpu(agf->agf_flcount) <= args->minleft)
    1152           0 :                 goto out;
    1153             : 
    1154           0 :         error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
    1155             :                         &fbno, 0);
    1156           0 :         if (error)
    1157           0 :                 goto error;
    1158           0 :         if (fbno == NULLAGBLOCK)
    1159           0 :                 goto out;
    1160             : 
    1161           0 :         xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
    1162           0 :                               (args->datatype & XFS_ALLOC_NOBUSY));
    1163             : 
    1164           0 :         if (args->datatype & XFS_ALLOC_USERDATA) {
    1165           0 :                 struct xfs_buf  *bp;
    1166             : 
    1167           0 :                 error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
    1168           0 :                                 XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
    1169             :                                 args->mp->m_bsize, 0, &bp);
    1170           0 :                 if (error)
    1171           0 :                         goto error;
    1172           0 :                 xfs_trans_binval(args->tp, bp);
    1173             :         }
    1174           0 :         *fbnop = args->agbno = fbno;
    1175           0 :         *flenp = args->len = 1;
    1176           0 :         if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
    1177           0 :                 xfs_btree_mark_sick(ccur);
    1178           0 :                 error = -EFSCORRUPTED;
    1179           0 :                 goto error;
    1180             :         }
    1181           0 :         args->wasfromfl = 1;
    1182           0 :         trace_xfs_alloc_small_freelist(args);
    1183             : 
    1184             :         /*
    1185             :          * If we're feeding an AGFL block to something that doesn't live in the
    1186             :          * free space, we need to clear out the OWN_AG rmap.
    1187             :          */
    1188           0 :         error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
    1189             :                               &XFS_RMAP_OINFO_AG);
    1190           0 :         if (error)
    1191           0 :                 goto error;
    1192             : 
    1193           0 :         *stat = 0;
    1194           0 :         return 0;
    1195             : 
    1196       94772 : out:
    1197             :         /*
    1198             :          * Can't do the allocation, give up.
    1199             :          */
    1200       94772 :         if (flen < args->minlen) {
    1201           0 :                 args->agbno = NULLAGBLOCK;
    1202           0 :                 trace_xfs_alloc_small_notenough(args);
    1203           0 :                 flen = 0;
    1204             :         }
    1205       94772 :         *fbnop = fbno;
    1206       94772 :         *flenp = flen;
    1207       94772 :         *stat = 1;
    1208       94772 :         trace_xfs_alloc_small_done(args);
    1209       94772 :         return 0;
    1210             : 
    1211           0 : error:
    1212           0 :         trace_xfs_alloc_small_error(args);
    1213           0 :         return error;
    1214             : }
    1215             : 
    1216             : /*
    1217             :  * Allocate a variable extent at exactly agno/bno.
    1218             :  * Extent's length (returned in *len) will be between minlen and maxlen,
    1219             :  * and of the form k * prod + mod unless there's nothing that large.
    1220             :  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
    1221             :  */
    1222             : STATIC int                      /* error */
    1223      315165 : xfs_alloc_ag_vextent_exact(
    1224             :         xfs_alloc_arg_t *args)  /* allocation argument structure */
    1225             : {
    1226      315165 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1227      315165 :         struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
    1228      315165 :         struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
    1229      315165 :         int             error;
    1230      315165 :         xfs_agblock_t   fbno;   /* start block of found extent */
    1231      315165 :         xfs_extlen_t    flen;   /* length of found extent */
    1232      315165 :         xfs_agblock_t   tbno;   /* start block of busy extent */
    1233      315165 :         xfs_extlen_t    tlen;   /* length of busy extent */
    1234      315165 :         xfs_agblock_t   tend;   /* end block of busy extent */
    1235      315165 :         int             i;      /* success/failure of operation */
    1236      315165 :         unsigned        busy_gen;
    1237             : 
    1238      315165 :         ASSERT(args->alignment == 1);
    1239             : 
    1240             :         /*
    1241             :          * Allocate/initialize a cursor for the by-number freespace btree.
    1242             :          */
    1243      315165 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1244             :                                           args->pag, XFS_BTNUM_BNO);
    1245             : 
    1246             :         /*
    1247             :          * Lookup bno and minlen in the btree (minlen is irrelevant, really).
    1248             :          * Look for the closest free block <= bno, it must contain bno
    1249             :          * if any free block does.
    1250             :          */
    1251      315167 :         error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
    1252      315168 :         if (error)
    1253           1 :                 goto error0;
    1254      315167 :         if (!i)
    1255       39860 :                 goto not_found;
    1256             : 
    1257             :         /*
    1258             :          * Grab the freespace record.
    1259             :          */
    1260      275307 :         error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
    1261      275307 :         if (error)
    1262           0 :                 goto error0;
    1263      275307 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1264           0 :                 xfs_btree_mark_sick(bno_cur);
    1265           0 :                 error = -EFSCORRUPTED;
    1266           0 :                 goto error0;
    1267             :         }
    1268      275307 :         ASSERT(fbno <= args->agbno);
    1269             : 
    1270             :         /*
    1271             :          * Check for overlapping busy extents.
    1272             :          */
    1273      275307 :         tbno = fbno;
    1274      275307 :         tlen = flen;
    1275      275307 :         xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
    1276             : 
    1277             :         /*
    1278             :          * Give up if the start of the extent is busy, or the freespace isn't
    1279             :          * long enough for the minimum request.
    1280             :          */
    1281      275306 :         if (tbno > args->agbno)
    1282         243 :                 goto not_found;
    1283      275063 :         if (tlen < args->minlen)
    1284      196565 :                 goto not_found;
    1285       78498 :         tend = tbno + tlen;
    1286       78498 :         if (tend < args->agbno + args->minlen)
    1287        6316 :                 goto not_found;
    1288             : 
    1289             :         /*
    1290             :          * End of extent will be smaller of the freespace end and the
    1291             :          * maximal requested end.
    1292             :          *
    1293             :          * Fix the length according to mod and prod if given.
    1294             :          */
    1295       72182 :         args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
    1296       72182 :                                                 - args->agbno;
    1297       72182 :         xfs_alloc_fix_len(args);
    1298       72181 :         ASSERT(args->agbno + args->len <= tend);
    1299             : 
    1300             :         /*
    1301             :          * We are allocating agbno for args->len
    1302             :          * Allocate/initialize a cursor for the by-size btree.
    1303             :          */
    1304       72181 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1305             :                                         args->pag, XFS_BTNUM_CNT);
    1306      144362 :         ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
    1307       72181 :         error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
    1308             :                                       args->len, XFSA_FIXUP_BNO_OK);
    1309       72183 :         if (error) {
    1310           0 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1311           0 :                 goto error0;
    1312             :         }
    1313             : 
    1314       72183 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1315       72183 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1316             : 
    1317       72183 :         args->wasfromfl = 0;
    1318       72183 :         trace_xfs_alloc_exact_done(args);
    1319       72183 :         return 0;
    1320             : 
    1321      242984 : not_found:
    1322             :         /* Didn't find it, return null. */
    1323      242984 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1324      242984 :         args->agbno = NULLAGBLOCK;
    1325      242984 :         trace_xfs_alloc_exact_notfound(args);
    1326      242984 :         return 0;
    1327             : 
    1328           1 : error0:
    1329           1 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1330           1 :         trace_xfs_alloc_exact_error(args);
    1331           1 :         return error;
    1332             : }
    1333             : 
    1334             : /*
    1335             :  * Search a given number of btree records in a given direction. Check each
    1336             :  * record against the good extent we've already found.
    1337             :  */
    1338             : STATIC int
    1339   354293874 : xfs_alloc_walk_iter(
    1340             :         struct xfs_alloc_arg    *args,
    1341             :         struct xfs_alloc_cur    *acur,
    1342             :         struct xfs_btree_cur    *cur,
    1343             :         bool                    increment,
    1344             :         bool                    find_one, /* quit on first candidate */
    1345             :         int                     count,    /* rec count (-1 for infinite) */
    1346             :         int                     *stat)
    1347             : {
    1348   354293874 :         int                     error;
    1349   354293874 :         int                     i;
    1350             : 
    1351   354293874 :         *stat = 0;
    1352             : 
    1353             :         /*
    1354             :          * Search so long as the cursor is active or we find a better extent.
    1355             :          * The cursor is deactivated if it extends beyond the range of the
    1356             :          * current allocation candidate.
    1357             :          */
    1358  8007810146 :         while (xfs_alloc_cur_active(cur) && count) {
    1359  3692653375 :                 error = xfs_alloc_cur_check(args, acur, cur, &i);
    1360  3696509299 :                 if (error)
    1361           0 :                         return error;
    1362  3696509299 :                 if (i == 1) {
    1363   554217829 :                         *stat = 1;
    1364   554217829 :                         if (find_one)
    1365             :                                 break;
    1366             :                 }
    1367  7329979044 :                 if (!xfs_alloc_cur_active(cur))
    1368             :                         break;
    1369             : 
    1370  3653371228 :                 if (increment)
    1371  3174069249 :                         error = xfs_btree_increment(cur, 0, &i);
    1372             :                 else
    1373   479301979 :                         error = xfs_btree_decrement(cur, 0, &i);
    1374  3649611205 :                 if (error)
    1375           6 :                         return error;
    1376  3649611199 :                 if (i == 0)
    1377    25978145 :                         cur->bc_ag.abt.active = false;
    1378             : 
    1379  3649611199 :                 if (count > 0)
    1380   259784263 :                         count--;
    1381             :         }
    1382             : 
    1383             :         return 0;
    1384             : }
    1385             : 
    1386             : /*
    1387             :  * Search the by-bno and by-size btrees in parallel in search of an extent with
    1388             :  * ideal locality based on the NEAR mode ->agbno locality hint.
    1389             :  */
    1390             : STATIC int
    1391    35231381 : xfs_alloc_ag_vextent_locality(
    1392             :         struct xfs_alloc_arg    *args,
    1393             :         struct xfs_alloc_cur    *acur,
    1394             :         int                     *stat)
    1395             : {
    1396    35231381 :         struct xfs_btree_cur    *fbcur = NULL;
    1397    35231381 :         int                     error;
    1398    35231381 :         int                     i;
    1399    35231381 :         bool                    fbinc;
    1400             : 
    1401    35231381 :         ASSERT(acur->len == 0);
    1402             : 
    1403    35231381 :         *stat = 0;
    1404             : 
    1405    35231381 :         error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
    1406    35231771 :         if (error)
    1407             :                 return error;
    1408    35231870 :         error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
    1409    35231952 :         if (error)
    1410             :                 return error;
    1411    35231778 :         error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
    1412    35231828 :         if (error)
    1413             :                 return error;
    1414             : 
    1415             :         /*
    1416             :          * Search the bnobt and cntbt in parallel. Search the bnobt left and
    1417             :          * right and lookup the closest extent to the locality hint for each
    1418             :          * extent size key in the cntbt. The entire search terminates
    1419             :          * immediately on a bnobt hit because that means we've found best case
    1420             :          * locality. Otherwise the search continues until the cntbt cursor runs
    1421             :          * off the end of the tree. If no allocation candidate is found at this
    1422             :          * point, give up on locality, walk backwards from the end of the cntbt
    1423             :          * and take the first available extent.
    1424             :          *
    1425             :          * The parallel tree searches balance each other out to provide fairly
    1426             :          * consistent performance for various situations. The bnobt search can
    1427             :          * have pathological behavior in the worst case scenario of larger
    1428             :          * allocation requests and fragmented free space. On the other hand, the
    1429             :          * bnobt is able to satisfy most smaller allocation requests much more
    1430             :          * quickly than the cntbt. The cntbt search can sift through fragmented
    1431             :          * free space and sets of free extents for larger allocation requests
    1432             :          * more quickly than the bnobt. Since the locality hint is just a hint
    1433             :          * and we don't want to scan the entire bnobt for perfect locality, the
    1434             :          * cntbt search essentially bounds the bnobt search such that we can
    1435             :          * find good enough locality at reasonable performance in most cases.
    1436             :          */
    1437    22705689 :         while (xfs_alloc_cur_active(acur->bnolt) ||
    1438   319985553 :                xfs_alloc_cur_active(acur->bnogt) ||
    1439       12849 :                xfs_alloc_cur_active(acur->cnt)) {
    1440             : 
    1441   159986352 :                 trace_xfs_alloc_cur_lookup(args);
    1442             : 
    1443             :                 /*
    1444             :                  * Search the bnobt left and right. In the case of a hit, finish
    1445             :                  * the search in the opposite direction and we're done.
    1446             :                  */
    1447   159979807 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
    1448             :                                             true, 1, &i);
    1449   160010128 :                 if (error)
    1450           0 :                         return error;
    1451   160010128 :                 if (i == 1) {
    1452    14153342 :                         trace_xfs_alloc_cur_left(args);
    1453    14153445 :                         fbcur = acur->bnogt;
    1454    14153445 :                         fbinc = true;
    1455    14153445 :                         break;
    1456             :                 }
    1457   145856786 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
    1458             :                                             1, &i);
    1459   145864625 :                 if (error)
    1460           3 :                         return error;
    1461   145864622 :                 if (i == 1) {
    1462     8682651 :                         trace_xfs_alloc_cur_right(args);
    1463     8682683 :                         fbcur = acur->bnolt;
    1464     8682683 :                         fbinc = false;
    1465     8682683 :                         break;
    1466             :                 }
    1467             : 
    1468             :                 /*
    1469             :                  * Check the extent with best locality based on the current
    1470             :                  * extent size search key and keep track of the best candidate.
    1471             :                  */
    1472   137181971 :                 error = xfs_alloc_cntbt_iter(args, acur);
    1473   137149853 :                 if (error)
    1474           0 :                         return error;
    1475   274299706 :                 if (!xfs_alloc_cur_active(acur->cnt)) {
    1476    12395329 :                         trace_xfs_alloc_cur_lookup_done(args);
    1477    12395329 :                         break;
    1478             :                 }
    1479             :         }
    1480             : 
    1481             :         /*
    1482             :          * If we failed to find anything due to busy extents, return empty
    1483             :          * handed so the caller can flush and retry. If no busy extents were
    1484             :          * found, walk backwards from the end of the cntbt as a last resort.
    1485             :          */
    1486    70463042 :         if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
    1487       25999 :                 error = xfs_btree_decrement(acur->cnt, 0, &i);
    1488       25999 :                 if (error)
    1489             :                         return error;
    1490       25999 :                 if (i) {
    1491       25999 :                         acur->cnt->bc_ag.abt.active = true;
    1492       25999 :                         fbcur = acur->cnt;
    1493       25999 :                         fbinc = false;
    1494             :                 }
    1495             :         }
    1496             : 
    1497             :         /*
    1498             :          * Search in the opposite direction for a better entry in the case of
    1499             :          * a bnobt hit or walk backwards from the end of the cntbt.
    1500             :          */
    1501    35231521 :         if (fbcur) {
    1502    22862013 :                 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
    1503             :                                             &i);
    1504    22862288 :                 if (error)
    1505             :                         return error;
    1506             :         }
    1507             : 
    1508    35231793 :         if (acur->len)
    1509    35195857 :                 *stat = 1;
    1510             : 
    1511             :         return 0;
    1512             : }
    1513             : 
    1514             : /* Check the last block of the cnt btree for allocations. */
    1515             : static int
    1516    51495913 : xfs_alloc_ag_vextent_lastblock(
    1517             :         struct xfs_alloc_arg    *args,
    1518             :         struct xfs_alloc_cur    *acur,
    1519             :         xfs_agblock_t           *bno,
    1520             :         xfs_extlen_t            *len,
    1521             :         bool                    *allocated)
    1522             : {
    1523    51495913 :         int                     error;
    1524    51495913 :         int                     i;
    1525             : 
    1526             : #ifdef DEBUG
    1527             :         /* Randomly don't execute the first algorithm. */
    1528    51495913 :         if (get_random_u32_below(2))
    1529             :                 return 0;
    1530             : #endif
    1531             : 
    1532             :         /*
    1533             :          * Start from the entry that lookup found, sequence through all larger
    1534             :          * free blocks.  If we're actually pointing at a record smaller than
    1535             :          * maxlen, go to the start of this block, and skip all those smaller
    1536             :          * than minlen.
    1537             :          */
    1538    25741525 :         if (*len || args->alignment > 1) {
    1539      271303 :                 acur->cnt->bc_levels[0].ptr = 1;
    1540    34253223 :                 do {
    1541    34253223 :                         error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
    1542    34253221 :                         if (error)
    1543           0 :                                 return error;
    1544    34253221 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1545           0 :                                 xfs_btree_mark_sick(acur->cnt);
    1546           0 :                                 return -EFSCORRUPTED;
    1547             :                         }
    1548    34253221 :                         if (*len >= args->minlen)
    1549             :                                 break;
    1550    33981916 :                         error = xfs_btree_increment(acur->cnt, 0, &i);
    1551    33981920 :                         if (error)
    1552           0 :                                 return error;
    1553    33981920 :                 } while (i);
    1554      271305 :                 ASSERT(*len >= args->minlen);
    1555      271305 :                 if (!i)
    1556             :                         return 0;
    1557             :         }
    1558             : 
    1559    25741527 :         error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
    1560    25741710 :         if (error)
    1561             :                 return error;
    1562             : 
    1563             :         /*
    1564             :          * It didn't work.  We COULD be in a case where there's a good record
    1565             :          * somewhere, so try again.
    1566             :          */
    1567    25741710 :         if (acur->len == 0)
    1568             :                 return 0;
    1569             : 
    1570    25695086 :         trace_xfs_alloc_near_first(args);
    1571    25695308 :         *allocated = true;
    1572    25695308 :         return 0;
    1573             : }
    1574             : 
    1575             : /*
    1576             :  * Allocate a variable extent near bno in the allocation group agno.
    1577             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1578             :  * and of the form k * prod + mod unless there's nothing that large.
    1579             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1580             :  */
    1581             : STATIC int
    1582    60892225 : xfs_alloc_ag_vextent_near(
    1583             :         struct xfs_alloc_arg    *args,
    1584             :         uint32_t                alloc_flags)
    1585             : {
    1586    60892225 :         struct xfs_alloc_cur    acur = {};
    1587    60892225 :         int                     error;          /* error code */
    1588    60892225 :         int                     i;              /* result code, temporary */
    1589    60892225 :         xfs_agblock_t           bno;
    1590    60892225 :         xfs_extlen_t            len;
    1591             : 
    1592             :         /* handle uninitialized agbno range so caller doesn't have to */
    1593    60892225 :         if (!args->min_agbno && !args->max_agbno)
    1594    60572724 :                 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
    1595    60892225 :         ASSERT(args->min_agbno <= args->max_agbno);
    1596             : 
    1597             :         /* clamp agbno to the range if it's outside */
    1598    60892225 :         if (args->agbno < args->min_agbno)
    1599       94005 :                 args->agbno = args->min_agbno;
    1600    60892225 :         if (args->agbno > args->max_agbno)
    1601           0 :                 args->agbno = args->max_agbno;
    1602             : 
    1603             :         /* Retry once quickly if we find busy extents before blocking. */
    1604    60892225 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1605    60926906 : restart:
    1606    60926906 :         len = 0;
    1607             : 
    1608             :         /*
    1609             :          * Set up cursors and see if there are any free extents as big as
    1610             :          * maxlen. If not, pick the last entry in the tree unless the tree is
    1611             :          * empty.
    1612             :          */
    1613    60926906 :         error = xfs_alloc_cur_setup(args, &acur);
    1614    60926672 :         if (error == -ENOSPC) {
    1615       65139 :                 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
    1616             :                                 &len, &i);
    1617       65139 :                 if (error)
    1618           0 :                         goto out;
    1619       65139 :                 if (i == 0 || len == 0) {
    1620           0 :                         trace_xfs_alloc_near_noentry(args);
    1621           0 :                         goto out;
    1622             :                 }
    1623       65139 :                 ASSERT(i == 1);
    1624    60861533 :         } else if (error) {
    1625         482 :                 goto out;
    1626             :         }
    1627             : 
    1628             :         /*
    1629             :          * First algorithm.
    1630             :          * If the requested extent is large wrt the freespaces available
    1631             :          * in this a.g., then the cursor will be pointing to a btree entry
    1632             :          * near the right edge of the tree.  If it's in the last btree leaf
    1633             :          * block, then we just examine all the entries in that block
    1634             :          * that are big enough, and pick the best one.
    1635             :          */
    1636    60926190 :         if (xfs_btree_islastblock(acur.cnt, 0)) {
    1637    51495838 :                 bool            allocated = false;
    1638             : 
    1639    51495838 :                 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
    1640             :                                 &allocated);
    1641    51496343 :                 if (error)
    1642           0 :                         goto out;
    1643    51496343 :                 if (allocated)
    1644    25695310 :                         goto alloc_finish;
    1645             :         }
    1646             : 
    1647             :         /*
    1648             :          * Second algorithm. Combined cntbt and bnobt search to find ideal
    1649             :          * locality.
    1650             :          */
    1651    35231259 :         error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
    1652    35231502 :         if (error)
    1653         241 :                 goto out;
    1654             : 
    1655             :         /*
    1656             :          * If we couldn't get anything, give up.
    1657             :          */
    1658    35231261 :         if (!acur.len) {
    1659       35374 :                 if (acur.busy) {
    1660             :                         /*
    1661             :                          * Our only valid extents must have been busy. Flush and
    1662             :                          * retry the allocation again. If we get an -EAGAIN
    1663             :                          * error, we're being told that a deadlock was avoided
    1664             :                          * and the current transaction needs committing before
    1665             :                          * the allocation can be retried.
    1666             :                          */
    1667       34685 :                         trace_xfs_alloc_near_busy(args);
    1668       34685 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1669             :                                         acur.busy_gen, alloc_flags);
    1670       34685 :                         if (error)
    1671           4 :                                 goto out;
    1672             : 
    1673       34681 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1674       34681 :                         goto restart;
    1675             :                 }
    1676         689 :                 trace_xfs_alloc_size_neither(args);
    1677         689 :                 args->agbno = NULLAGBLOCK;
    1678         689 :                 goto out;
    1679             :         }
    1680             : 
    1681    35195887 : alloc_finish:
    1682             :         /* fix up btrees on a successful allocation */
    1683    60891197 :         error = xfs_alloc_cur_finish(args, &acur);
    1684             : 
    1685    60891842 : out:
    1686    60891842 :         xfs_alloc_cur_close(&acur, error);
    1687    60892344 :         return error;
    1688             : }
    1689             : 
    1690             : /*
    1691             :  * Allocate a variable extent anywhere in the allocation group agno.
    1692             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1693             :  * and of the form k * prod + mod unless there's nothing that large.
    1694             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1695             :  */
    1696             : static int
    1697     3219888 : xfs_alloc_ag_vextent_size(
    1698             :         struct xfs_alloc_arg    *args,
    1699             :         uint32_t                alloc_flags)
    1700             : {
    1701     3219888 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1702     3219888 :         struct xfs_btree_cur    *bno_cur;
    1703     3219888 :         struct xfs_btree_cur    *cnt_cur;
    1704     3219888 :         xfs_agblock_t           fbno;           /* start of found freespace */
    1705     3219888 :         xfs_extlen_t            flen;           /* length of found freespace */
    1706     3219888 :         xfs_agblock_t           rbno;           /* returned block number */
    1707     3219888 :         xfs_extlen_t            rlen;           /* length of returned extent */
    1708     3219888 :         bool                    busy;
    1709     3219888 :         unsigned                busy_gen;
    1710     3219888 :         int                     error;
    1711     3219888 :         int                     i;
    1712             : 
    1713             :         /* Retry once quickly if we find busy extents before blocking. */
    1714     3219888 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1715             : restart:
    1716             :         /*
    1717             :          * Allocate and initialize a cursor for the by-size btree.
    1718             :          */
    1719     3230280 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1720             :                                         args->pag, XFS_BTNUM_CNT);
    1721     3230288 :         bno_cur = NULL;
    1722             : 
    1723             :         /*
    1724             :          * Look for an entry >= maxlen+alignment-1 blocks.
    1725             :          */
    1726     3230296 :         if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
    1727     3230288 :                         args->maxlen + args->alignment - 1, &i)))
    1728          74 :                 goto error0;
    1729             : 
    1730             :         /*
    1731             :          * If none then we have to settle for a smaller extent. In the case that
    1732             :          * there are no large extents, this will return the last entry in the
    1733             :          * tree unless the tree is empty. In the case that there are only busy
    1734             :          * large extents, this will return the largest small extent unless there
    1735             :          * are no smaller extents available.
    1736             :          */
    1737     3230222 :         if (!i) {
    1738       29633 :                 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
    1739             :                                                    &fbno, &flen, &i);
    1740       29633 :                 if (error)
    1741           0 :                         goto error0;
    1742       29633 :                 if (i == 0 || flen == 0) {
    1743           0 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1744           0 :                         trace_xfs_alloc_size_noentry(args);
    1745           0 :                         return 0;
    1746             :                 }
    1747       29633 :                 ASSERT(i == 1);
    1748       29633 :                 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
    1749             :                                 &rlen, &busy_gen);
    1750             :         } else {
    1751             :                 /*
    1752             :                  * Search for a non-busy extent that is large enough.
    1753             :                  */
    1754   284000763 :                 for (;;) {
    1755   143600676 :                         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
    1756   143590265 :                         if (error)
    1757           0 :                                 goto error0;
    1758   143590265 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1759           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1760           0 :                                 error = -EFSCORRUPTED;
    1761           0 :                                 goto error0;
    1762             :                         }
    1763             : 
    1764   143590265 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1765             :                                         &rbno, &rlen, &busy_gen);
    1766             : 
    1767   143600602 :                         if (rlen >= args->maxlen)
    1768             :                                 break;
    1769             : 
    1770   140410208 :                         error = xfs_btree_increment(cnt_cur, 0, &i);
    1771   140410285 :                         if (error)
    1772           0 :                                 goto error0;
    1773   140410285 :                         if (i)
    1774   140400087 :                                 continue;
    1775             : 
    1776             :                         /*
    1777             :                          * Our only valid extents must have been busy. Flush and
    1778             :                          * retry the allocation again. If we get an -EAGAIN
    1779             :                          * error, we're being told that a deadlock was avoided
    1780             :                          * and the current transaction needs committing before
    1781             :                          * the allocation can be retried.
    1782             :                          */
    1783       10198 :                         trace_xfs_alloc_size_busy(args);
    1784       10198 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1785             :                                         busy_gen, alloc_flags);
    1786       10198 :                         if (error)
    1787           0 :                                 goto error0;
    1788             : 
    1789       10198 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1790       10198 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1791       10198 :                         goto restart;
    1792             :                 }
    1793             :         }
    1794             : 
    1795             :         /*
    1796             :          * In the first case above, we got the last entry in the
    1797             :          * by-size btree.  Now we check to see if the space hits maxlen
    1798             :          * once aligned; if not, we search left for something better.
    1799             :          * This can't happen in the second case above.
    1800             :          */
    1801     3219986 :         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1802     3219986 :         if (XFS_IS_CORRUPT(args->mp,
    1803             :                            rlen != 0 &&
    1804             :                            (rlen > flen ||
    1805             :                             rbno + rlen > fbno + flen))) {
    1806           0 :                 xfs_btree_mark_sick(cnt_cur);
    1807           0 :                 error = -EFSCORRUPTED;
    1808           0 :                 goto error0;
    1809             :         }
    1810     3219986 :         if (rlen < args->maxlen) {
    1811       29633 :                 xfs_agblock_t   bestfbno;
    1812       29633 :                 xfs_extlen_t    bestflen;
    1813       29633 :                 xfs_agblock_t   bestrbno;
    1814       29633 :                 xfs_extlen_t    bestrlen;
    1815             : 
    1816       29633 :                 bestrlen = rlen;
    1817       29633 :                 bestrbno = rbno;
    1818       29633 :                 bestflen = flen;
    1819       29633 :                 bestfbno = fbno;
    1820     3566824 :                 for (;;) {
    1821     3566824 :                         if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
    1822           0 :                                 goto error0;
    1823     3566824 :                         if (i == 0)
    1824             :                                 break;
    1825     3566684 :                         if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
    1826             :                                         &i)))
    1827           0 :                                 goto error0;
    1828     3566684 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1829           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1830           0 :                                 error = -EFSCORRUPTED;
    1831           0 :                                 goto error0;
    1832             :                         }
    1833     3566684 :                         if (flen < bestrlen)
    1834             :                                 break;
    1835     3537191 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1836             :                                         &rbno, &rlen, &busy_gen);
    1837     3537191 :                         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1838     3537191 :                         if (XFS_IS_CORRUPT(args->mp,
    1839             :                                            rlen != 0 &&
    1840             :                                            (rlen > flen ||
    1841             :                                             rbno + rlen > fbno + flen))) {
    1842           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1843           0 :                                 error = -EFSCORRUPTED;
    1844           0 :                                 goto error0;
    1845             :                         }
    1846     3537191 :                         if (rlen > bestrlen) {
    1847       17091 :                                 bestrlen = rlen;
    1848       17091 :                                 bestrbno = rbno;
    1849       17091 :                                 bestflen = flen;
    1850       17091 :                                 bestfbno = fbno;
    1851       17091 :                                 if (rlen == args->maxlen)
    1852             :                                         break;
    1853             :                         }
    1854             :                 }
    1855       29633 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
    1856             :                                 &i)))
    1857           0 :                         goto error0;
    1858       29633 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1859           0 :                         xfs_btree_mark_sick(cnt_cur);
    1860           0 :                         error = -EFSCORRUPTED;
    1861           0 :                         goto error0;
    1862             :                 }
    1863       29633 :                 rlen = bestrlen;
    1864       29633 :                 rbno = bestrbno;
    1865       29633 :                 flen = bestflen;
    1866       29633 :                 fbno = bestfbno;
    1867             :         }
    1868     3219986 :         args->wasfromfl = 0;
    1869             :         /*
    1870             :          * Fix up the length.
    1871             :          */
    1872     3219986 :         args->len = rlen;
    1873     3219986 :         if (rlen < args->minlen) {
    1874       17129 :                 if (busy) {
    1875             :                         /*
    1876             :                          * Our only valid extents must have been busy. Flush and
    1877             :                          * retry the allocation again. If we get an -EAGAIN
    1878             :                          * error, we're being told that a deadlock was avoided
    1879             :                          * and the current transaction needs committing before
    1880             :                          * the allocation can be retried.
    1881             :                          */
    1882         194 :                         trace_xfs_alloc_size_busy(args);
    1883         194 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1884             :                                         busy_gen, alloc_flags);
    1885         194 :                         if (error)
    1886           0 :                                 goto error0;
    1887             : 
    1888         194 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1889         194 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1890         194 :                         goto restart;
    1891             :                 }
    1892       16935 :                 goto out_nominleft;
    1893             :         }
    1894     3202857 :         xfs_alloc_fix_len(args);
    1895             : 
    1896     3202880 :         rlen = args->len;
    1897     3202880 :         if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
    1898           0 :                 xfs_btree_mark_sick(cnt_cur);
    1899           0 :                 error = -EFSCORRUPTED;
    1900           0 :                 goto error0;
    1901             :         }
    1902             :         /*
    1903             :          * Allocate and initialize a cursor for the by-block tree.
    1904             :          */
    1905     3202880 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1906             :                                         args->pag, XFS_BTNUM_BNO);
    1907     3202852 :         if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
    1908             :                         rbno, rlen, XFSA_FIXUP_CNT_OK)))
    1909          55 :                 goto error0;
    1910     3202815 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1911     3202825 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1912     3202826 :         cnt_cur = bno_cur = NULL;
    1913     3202826 :         args->len = rlen;
    1914     3202826 :         args->agbno = rbno;
    1915     6405652 :         if (XFS_IS_CORRUPT(args->mp,
    1916             :                            args->agbno + args->len >
    1917             :                            be32_to_cpu(agf->agf_length))) {
    1918           0 :                 xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
    1919           0 :                 error = -EFSCORRUPTED;
    1920           0 :                 goto error0;
    1921             :         }
    1922     3202826 :         trace_xfs_alloc_size_done(args);
    1923     3202826 :         return 0;
    1924             : 
    1925         129 : error0:
    1926         129 :         trace_xfs_alloc_size_error(args);
    1927         129 :         if (cnt_cur)
    1928         129 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1929         129 :         if (bno_cur)
    1930          55 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1931             :         return error;
    1932             : 
    1933             : out_nominleft:
    1934       16935 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1935       16935 :         trace_xfs_alloc_size_nominleft(args);
    1936       16935 :         args->agbno = NULLAGBLOCK;
    1937       16935 :         return 0;
    1938             : }
    1939             : 
    1940             : /*
    1941             :  * Free the extent starting at agno/bno for length.
    1942             :  */
    1943             : STATIC int
    1944    53018256 : xfs_free_ag_extent(
    1945             :         struct xfs_trans                *tp,
    1946             :         struct xfs_buf                  *agbp,
    1947             :         xfs_agnumber_t                  agno,
    1948             :         xfs_agblock_t                   bno,
    1949             :         xfs_extlen_t                    len,
    1950             :         const struct xfs_owner_info     *oinfo,
    1951             :         enum xfs_ag_resv_type           type)
    1952             : {
    1953    53018256 :         struct xfs_mount                *mp;
    1954    53018256 :         struct xfs_btree_cur            *bno_cur;
    1955    53018256 :         struct xfs_btree_cur            *cnt_cur;
    1956    53018256 :         xfs_agblock_t                   gtbno; /* start of right neighbor */
    1957    53018256 :         xfs_extlen_t                    gtlen; /* length of right neighbor */
    1958    53018256 :         xfs_agblock_t                   ltbno; /* start of left neighbor */
    1959    53018256 :         xfs_extlen_t                    ltlen; /* length of left neighbor */
    1960    53018256 :         xfs_agblock_t                   nbno; /* new starting block of freesp */
    1961    53018256 :         xfs_extlen_t                    nlen; /* new length of freespace */
    1962    53018256 :         int                             haveleft; /* have a left neighbor */
    1963    53018256 :         int                             haveright; /* have a right neighbor */
    1964    53018256 :         int                             i;
    1965    53018256 :         int                             error;
    1966    53018256 :         struct xfs_perag                *pag = agbp->b_pag;
    1967             : 
    1968    53018256 :         bno_cur = cnt_cur = NULL;
    1969    53018256 :         mp = tp->t_mountp;
    1970             : 
    1971    53018256 :         if (!xfs_rmap_should_skip_owner_update(oinfo)) {
    1972     1596437 :                 error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
    1973     1596432 :                 if (error)
    1974          17 :                         goto error0;
    1975             :         }
    1976             : 
    1977             :         /*
    1978             :          * Allocate and initialize a cursor for the by-block btree.
    1979             :          */
    1980    53018234 :         bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
    1981             :         /*
    1982             :          * Look for a neighboring block on the left (lower block numbers)
    1983             :          * that is contiguous with this space.
    1984             :          */
    1985    53018356 :         if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
    1986         172 :                 goto error0;
    1987    53018210 :         if (haveleft) {
    1988             :                 /*
    1989             :                  * There is a block to our left.
    1990             :                  */
    1991    51544719 :                 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
    1992           0 :                         goto error0;
    1993    51544676 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1994           0 :                         xfs_btree_mark_sick(bno_cur);
    1995           0 :                         error = -EFSCORRUPTED;
    1996           0 :                         goto error0;
    1997             :                 }
    1998             :                 /*
    1999             :                  * It's not contiguous, though.
    2000             :                  */
    2001    51544676 :                 if (ltbno + ltlen < bno)
    2002    39424537 :                         haveleft = 0;
    2003             :                 else {
    2004             :                         /*
    2005             :                          * If this failure happens the request to free this
    2006             :                          * space was invalid, it's (partly) already free.
    2007             :                          * Very bad.
    2008             :                          */
    2009    12120139 :                         if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
    2010           0 :                                 xfs_btree_mark_sick(bno_cur);
    2011           0 :                                 error = -EFSCORRUPTED;
    2012           0 :                                 goto error0;
    2013             :                         }
    2014             :                 }
    2015             :         }
    2016             :         /*
    2017             :          * Look for a neighboring block on the right (higher block numbers)
    2018             :          * that is contiguous with this space.
    2019             :          */
    2020    53018167 :         if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
    2021           0 :                 goto error0;
    2022    53017955 :         if (haveright) {
    2023             :                 /*
    2024             :                  * There is a block to our right.
    2025             :                  */
    2026    52955639 :                 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
    2027           0 :                         goto error0;
    2028    52955854 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2029           0 :                         xfs_btree_mark_sick(bno_cur);
    2030           0 :                         error = -EFSCORRUPTED;
    2031           0 :                         goto error0;
    2032             :                 }
    2033             :                 /*
    2034             :                  * It's not contiguous, though.
    2035             :                  */
    2036    52955854 :                 if (bno + len < gtbno)
    2037    36704581 :                         haveright = 0;
    2038             :                 else {
    2039             :                         /*
    2040             :                          * If this failure happens the request to free this
    2041             :                          * space was invalid, it's (partly) already free.
    2042             :                          * Very bad.
    2043             :                          */
    2044    16251273 :                         if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
    2045           0 :                                 xfs_btree_mark_sick(bno_cur);
    2046           0 :                                 error = -EFSCORRUPTED;
    2047           0 :                                 goto error0;
    2048             :                         }
    2049             :                 }
    2050             :         }
    2051             :         /*
    2052             :          * Now allocate and initialize a cursor for the by-size tree.
    2053             :          */
    2054    53018170 :         cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
    2055             :         /*
    2056             :          * Have both left and right contiguous neighbors.
    2057             :          * Merge all three into a single free block.
    2058             :          */
    2059    53018105 :         if (haveleft && haveright) {
    2060             :                 /*
    2061             :                  * Delete the old by-size entry on the left.
    2062             :                  */
    2063     6531986 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2064          19 :                         goto error0;
    2065     6531968 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2066           0 :                         xfs_btree_mark_sick(cnt_cur);
    2067           0 :                         error = -EFSCORRUPTED;
    2068           0 :                         goto error0;
    2069             :                 }
    2070     6531968 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2071           0 :                         goto error0;
    2072     6531968 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2073           0 :                         xfs_btree_mark_sick(cnt_cur);
    2074           0 :                         error = -EFSCORRUPTED;
    2075           0 :                         goto error0;
    2076             :                 }
    2077             :                 /*
    2078             :                  * Delete the old by-size entry on the right.
    2079             :                  */
    2080     6531968 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2081           7 :                         goto error0;
    2082     6531958 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2083           0 :                         xfs_btree_mark_sick(cnt_cur);
    2084           0 :                         error = -EFSCORRUPTED;
    2085           0 :                         goto error0;
    2086             :                 }
    2087     6531958 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2088           0 :                         goto error0;
    2089     6531962 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2090           0 :                         xfs_btree_mark_sick(cnt_cur);
    2091           0 :                         error = -EFSCORRUPTED;
    2092           0 :                         goto error0;
    2093             :                 }
    2094             :                 /*
    2095             :                  * Delete the old by-block entry for the right block.
    2096             :                  */
    2097     6531962 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
    2098           0 :                         goto error0;
    2099     6531960 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2100           0 :                         xfs_btree_mark_sick(bno_cur);
    2101           0 :                         error = -EFSCORRUPTED;
    2102           0 :                         goto error0;
    2103             :                 }
    2104             :                 /*
    2105             :                  * Move the by-block cursor back to the left neighbor.
    2106             :                  */
    2107     6531960 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2108           0 :                         goto error0;
    2109     6531958 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2110           0 :                         xfs_btree_mark_sick(bno_cur);
    2111           0 :                         error = -EFSCORRUPTED;
    2112           0 :                         goto error0;
    2113             :                 }
    2114             : #ifdef DEBUG
    2115             :                 /*
    2116             :                  * Check that this is the right record: delete didn't
    2117             :                  * mangle the cursor.
    2118             :                  */
    2119             :                 {
    2120     6531958 :                         xfs_agblock_t   xxbno;
    2121     6531958 :                         xfs_extlen_t    xxlen;
    2122             : 
    2123     6531958 :                         if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
    2124             :                                         &i)))
    2125           0 :                                 goto error0;
    2126     6531958 :                         if (XFS_IS_CORRUPT(mp,
    2127             :                                            i != 1 ||
    2128             :                                            xxbno != ltbno ||
    2129             :                                            xxlen != ltlen)) {
    2130           0 :                                 xfs_btree_mark_sick(bno_cur);
    2131           0 :                                 error = -EFSCORRUPTED;
    2132           0 :                                 goto error0;
    2133             :                         }
    2134             :                 }
    2135             : #endif
    2136             :                 /*
    2137             :                  * Update remaining by-block entry to the new, joined block.
    2138             :                  */
    2139     6531958 :                 nbno = ltbno;
    2140     6531958 :                 nlen = len + ltlen + gtlen;
    2141     6531958 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2142           0 :                         goto error0;
    2143             :         }
    2144             :         /*
    2145             :          * Have only a left contiguous neighbor.
    2146             :          * Merge it together with the new freespace.
    2147             :          */
    2148    46486119 :         else if (haveleft) {
    2149             :                 /*
    2150             :                  * Delete the old by-size entry on the left.
    2151             :                  */
    2152     5588149 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2153           4 :                         goto error0;
    2154     5588149 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2155           0 :                         xfs_btree_mark_sick(cnt_cur);
    2156           0 :                         error = -EFSCORRUPTED;
    2157           0 :                         goto error0;
    2158             :                 }
    2159     5588149 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2160           0 :                         goto error0;
    2161     5588150 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2162           0 :                         xfs_btree_mark_sick(cnt_cur);
    2163           0 :                         error = -EFSCORRUPTED;
    2164           0 :                         goto error0;
    2165             :                 }
    2166             :                 /*
    2167             :                  * Back up the by-block cursor to the left neighbor, and
    2168             :                  * update its length.
    2169             :                  */
    2170     5588150 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2171           0 :                         goto error0;
    2172     5588146 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2173           0 :                         xfs_btree_mark_sick(bno_cur);
    2174           0 :                         error = -EFSCORRUPTED;
    2175           0 :                         goto error0;
    2176             :                 }
    2177     5588146 :                 nbno = ltbno;
    2178     5588146 :                 nlen = len + ltlen;
    2179     5588146 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2180           0 :                         goto error0;
    2181             :         }
    2182             :         /*
    2183             :          * Have only a right contiguous neighbor.
    2184             :          * Merge it together with the new freespace.
    2185             :          */
    2186    40897970 :         else if (haveright) {
    2187             :                 /*
    2188             :                  * Delete the old by-size entry on the right.
    2189             :                  */
    2190     9719284 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2191          20 :                         goto error0;
    2192     9719262 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2193           0 :                         xfs_btree_mark_sick(cnt_cur);
    2194           0 :                         error = -EFSCORRUPTED;
    2195           0 :                         goto error0;
    2196             :                 }
    2197     9719262 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2198           0 :                         goto error0;
    2199     9719272 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2200           0 :                         xfs_btree_mark_sick(cnt_cur);
    2201           0 :                         error = -EFSCORRUPTED;
    2202           0 :                         goto error0;
    2203             :                 }
    2204             :                 /*
    2205             :                  * Update the starting block and length of the right
    2206             :                  * neighbor in the by-block tree.
    2207             :                  */
    2208     9719272 :                 nbno = bno;
    2209     9719272 :                 nlen = len + gtlen;
    2210     9719272 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2211           0 :                         goto error0;
    2212             :         }
    2213             :         /*
    2214             :          * No contiguous neighbors.
    2215             :          * Insert the new freespace into the by-block tree.
    2216             :          */
    2217             :         else {
    2218    31178686 :                 nbno = bno;
    2219    31178686 :                 nlen = len;
    2220    31178686 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
    2221           0 :                         goto error0;
    2222    31178655 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2223           0 :                         xfs_btree_mark_sick(bno_cur);
    2224           0 :                         error = -EFSCORRUPTED;
    2225           0 :                         goto error0;
    2226             :                 }
    2227             :         }
    2228    53018038 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    2229    53018023 :         bno_cur = NULL;
    2230             :         /*
    2231             :          * In all cases we need to insert the new freespace in the by-size tree.
    2232             :          */
    2233    53018023 :         if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
    2234          39 :                 goto error0;
    2235    53017999 :         if (XFS_IS_CORRUPT(mp, i != 0)) {
    2236           0 :                 xfs_btree_mark_sick(cnt_cur);
    2237           0 :                 error = -EFSCORRUPTED;
    2238           0 :                 goto error0;
    2239             :         }
    2240    53017999 :         if ((error = xfs_btree_insert(cnt_cur, &i)))
    2241           0 :                 goto error0;
    2242    53017891 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    2243           0 :                 xfs_btree_mark_sick(cnt_cur);
    2244           0 :                 error = -EFSCORRUPTED;
    2245           0 :                 goto error0;
    2246             :         }
    2247    53017891 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    2248    53017936 :         cnt_cur = NULL;
    2249             : 
    2250             :         /*
    2251             :          * Update the freespace totals in the ag and superblock.
    2252             :          */
    2253    53017936 :         error = xfs_alloc_update_counters(tp, agbp, len);
    2254    53017954 :         xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
    2255    53017579 :         if (error)
    2256           0 :                 goto error0;
    2257             : 
    2258    53017579 :         XFS_STATS_INC(mp, xs_freex);
    2259    53017579 :         XFS_STATS_ADD(mp, xs_freeb, len);
    2260             : 
    2261    53017579 :         trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
    2262             : 
    2263    53017579 :         return 0;
    2264             : 
    2265         278 :  error0:
    2266         278 :         trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
    2267         278 :         if (bno_cur)
    2268         222 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    2269         278 :         if (cnt_cur)
    2270          89 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    2271             :         return error;
    2272             : }
    2273             : 
    2274             : /*
    2275             :  * Visible (exported) allocation/free functions.
    2276             :  * Some of these are used just by xfs_alloc_btree.c and this file.
    2277             :  */
    2278             : 
    2279             : /*
    2280             :  * Compute and fill in value of m_alloc_maxlevels.
    2281             :  */
    2282             : void
    2283       24337 : xfs_alloc_compute_maxlevels(
    2284             :         xfs_mount_t     *mp)    /* file system mount structure */
    2285             : {
    2286       48674 :         mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
    2287       24337 :                         (mp->m_sb.sb_agblocks + 1) / 2);
    2288       24337 :         ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
    2289       24337 : }
    2290             : 
    2291             : /*
    2292             :  * Find the length of the longest extent in an AG.  The 'need' parameter
    2293             :  * specifies how much space we're going to need for the AGFL and the
    2294             :  * 'reserved' parameter tells us how many blocks in this AG are reserved for
    2295             :  * other callers.
    2296             :  */
    2297             : xfs_extlen_t
    2298   226169299 : xfs_alloc_longest_free_extent(
    2299             :         struct xfs_perag        *pag,
    2300             :         xfs_extlen_t            need,
    2301             :         xfs_extlen_t            reserved)
    2302             : {
    2303   226169299 :         xfs_extlen_t            delta = 0;
    2304             : 
    2305             :         /*
    2306             :          * If the AGFL needs a recharge, we'll have to subtract that from the
    2307             :          * longest extent.
    2308             :          */
    2309   226169299 :         if (need > pag->pagf_flcount)
    2310      402499 :                 delta = need - pag->pagf_flcount;
    2311             : 
    2312             :         /*
    2313             :          * If we cannot maintain others' reservations with space from the
    2314             :          * not-longest freesp extents, we'll have to subtract /that/ from
    2315             :          * the longest extent too.
    2316             :          */
    2317   226169299 :         if (pag->pagf_freeblks - pag->pagf_longest < reserved)
    2318   182502619 :                 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
    2319             : 
    2320             :         /*
    2321             :          * If the longest extent is long enough to satisfy all the
    2322             :          * reservations and AGFL rules in place, we can return this extent.
    2323             :          */
    2324   226169299 :         if (pag->pagf_longest > delta)
    2325   226134408 :                 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
    2326             :                                 pag->pagf_longest - delta);
    2327             : 
    2328             :         /* Otherwise, let the caller try for 1 block if there's space. */
    2329       34891 :         return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
    2330             : }
    2331             : 
    2332             : /*
    2333             :  * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
    2334             :  * return the largest possible minimum length.
    2335             :  */
    2336             : unsigned int
    2337   843540793 : xfs_alloc_min_freelist(
    2338             :         struct xfs_mount        *mp,
    2339             :         struct xfs_perag        *pag)
    2340             : {
    2341             :         /* AG btrees have at least 1 level. */
    2342   843540793 :         static const uint8_t    fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
    2343   843540793 :         const uint8_t           *levels = pag ? pag->pagf_levels : fake_levels;
    2344   843540793 :         unsigned int            min_free;
    2345             : 
    2346   843540793 :         ASSERT(mp->m_alloc_maxlevels > 0);
    2347             : 
    2348             :         /* space needed by-bno freespace btree */
    2349   843540793 :         min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
    2350             :                                        mp->m_alloc_maxlevels);
    2351             :         /* space needed by-size freespace btree */
    2352   843540793 :         min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
    2353             :                                        mp->m_alloc_maxlevels);
    2354             :         /* space needed reverse mapping used space btree */
    2355   843540793 :         if (xfs_has_rmapbt(mp))
    2356   843279004 :                 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
    2357             :                                                 mp->m_rmap_maxlevels);
    2358             : 
    2359   843540793 :         return min_free;
    2360             : }
    2361             : 
    2362             : /*
    2363             :  * Check if the operation we are fixing up the freelist for should go ahead or
    2364             :  * not. If we are freeing blocks, we always allow it, otherwise the allocation
    2365             :  * is dependent on whether the size and shape of free space available will
    2366             :  * permit the requested allocation to take place.
    2367             :  */
    2368             : static bool
    2369   782851334 : xfs_alloc_space_available(
    2370             :         struct xfs_alloc_arg    *args,
    2371             :         xfs_extlen_t            min_free,
    2372             :         int                     flags)
    2373             : {
    2374   782851334 :         struct xfs_perag        *pag = args->pag;
    2375   782851334 :         xfs_extlen_t            alloc_len, longest;
    2376   782851334 :         xfs_extlen_t            reservation; /* blocks that are still reserved */
    2377   782851334 :         int                     available;
    2378   782851334 :         xfs_extlen_t            agflcount;
    2379             : 
    2380   782851334 :         if (flags & XFS_ALLOC_FLAG_FREEING)
    2381             :                 return true;
    2382             : 
    2383   165361714 :         reservation = xfs_ag_resv_needed(pag, args->resv);
    2384             : 
    2385             :         /* do we have enough contiguous free space for the allocation? */
    2386   165358781 :         alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
    2387   165358781 :         longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
    2388   165358781 :         if (longest < alloc_len)
    2389             :                 return false;
    2390             : 
    2391             :         /*
    2392             :          * Do we have enough free space remaining for the allocation? Don't
    2393             :          * account extra agfl blocks because we are about to defer free them,
    2394             :          * making them unavailable until the current transaction commits.
    2395             :          */
    2396   150815841 :         agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
    2397   150815841 :         available = (int)(pag->pagf_freeblks + agflcount -
    2398   150815841 :                           reservation - min_free - args->minleft);
    2399   150815841 :         if (available < (int)max(args->total, alloc_len))
    2400             :                 return false;
    2401             : 
    2402             :         /*
    2403             :          * Clamp maxlen to the amount of free space available for the actual
    2404             :          * extent allocation.
    2405             :          */
    2406   131193655 :         if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
    2407       16485 :                 args->maxlen = available;
    2408       16485 :                 ASSERT(args->maxlen > 0);
    2409       16485 :                 ASSERT(args->maxlen >= args->minlen);
    2410             :         }
    2411             : 
    2412             :         return true;
    2413             : }
    2414             : 
    2415             : int
    2416      194204 : xfs_free_agfl_block(
    2417             :         struct xfs_trans        *tp,
    2418             :         xfs_agnumber_t          agno,
    2419             :         xfs_agblock_t           agbno,
    2420             :         struct xfs_buf          *agbp,
    2421             :         struct xfs_owner_info   *oinfo)
    2422             : {
    2423      194204 :         int                     error;
    2424      194204 :         struct xfs_buf          *bp;
    2425             : 
    2426      194204 :         error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
    2427             :                                    XFS_AG_RESV_AGFL);
    2428      194204 :         if (error)
    2429             :                 return error;
    2430             : 
    2431      194202 :         error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
    2432      194202 :                         XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
    2433             :                         tp->t_mountp->m_bsize, 0, &bp);
    2434      194202 :         if (error)
    2435             :                 return error;
    2436      194202 :         xfs_trans_binval(tp, bp);
    2437             : 
    2438      194202 :         return 0;
    2439             : }
    2440             : 
    2441             : /*
    2442             :  * Check the agfl fields of the agf for inconsistency or corruption.
    2443             :  *
    2444             :  * The original purpose was to detect an agfl header padding mismatch between
    2445             :  * current and early v5 kernels. This problem manifests as a 1-slot size
    2446             :  * difference between the on-disk flcount and the active [first, last] range of
    2447             :  * a wrapped agfl.
    2448             :  *
    2449             :  * However, we need to use these same checks to catch agfl count corruptions
    2450             :  * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
    2451             :  * way, we need to reset the agfl and warn the user.
    2452             :  *
    2453             :  * Return true if a reset is required before the agfl can be used, false
    2454             :  * otherwise.
    2455             :  */
    2456             : static bool
    2457      279019 : xfs_agfl_needs_reset(
    2458             :         struct xfs_mount        *mp,
    2459             :         struct xfs_agf          *agf)
    2460             : {
    2461      279019 :         uint32_t                f = be32_to_cpu(agf->agf_flfirst);
    2462      279019 :         uint32_t                l = be32_to_cpu(agf->agf_fllast);
    2463      279019 :         uint32_t                c = be32_to_cpu(agf->agf_flcount);
    2464      279019 :         int                     agfl_size = xfs_agfl_size(mp);
    2465      279019 :         int                     active;
    2466             : 
    2467             :         /*
    2468             :          * The agf read verifier catches severe corruption of these fields.
    2469             :          * Repeat some sanity checks to cover a packed -> unpacked mismatch if
    2470             :          * the verifier allows it.
    2471             :          */
    2472      279019 :         if (f >= agfl_size || l >= agfl_size)
    2473             :                 return true;
    2474      279017 :         if (c > agfl_size)
    2475             :                 return true;
    2476             : 
    2477             :         /*
    2478             :          * Check consistency between the on-disk count and the active range. An
    2479             :          * agfl padding mismatch manifests as an inconsistent flcount.
    2480             :          */
    2481      279017 :         if (c && l >= f)
    2482      268660 :                 active = l - f + 1;
    2483       10357 :         else if (c)
    2484         124 :                 active = agfl_size - f + l + 1;
    2485             :         else
    2486             :                 active = 0;
    2487             : 
    2488      279017 :         return active != c;
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
    2493             :  * agfl content cannot be trusted. Warn the user that a repair is required to
    2494             :  * recover leaked blocks.
    2495             :  *
    2496             :  * The purpose of this mechanism is to handle filesystems affected by the agfl
    2497             :  * header padding mismatch problem. A reset keeps the filesystem online with a
    2498             :  * relatively minor free space accounting inconsistency rather than suffer the
    2499             :  * inevitable crash from use of an invalid agfl block.
    2500             :  */
    2501             : static void
    2502           4 : xfs_agfl_reset(
    2503             :         struct xfs_trans        *tp,
    2504             :         struct xfs_buf          *agbp,
    2505             :         struct xfs_perag        *pag)
    2506             : {
    2507           4 :         struct xfs_mount        *mp = tp->t_mountp;
    2508           4 :         struct xfs_agf          *agf = agbp->b_addr;
    2509             : 
    2510           8 :         ASSERT(xfs_perag_agfl_needs_reset(pag));
    2511           4 :         trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
    2512             : 
    2513           4 :         xfs_warn(mp,
    2514             :                "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
    2515             :                "Please unmount and run xfs_repair.",
    2516             :                  pag->pag_agno, pag->pagf_flcount);
    2517             : 
    2518           4 :         agf->agf_flfirst = 0;
    2519           8 :         agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
    2520           4 :         agf->agf_flcount = 0;
    2521           4 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
    2522             :                                     XFS_AGF_FLCOUNT);
    2523             : 
    2524           4 :         pag->pagf_flcount = 0;
    2525           4 :         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    2526           4 : }
    2527             : 
    2528             : /*
    2529             :  * Defer an AGFL block free. This is effectively equivalent to
    2530             :  * xfs_free_extent_later() with some special handling particular to AGFL blocks.
    2531             :  *
    2532             :  * Deferring AGFL frees helps prevent log reservation overruns due to too many
    2533             :  * allocation operations in a transaction. AGFL frees are prone to this problem
    2534             :  * because for one they are always freed one at a time. Further, an immediate
    2535             :  * AGFL block free can cause a btree join and require another block free before
    2536             :  * the real allocation can proceed. Deferring the free disconnects freeing up
    2537             :  * the AGFL slot from freeing the block.
    2538             :  */
    2539             : static int
    2540      194204 : xfs_defer_agfl_block(
    2541             :         struct xfs_trans                *tp,
    2542             :         xfs_agnumber_t                  agno,
    2543             :         xfs_agblock_t                   agbno,
    2544             :         struct xfs_owner_info           *oinfo)
    2545             : {
    2546      194204 :         struct xfs_mount                *mp = tp->t_mountp;
    2547      194204 :         struct xfs_extent_free_item     *xefi;
    2548      194204 :         xfs_fsblock_t                   fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
    2549             : 
    2550      194204 :         ASSERT(xfs_extfree_item_cache != NULL);
    2551      194204 :         ASSERT(oinfo != NULL);
    2552             : 
    2553      194204 :         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
    2554           0 :                 return -EFSCORRUPTED;
    2555             : 
    2556      194204 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2557             :                                GFP_KERNEL | __GFP_NOFAIL);
    2558      194204 :         xefi->xefi_startblock = fsbno;
    2559      194204 :         xefi->xefi_blockcount = 1;
    2560      194204 :         xefi->xefi_owner = oinfo->oi_owner;
    2561      194204 :         xefi->xefi_agresv = XFS_AG_RESV_AGFL;
    2562             : 
    2563      194204 :         trace_xfs_agfl_free_defer(mp, xefi);
    2564             : 
    2565      194204 :         xfs_extent_free_get_group(mp, xefi);
    2566      194204 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
    2567      194204 :         return 0;
    2568             : }
    2569             : 
    2570             : /*
    2571             :  * Add the extent to the list of extents to be free at transaction end.
    2572             :  * The list is maintained sorted (by block number).
    2573             :  */
    2574             : int
    2575    68642878 : xfs_free_extent_later(
    2576             :         struct xfs_trans                *tp,
    2577             :         xfs_fsblock_t                   bno,
    2578             :         xfs_filblks_t                   len,
    2579             :         const struct xfs_owner_info     *oinfo,
    2580             :         enum xfs_ag_resv_type           type,
    2581             :         unsigned int                    flags)
    2582             : {
    2583    68642878 :         struct xfs_extent_free_item     *xefi;
    2584    68642878 :         struct xfs_mount                *mp = tp->t_mountp;
    2585    68642878 :         enum xfs_defer_ops_type         optype;
    2586             : #ifdef DEBUG
    2587    68642878 :         xfs_agnumber_t                  agno;
    2588    68642878 :         xfs_agblock_t                   agbno;
    2589             : 
    2590    68642878 :         ASSERT(bno != NULLFSBLOCK);
    2591    68642878 :         ASSERT(len > 0);
    2592    68642878 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
    2593    68642878 :         ASSERT(!isnullstartblock(bno));
    2594    68642878 :         if (flags & XFS_FREE_EXTENT_REALTIME) {
    2595    15822334 :                 ASSERT(bno < mp->m_sb.sb_rblocks);
    2596    15822334 :                 ASSERT(len <= mp->m_sb.sb_rblocks);
    2597    15822334 :                 ASSERT(bno + len <= mp->m_sb.sb_rblocks);
    2598             :         } else {
    2599    52820544 :                 agno = XFS_FSB_TO_AGNO(mp, bno);
    2600    52820544 :                 agbno = XFS_FSB_TO_AGBNO(mp, bno);
    2601             : 
    2602    52820544 :                 ASSERT(agno < mp->m_sb.sb_agcount);
    2603    52820544 :                 ASSERT(agbno < mp->m_sb.sb_agblocks);
    2604    52820544 :                 ASSERT(len < mp->m_sb.sb_agblocks);
    2605    52820544 :                 ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
    2606             :         }
    2607             : #endif
    2608    68642878 :         ASSERT(!(flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
    2609    68642878 :         ASSERT(xfs_extfree_item_cache != NULL);
    2610    68642878 :         ASSERT(type != XFS_AG_RESV_AGFL);
    2611             : 
    2612    68642878 :         if (flags & XFS_FREE_EXTENT_REALTIME) {
    2613    15822350 :                 if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
    2614           0 :                         return -EFSCORRUPTED;
    2615             :         } else {
    2616    52820528 :                 if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
    2617           0 :                         return -EFSCORRUPTED;
    2618             :         }
    2619             : 
    2620    68644095 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2621             :                                GFP_KERNEL | __GFP_NOFAIL);
    2622    68644594 :         xefi->xefi_startblock = bno;
    2623    68644594 :         xefi->xefi_blockcount = (xfs_extlen_t)len;
    2624    68644594 :         xefi->xefi_agresv = type;
    2625    68644594 :         if (flags & XFS_FREE_EXTENT_SKIP_DISCARD)
    2626     2874515 :                 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
    2627    68644594 :         if (flags & XFS_FREE_EXTENT_REALTIME) {
    2628             :                 /*
    2629             :                  * Realtime and data section EFIs must use separate
    2630             :                  * transactions to finish deferred work because updates to
    2631             :                  * realtime metadata files can lock AGFs to allocate btree
    2632             :                  * blocks and we don't want that mixing with the AGF locks
    2633             :                  * taken to finish data section EFIs.
    2634             :                  */
    2635    15822331 :                 optype = XFS_DEFER_OPS_TYPE_FREE_RT;
    2636    15822331 :                 xefi->xefi_flags |= XFS_EFI_REALTIME;
    2637             :         } else {
    2638             :                 optype = XFS_DEFER_OPS_TYPE_FREE;
    2639             :         }
    2640    68644594 :         if (oinfo) {
    2641     1401708 :                 ASSERT(oinfo->oi_offset == 0);
    2642             : 
    2643     1401708 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
    2644          43 :                         xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
    2645     1401708 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
    2646     1010444 :                         xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
    2647     1401708 :                 xefi->xefi_owner = oinfo->oi_owner;
    2648             :         } else {
    2649    67242886 :                 xefi->xefi_owner = XFS_RMAP_OWN_NULL;
    2650             :         }
    2651             : 
    2652    68644594 :         trace_xfs_extent_free_defer(mp, xefi);
    2653             : 
    2654    68642698 :         xfs_extent_free_get_group(mp, xefi);
    2655    68640815 :         xfs_defer_add(tp, optype, &xefi->xefi_list);
    2656    68640815 :         return 0;
    2657             : }
    2658             : 
    2659             : #ifdef DEBUG
    2660             : /*
    2661             :  * Check if an AGF has a free extent record whose length is equal to
    2662             :  * args->minlen.
    2663             :  */
    2664             : STATIC int
    2665      367212 : xfs_exact_minlen_extent_available(
    2666             :         struct xfs_alloc_arg    *args,
    2667             :         struct xfs_buf          *agbp,
    2668             :         int                     *stat)
    2669             : {
    2670      367212 :         struct xfs_btree_cur    *cnt_cur;
    2671      367212 :         xfs_agblock_t           fbno;
    2672      367212 :         xfs_extlen_t            flen;
    2673      367212 :         int                     error = 0;
    2674             : 
    2675      367212 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
    2676             :                                         args->pag, XFS_BTNUM_CNT);
    2677      367205 :         error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
    2678      367215 :         if (error)
    2679           0 :                 goto out;
    2680             : 
    2681      367215 :         if (*stat == 0) {
    2682           0 :                 xfs_btree_mark_sick(cnt_cur);
    2683           0 :                 error = -EFSCORRUPTED;
    2684           0 :                 goto out;
    2685             :         }
    2686             : 
    2687      367215 :         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
    2688      367207 :         if (error)
    2689           0 :                 goto out;
    2690             : 
    2691      367207 :         if (*stat == 1 && flen != args->minlen)
    2692        2986 :                 *stat = 0;
    2693             : 
    2694      364221 : out:
    2695      367207 :         xfs_btree_del_cursor(cnt_cur, error);
    2696             : 
    2697      367215 :         return error;
    2698             : }
    2699             : #endif
    2700             : 
    2701             : /*
    2702             :  * Decide whether to use this allocation group for this allocation.
    2703             :  * If so, fix up the btree freelist's size.
    2704             :  */
    2705             : int                     /* error */
    2706   410127050 : xfs_alloc_fix_freelist(
    2707             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    2708             :         uint32_t                alloc_flags)
    2709             : {
    2710   410127050 :         struct xfs_mount        *mp = args->mp;
    2711   410127050 :         struct xfs_perag        *pag = args->pag;
    2712   410127050 :         struct xfs_trans        *tp = args->tp;
    2713   410127050 :         struct xfs_buf          *agbp = NULL;
    2714   410127050 :         struct xfs_buf          *agflbp = NULL;
    2715   410127050 :         struct xfs_alloc_arg    targs;  /* local allocation arguments */
    2716   410127050 :         xfs_agblock_t           bno;    /* freelist block */
    2717   410127050 :         xfs_extlen_t            need;   /* total blocks needed in freelist */
    2718   410127050 :         int                     error = 0;
    2719             : 
    2720             :         /* deferred ops (AGFL block frees) require permanent transactions */
    2721   410127050 :         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    2722             : 
    2723   820254100 :         if (!xfs_perag_initialised_agf(pag)) {
    2724        2408 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2725        2408 :                 if (error) {
    2726             :                         /* Couldn't lock the AGF so skip this AG. */
    2727        1614 :                         if (error == -EAGAIN)
    2728        1614 :                                 error = 0;
    2729        1614 :                         goto out_no_agbp;
    2730             :                 }
    2731             :         }
    2732             : 
    2733             :         /*
    2734             :          * If this is a metadata preferred pag and we are user data then try
    2735             :          * somewhere else if we are not being asked to try harder at this
    2736             :          * point
    2737             :          */
    2738   820250872 :         if (xfs_perag_prefers_metadata(pag) &&
    2739           0 :             (args->datatype & XFS_ALLOC_USERDATA) &&
    2740           0 :             (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
    2741           0 :                 ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
    2742           0 :                 goto out_agbp_relse;
    2743             :         }
    2744             : 
    2745   410125436 :         need = xfs_alloc_min_freelist(mp, pag);
    2746   410129582 :         if (!xfs_alloc_space_available(args, need, alloc_flags |
    2747             :                         XFS_ALLOC_FLAG_CHECK))
    2748    34164763 :                 goto out_agbp_relse;
    2749             : 
    2750             :         /*
    2751             :          * Get the a.g. freespace buffer.
    2752             :          * Can fail if we're not blocking on locks, and it's held.
    2753             :          */
    2754   375970154 :         if (!agbp) {
    2755   375965542 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2756   375971837 :                 if (error) {
    2757             :                         /* Couldn't lock the AGF so skip this AG. */
    2758     3149117 :                         if (error == -EAGAIN)
    2759     3148163 :                                 error = 0;
    2760     3149117 :                         goto out_no_agbp;
    2761             :                 }
    2762             :         }
    2763             : 
    2764             :         /* reset a padding mismatched agfl before final free space check */
    2765   745654664 :         if (xfs_perag_agfl_needs_reset(pag))
    2766           4 :                 xfs_agfl_reset(tp, agbp, pag);
    2767             : 
    2768             :         /* If there isn't enough total space or single-extent, reject it. */
    2769   372827332 :         need = xfs_alloc_min_freelist(mp, pag);
    2770   372822262 :         if (!xfs_alloc_space_available(args, need, alloc_flags))
    2771         361 :                 goto out_agbp_relse;
    2772             : 
    2773             : #ifdef DEBUG
    2774   372811640 :         if (args->alloc_minlen_only) {
    2775      367209 :                 int stat;
    2776             : 
    2777      367209 :                 error = xfs_exact_minlen_extent_available(args, agbp, &stat);
    2778      367215 :                 if (error || !stat)
    2779        2986 :                         goto out_agbp_relse;
    2780             :         }
    2781             : #endif
    2782             :         /*
    2783             :          * Make the freelist shorter if it's too long.
    2784             :          *
    2785             :          * Note that from this point onwards, we will always release the agf and
    2786             :          * agfl buffers on error. This handles the case where we error out and
    2787             :          * the buffers are clean or may not have been joined to the transaction
    2788             :          * and hence need to be released manually. If they have been joined to
    2789             :          * the transaction, then xfs_trans_brelse() will handle them
    2790             :          * appropriately based on the recursion count and dirty state of the
    2791             :          * buffer.
    2792             :          *
    2793             :          * XXX (dgc): When we have lots of free space, does this buy us
    2794             :          * anything other than extra overhead when we need to put more blocks
    2795             :          * back on the free list? Maybe we should only do this when space is
    2796             :          * getting low or the AGFL is more than half full?
    2797             :          *
    2798             :          * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
    2799             :          * big; the NORMAP flag prevents AGFL expand/shrink operations from
    2800             :          * updating the rmapbt.  Both flags are used in xfs_repair while we're
    2801             :          * rebuilding the rmapbt, and neither are used by the kernel.  They're
    2802             :          * both required to ensure that rmaps are correctly recorded for the
    2803             :          * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
    2804             :          * repair/rmap.c in xfsprogs for details.
    2805             :          */
    2806   372808660 :         memset(&targs, 0, sizeof(targs));
    2807             :         /* struct copy below */
    2808   372808660 :         if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
    2809       10840 :                 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
    2810             :         else
    2811   372797820 :                 targs.oinfo = XFS_RMAP_OINFO_AG;
    2812   373002864 :         while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
    2813   373002864 :                         pag->pagf_flcount > need) {
    2814      180727 :                 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
    2815      194204 :                 if (error)
    2816           0 :                         goto out_agbp_relse;
    2817             : 
    2818             :                 /* defer agfl frees */
    2819      194204 :                 error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
    2820      194204 :                 if (error)
    2821           0 :                         goto out_agbp_relse;
    2822             :         }
    2823             : 
    2824   372822137 :         targs.tp = tp;
    2825   372822137 :         targs.mp = mp;
    2826   372822137 :         targs.agbp = agbp;
    2827   372822137 :         targs.agno = args->agno;
    2828   372822137 :         targs.alignment = targs.minlen = targs.prod = 1;
    2829   372822137 :         targs.pag = pag;
    2830   372822137 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2831   372824559 :         if (error)
    2832         811 :                 goto out_agbp_relse;
    2833             : 
    2834             :         /* Make the freelist longer if it's too short. */
    2835   373238631 :         while (pag->pagf_flcount < need) {
    2836      417560 :                 targs.agbno = 0;
    2837      417560 :                 targs.maxlen = need - pag->pagf_flcount;
    2838      417560 :                 targs.resv = XFS_AG_RESV_AGFL;
    2839             : 
    2840             :                 /* Allocate as many blocks as possible at once. */
    2841      417560 :                 error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
    2842      414894 :                 if (error)
    2843           4 :                         goto out_agflbp_relse;
    2844             : 
    2845             :                 /*
    2846             :                  * Stop if we run out.  Won't happen if callers are obeying
    2847             :                  * the restrictions correctly.  Can happen for free calls
    2848             :                  * on a completely full ag.
    2849             :                  */
    2850      414890 :                 if (targs.agbno == NULLAGBLOCK) {
    2851           0 :                         if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
    2852             :                                 break;
    2853           0 :                         goto out_agflbp_relse;
    2854             :                 }
    2855             : 
    2856      414890 :                 if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
    2857      414885 :                         error = xfs_rmap_alloc(tp, agbp, pag,
    2858             :                                        targs.agbno, targs.len, &targs.oinfo);
    2859      414885 :                         if (error)
    2860           7 :                                 goto out_agflbp_relse;
    2861             :                 }
    2862      414883 :                 error = xfs_alloc_update_counters(tp, agbp,
    2863      414883 :                                                   -((long)(targs.len)));
    2864      414883 :                 if (error)
    2865           0 :                         goto out_agflbp_relse;
    2866             : 
    2867             :                 /*
    2868             :                  * Put each allocated block on the list.
    2869             :                  */
    2870      874229 :                 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
    2871      459346 :                         error = xfs_alloc_put_freelist(pag, tp, agbp,
    2872             :                                                         agflbp, bno, 0);
    2873      459346 :                         if (error)
    2874           0 :                                 goto out_agflbp_relse;
    2875             :                 }
    2876             :         }
    2877   372821071 :         xfs_trans_brelse(tp, agflbp);
    2878   372825082 :         args->agbp = agbp;
    2879   372825082 :         return 0;
    2880             : 
    2881          11 : out_agflbp_relse:
    2882          11 :         xfs_trans_brelse(tp, agflbp);
    2883    34168932 : out_agbp_relse:
    2884    34168932 :         if (agbp)
    2885        4168 :                 xfs_trans_brelse(tp, agbp);
    2886    34164764 : out_no_agbp:
    2887    37319664 :         args->agbp = NULL;
    2888    37319664 :         return error;
    2889             : }
    2890             : 
    2891             : /*
    2892             :  * Get a block from the freelist.
    2893             :  * Returns with the buffer for the block gotten.
    2894             :  */
    2895             : int
    2896      638168 : xfs_alloc_get_freelist(
    2897             :         struct xfs_perag        *pag,
    2898             :         struct xfs_trans        *tp,
    2899             :         struct xfs_buf          *agbp,
    2900             :         xfs_agblock_t           *bnop,
    2901             :         int                     btreeblk)
    2902             : {
    2903      638168 :         struct xfs_agf          *agf = agbp->b_addr;
    2904      638168 :         struct xfs_buf          *agflbp;
    2905      638168 :         xfs_agblock_t           bno;
    2906      638168 :         __be32                  *agfl_bno;
    2907      638168 :         int                     error;
    2908      638168 :         uint32_t                logflags;
    2909      638168 :         struct xfs_mount        *mp = tp->t_mountp;
    2910             : 
    2911             :         /*
    2912             :          * Freelist is empty, give up.
    2913             :          */
    2914      638168 :         if (!agf->agf_flcount) {
    2915           0 :                 *bnop = NULLAGBLOCK;
    2916           0 :                 return 0;
    2917             :         }
    2918             :         /*
    2919             :          * Read the array of free blocks.
    2920             :          */
    2921      638168 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2922      638168 :         if (error)
    2923             :                 return error;
    2924             : 
    2925             : 
    2926             :         /*
    2927             :          * Get the block number and update the data structures.
    2928             :          */
    2929      638168 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    2930      638168 :         bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
    2931     1276336 :         if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
    2932           0 :                 return -EFSCORRUPTED;
    2933             : 
    2934      638168 :         be32_add_cpu(&agf->agf_flfirst, 1);
    2935      638168 :         xfs_trans_brelse(tp, agflbp);
    2936     1276336 :         if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
    2937         397 :                 agf->agf_flfirst = 0;
    2938             : 
    2939     1276336 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    2940      638168 :         be32_add_cpu(&agf->agf_flcount, -1);
    2941      638168 :         pag->pagf_flcount--;
    2942             : 
    2943      638168 :         logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
    2944      638168 :         if (btreeblk) {
    2945      443964 :                 be32_add_cpu(&agf->agf_btreeblks, 1);
    2946      443964 :                 pag->pagf_btreeblks++;
    2947      443964 :                 logflags |= XFS_AGF_BTREEBLKS;
    2948             :         }
    2949             : 
    2950      638168 :         xfs_alloc_log_agf(tp, agbp, logflags);
    2951      638168 :         *bnop = bno;
    2952             : 
    2953      638168 :         return 0;
    2954             : }
    2955             : 
    2956             : /*
    2957             :  * Log the given fields from the agf structure.
    2958             :  */
    2959             : void
    2960   148782840 : xfs_alloc_log_agf(
    2961             :         struct xfs_trans        *tp,
    2962             :         struct xfs_buf          *bp,
    2963             :         uint32_t                fields)
    2964             : {
    2965   148782840 :         int     first;          /* first byte offset */
    2966   148782840 :         int     last;           /* last byte offset */
    2967   148782840 :         static const short      offsets[] = {
    2968             :                 offsetof(xfs_agf_t, agf_magicnum),
    2969             :                 offsetof(xfs_agf_t, agf_versionnum),
    2970             :                 offsetof(xfs_agf_t, agf_seqno),
    2971             :                 offsetof(xfs_agf_t, agf_length),
    2972             :                 offsetof(xfs_agf_t, agf_roots[0]),
    2973             :                 offsetof(xfs_agf_t, agf_levels[0]),
    2974             :                 offsetof(xfs_agf_t, agf_flfirst),
    2975             :                 offsetof(xfs_agf_t, agf_fllast),
    2976             :                 offsetof(xfs_agf_t, agf_flcount),
    2977             :                 offsetof(xfs_agf_t, agf_freeblks),
    2978             :                 offsetof(xfs_agf_t, agf_longest),
    2979             :                 offsetof(xfs_agf_t, agf_btreeblks),
    2980             :                 offsetof(xfs_agf_t, agf_uuid),
    2981             :                 offsetof(xfs_agf_t, agf_rmap_blocks),
    2982             :                 offsetof(xfs_agf_t, agf_refcount_blocks),
    2983             :                 offsetof(xfs_agf_t, agf_refcount_root),
    2984             :                 offsetof(xfs_agf_t, agf_refcount_level),
    2985             :                 /* needed so that we don't log the whole rest of the structure: */
    2986             :                 offsetof(xfs_agf_t, agf_spare64),
    2987             :                 sizeof(xfs_agf_t)
    2988             :         };
    2989             : 
    2990   148782840 :         trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
    2991             : 
    2992   148785092 :         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
    2993             : 
    2994   148786197 :         xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
    2995   148787063 :         xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
    2996   148787246 : }
    2997             : 
    2998             : /*
    2999             :  * Put the block on the freelist for the allocation group.
    3000             :  */
    3001             : int
    3002      641028 : xfs_alloc_put_freelist(
    3003             :         struct xfs_perag        *pag,
    3004             :         struct xfs_trans        *tp,
    3005             :         struct xfs_buf          *agbp,
    3006             :         struct xfs_buf          *agflbp,
    3007             :         xfs_agblock_t           bno,
    3008             :         int                     btreeblk)
    3009             : {
    3010      641028 :         struct xfs_mount        *mp = tp->t_mountp;
    3011      641028 :         struct xfs_agf          *agf = agbp->b_addr;
    3012      641028 :         __be32                  *blockp;
    3013      641028 :         int                     error;
    3014      641028 :         uint32_t                logflags;
    3015      641028 :         __be32                  *agfl_bno;
    3016      641028 :         int                     startoff;
    3017             : 
    3018      641028 :         if (!agflbp) {
    3019      181682 :                 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    3020      181682 :                 if (error)
    3021             :                         return error;
    3022             :         }
    3023             : 
    3024      641028 :         be32_add_cpu(&agf->agf_fllast, 1);
    3025     1282054 :         if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
    3026         399 :                 agf->agf_fllast = 0;
    3027             : 
    3028     1282054 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    3029      641027 :         be32_add_cpu(&agf->agf_flcount, 1);
    3030      641027 :         pag->pagf_flcount++;
    3031             : 
    3032      641027 :         logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
    3033      641027 :         if (btreeblk) {
    3034      181682 :                 be32_add_cpu(&agf->agf_btreeblks, -1);
    3035      181682 :                 pag->pagf_btreeblks--;
    3036      181682 :                 logflags |= XFS_AGF_BTREEBLKS;
    3037             :         }
    3038             : 
    3039      641027 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3040             : 
    3041     1282056 :         ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
    3042             : 
    3043      641028 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    3044      641028 :         blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
    3045      641028 :         *blockp = cpu_to_be32(bno);
    3046      641028 :         startoff = (char *)blockp - (char *)agflbp->b_addr;
    3047             : 
    3048      641028 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3049             : 
    3050      641028 :         xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
    3051      641028 :         xfs_trans_log_buf(tp, agflbp, startoff,
    3052             :                           startoff + sizeof(xfs_agblock_t) - 1);
    3053      641028 :         return 0;
    3054             : }
    3055             : 
    3056             : /*
    3057             :  * Check that this AGF/AGI header's sequence number and length matches the AG
    3058             :  * number and size in fsblocks.
    3059             :  */
    3060             : xfs_failaddr_t
    3061     2798264 : xfs_validate_ag_length(
    3062             :         struct xfs_buf          *bp,
    3063             :         uint32_t                seqno,
    3064             :         uint32_t                length)
    3065             : {
    3066     2798264 :         struct xfs_mount        *mp = bp->b_mount;
    3067             :         /*
    3068             :          * During growfs operations, the perag is not fully initialised,
    3069             :          * so we can't use it for any useful checking. growfs ensures we can't
    3070             :          * use it by using uncached buffers that don't have the perag attached
    3071             :          * so we can detect and avoid this problem.
    3072             :          */
    3073     2798264 :         if (bp->b_pag && seqno != bp->b_pag->pag_agno)
    3074           0 :                 return __this_address;
    3075             : 
    3076             :         /*
    3077             :          * Only the last AG in the filesystem is allowed to be shorter
    3078             :          * than the AG size recorded in the superblock.
    3079             :          */
    3080     2798264 :         if (length != mp->m_sb.sb_agblocks) {
    3081             :                 /*
    3082             :                  * During growfs, the new last AG can get here before we
    3083             :                  * have updated the superblock. Give it a pass on the seqno
    3084             :                  * check.
    3085             :                  */
    3086        1075 :                 if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
    3087           0 :                         return __this_address;
    3088        1075 :                 if (length < XFS_MIN_AG_BLOCKS)
    3089           0 :                         return __this_address;
    3090        1075 :                 if (length > mp->m_sb.sb_agblocks)
    3091           2 :                         return __this_address;
    3092             :         }
    3093             : 
    3094             :         return NULL;
    3095             : }
    3096             : 
    3097             : /*
    3098             :  * Verify the AGF is consistent.
    3099             :  *
    3100             :  * We do not verify the AGFL indexes in the AGF are fully consistent here
    3101             :  * because of issues with variable on-disk structure sizes. Instead, we check
    3102             :  * the agfl indexes for consistency when we initialise the perag from the AGF
    3103             :  * information after a read completes.
    3104             :  *
    3105             :  * If the index is inconsistent, then we mark the perag as needing an AGFL
    3106             :  * reset. The first AGFL update performed then resets the AGFL indexes and
    3107             :  * refills the AGFL with known good free blocks, allowing the filesystem to
    3108             :  * continue operating normally at the cost of a few leaked free space blocks.
    3109             :  */
    3110             : static xfs_failaddr_t
    3111     1564422 : xfs_agf_verify(
    3112             :         struct xfs_buf          *bp)
    3113             : {
    3114     1564422 :         struct xfs_mount        *mp = bp->b_mount;
    3115     1564422 :         struct xfs_agf          *agf = bp->b_addr;
    3116     1564422 :         xfs_failaddr_t          fa;
    3117     1564422 :         uint32_t                agf_seqno = be32_to_cpu(agf->agf_seqno);
    3118     1564422 :         uint32_t                agf_length = be32_to_cpu(agf->agf_length);
    3119             : 
    3120     1564422 :         if (xfs_has_crc(mp)) {
    3121     1562105 :                 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
    3122           0 :                         return __this_address;
    3123     1562104 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
    3124           0 :                         return __this_address;
    3125             :         }
    3126             : 
    3127     1564424 :         if (!xfs_verify_magic(bp, agf->agf_magicnum))
    3128           0 :                 return __this_address;
    3129             : 
    3130     1564423 :         if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
    3131           5 :                 return __this_address;
    3132             : 
    3133             :         /*
    3134             :          * Both agf_seqno and agf_length need to validated before anything else
    3135             :          * block number related in the AGF or AGFL can be checked.
    3136             :          */
    3137     1564418 :         fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
    3138     1564419 :         if (fa)
    3139             :                 return fa;
    3140             : 
    3141     3128834 :         if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
    3142           2 :                 return __this_address;
    3143     3128830 :         if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
    3144           0 :                 return __this_address;
    3145     3128830 :         if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
    3146           0 :                 return __this_address;
    3147             : 
    3148     4693245 :         if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
    3149             :             be32_to_cpu(agf->agf_freeblks) > agf_length)
    3150           3 :                 return __this_address;
    3151             : 
    3152     1564415 :         if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
    3153     1564415 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
    3154     1564415 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
    3155     3128831 :                                                 mp->m_alloc_maxlevels ||
    3156     1564416 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
    3157             :                                                 mp->m_alloc_maxlevels)
    3158           2 :                 return __this_address;
    3159             : 
    3160     3128819 :         if (xfs_has_lazysbcount(mp) &&
    3161     1564405 :             be32_to_cpu(agf->agf_btreeblks) > agf_length)
    3162           0 :                 return __this_address;
    3163             : 
    3164     1564414 :         if (xfs_has_rmapbt(mp)) {
    3165     3123596 :                 if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
    3166           2 :                         return __this_address;
    3167             : 
    3168     1561796 :                 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
    3169     1561794 :                     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
    3170     1561794 :                                                         mp->m_rmap_maxlevels)
    3171           2 :                         return __this_address;
    3172             :         }
    3173             : 
    3174     1564410 :         if (xfs_has_reflink(mp)) {
    3175     3123462 :                 if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
    3176           2 :                         return __this_address;
    3177             : 
    3178     1561729 :                 if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
    3179     3123462 :                     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
    3180           0 :                         return __this_address;
    3181             :         }
    3182             : 
    3183             :         return NULL;
    3184             : }
    3185             : 
    3186             : static void
    3187      220020 : xfs_agf_read_verify(
    3188             :         struct xfs_buf  *bp)
    3189             : {
    3190      220020 :         struct xfs_mount *mp = bp->b_mount;
    3191      220020 :         xfs_failaddr_t  fa;
    3192             : 
    3193      440022 :         if (xfs_has_crc(mp) &&
    3194             :             !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
    3195           8 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    3196             :         else {
    3197      220012 :                 fa = xfs_agf_verify(bp);
    3198      220012 :                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
    3199          12 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3200             :         }
    3201      220020 : }
    3202             : 
    3203             : static void
    3204      906210 : xfs_agf_write_verify(
    3205             :         struct xfs_buf  *bp)
    3206             : {
    3207      906210 :         struct xfs_mount        *mp = bp->b_mount;
    3208      906210 :         struct xfs_buf_log_item *bip = bp->b_log_item;
    3209      906210 :         struct xfs_agf          *agf = bp->b_addr;
    3210      906210 :         xfs_failaddr_t          fa;
    3211             : 
    3212      906210 :         fa = xfs_agf_verify(bp);
    3213      906210 :         if (fa) {
    3214           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3215           0 :                 return;
    3216             :         }
    3217             : 
    3218      906210 :         if (!xfs_has_crc(mp))
    3219             :                 return;
    3220             : 
    3221      903916 :         if (bip)
    3222      898508 :                 agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    3223             : 
    3224      903916 :         xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
    3225             : }
    3226             : 
    3227             : const struct xfs_buf_ops xfs_agf_buf_ops = {
    3228             :         .name = "xfs_agf",
    3229             :         .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
    3230             :         .verify_read = xfs_agf_read_verify,
    3231             :         .verify_write = xfs_agf_write_verify,
    3232             :         .verify_struct = xfs_agf_verify,
    3233             : };
    3234             : 
    3235             : /*
    3236             :  * Read in the allocation group header (free/alloc section).
    3237             :  */
    3238             : int
    3239  1879765743 : xfs_read_agf(
    3240             :         struct xfs_perag        *pag,
    3241             :         struct xfs_trans        *tp,
    3242             :         int                     flags,
    3243             :         struct xfs_buf          **agfbpp)
    3244             : {
    3245  1879765743 :         struct xfs_mount        *mp = pag->pag_mount;
    3246  1879765743 :         int                     error;
    3247             : 
    3248  1879765743 :         trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
    3249             : 
    3250  1879782655 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
    3251  1879782655 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
    3252             :                         XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
    3253  1879847693 :         if (xfs_metadata_is_sick(error))
    3254          20 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
    3255  1879847693 :         if (error)
    3256             :                 return error;
    3257             : 
    3258  1876698213 :         xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
    3259  1876698213 :         return 0;
    3260             : }
    3261             : 
    3262             : /*
    3263             :  * Read in the allocation group header (free/alloc section) and initialise the
    3264             :  * perag structure if necessary. If the caller provides @agfbpp, then return the
    3265             :  * locked buffer to the caller, otherwise free it.
    3266             :  */
    3267             : int
    3268  1879773910 : xfs_alloc_read_agf(
    3269             :         struct xfs_perag        *pag,
    3270             :         struct xfs_trans        *tp,
    3271             :         int                     flags,
    3272             :         struct xfs_buf          **agfbpp)
    3273             : {
    3274  1879773910 :         struct xfs_buf          *agfbp;
    3275  1879773910 :         struct xfs_agf          *agf;
    3276  1879773910 :         int                     error;
    3277  1879773910 :         int                     allocbt_blks;
    3278             : 
    3279  1879773910 :         trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
    3280             : 
    3281             :         /* We don't support trylock when freeing. */
    3282  1879793950 :         ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
    3283             :                         (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
    3284  1879793950 :         error = xfs_read_agf(pag, tp,
    3285  1879793950 :                         (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
    3286             :                         &agfbp);
    3287  1879840999 :         if (error)
    3288             :                 return error;
    3289             : 
    3290  1876688351 :         agf = agfbp->b_addr;
    3291  3753376702 :         if (!xfs_perag_initialised_agf(pag)) {
    3292      279018 :                 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
    3293      279018 :                 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
    3294      279018 :                 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
    3295      279018 :                 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
    3296      279018 :                 pag->pagf_levels[XFS_BTNUM_BNOi] =
    3297      279018 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
    3298      279018 :                 pag->pagf_levels[XFS_BTNUM_CNTi] =
    3299      279018 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
    3300      279018 :                 pag->pagf_levels[XFS_BTNUM_RMAPi] =
    3301      279018 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
    3302      279018 :                 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
    3303      279018 :                 if (xfs_agfl_needs_reset(pag->pag_mount, agf))
    3304           4 :                         set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3305             :                 else
    3306      279014 :                         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3307             : 
    3308             :                 /*
    3309             :                  * Update the in-core allocbt counter. Filter out the rmapbt
    3310             :                  * subset of the btreeblks counter because the rmapbt is managed
    3311             :                  * by perag reservation. Subtract one for the rmapbt root block
    3312             :                  * because the rmap counter includes it while the btreeblks
    3313             :                  * counter only tracks non-root blocks.
    3314             :                  */
    3315      279018 :                 allocbt_blks = pag->pagf_btreeblks;
    3316      279018 :                 if (xfs_has_rmapbt(pag->pag_mount))
    3317      278880 :                         allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
    3318      279018 :                 if (allocbt_blks > 0)
    3319       49505 :                         atomic64_add(allocbt_blks,
    3320             :                                         &pag->pag_mount->m_allocbt_blks);
    3321             : 
    3322      279018 :                 set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
    3323             :         }
    3324             : #ifdef DEBUG
    3325  3752818666 :         else if (!xfs_is_shutdown(pag->pag_mount)) {
    3326  3752818544 :                 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
    3327  3752818544 :                 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
    3328  3752818544 :                 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
    3329  3752818544 :                 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
    3330  3752818544 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
    3331             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
    3332  3752818544 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
    3333             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
    3334             :         }
    3335             : #endif
    3336  1876688351 :         if (agfbpp)
    3337  1876276053 :                 *agfbpp = agfbp;
    3338             :         else
    3339      412298 :                 xfs_trans_brelse(tp, agfbp);
    3340             :         return 0;
    3341             : }
    3342             : 
    3343             : /*
    3344             :  * Pre-proces allocation arguments to set initial state that we don't require
    3345             :  * callers to set up correctly, as well as bounds check the allocation args
    3346             :  * that are set up.
    3347             :  */
    3348             : static int
    3349    64582013 : xfs_alloc_vextent_check_args(
    3350             :         struct xfs_alloc_arg    *args,
    3351             :         xfs_fsblock_t           target,
    3352             :         xfs_agnumber_t          *minimum_agno)
    3353             : {
    3354    64582013 :         struct xfs_mount        *mp = args->mp;
    3355    64582013 :         xfs_agblock_t           agsize;
    3356             : 
    3357    64582013 :         args->fsbno = NULLFSBLOCK;
    3358             : 
    3359    64582013 :         *minimum_agno = 0;
    3360    64582013 :         if (args->tp->t_highest_agno != NULLAGNUMBER)
    3361      793160 :                 *minimum_agno = args->tp->t_highest_agno;
    3362             : 
    3363             :         /*
    3364             :          * Just fix this up, for the case where the last a.g. is shorter
    3365             :          * (or there's only one a.g.) and the caller couldn't easily figure
    3366             :          * that out (xfs_bmap_alloc).
    3367             :          */
    3368    64582013 :         agsize = mp->m_sb.sb_agblocks;
    3369    64582013 :         if (args->maxlen > agsize)
    3370           0 :                 args->maxlen = agsize;
    3371    64582013 :         if (args->alignment == 0)
    3372     2302720 :                 args->alignment = 1;
    3373             : 
    3374    64582013 :         ASSERT(args->minlen > 0);
    3375    64582013 :         ASSERT(args->maxlen > 0);
    3376    64582013 :         ASSERT(args->alignment > 0);
    3377    64582013 :         ASSERT(args->resv != XFS_AG_RESV_AGFL);
    3378             : 
    3379    64582013 :         ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
    3380    64582013 :         ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
    3381    64582013 :         ASSERT(args->minlen <= args->maxlen);
    3382    64582013 :         ASSERT(args->minlen <= agsize);
    3383    64582013 :         ASSERT(args->mod < args->prod);
    3384             : 
    3385    64582013 :         if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
    3386    64582013 :             XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
    3387    64582013 :             args->minlen > args->maxlen || args->minlen > agsize ||
    3388    64582013 :             args->mod >= args->prod) {
    3389           0 :                 trace_xfs_alloc_vextent_badargs(args);
    3390           0 :                 return -ENOSPC;
    3391             :         }
    3392             : 
    3393    64582013 :         if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
    3394           0 :                 trace_xfs_alloc_vextent_skip_deadlock(args);
    3395           0 :                 return -ENOSPC;
    3396             :         }
    3397             :         return 0;
    3398             : 
    3399             : }
    3400             : 
    3401             : /*
    3402             :  * Prepare an AG for allocation. If the AG is not prepared to accept the
    3403             :  * allocation, return failure.
    3404             :  *
    3405             :  * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
    3406             :  * modified to hold their own perag references.
    3407             :  */
    3408             : static int
    3409   101329238 : xfs_alloc_vextent_prepare_ag(
    3410             :         struct xfs_alloc_arg    *args,
    3411             :         uint32_t                alloc_flags)
    3412             : {
    3413   101329238 :         bool                    need_pag = !args->pag;
    3414   101329238 :         int                     error;
    3415             : 
    3416   101329238 :         if (need_pag)
    3417           0 :                 args->pag = xfs_perag_get(args->mp, args->agno);
    3418             : 
    3419   101329238 :         args->agbp = NULL;
    3420   101329238 :         error = xfs_alloc_fix_freelist(args, alloc_flags);
    3421   101330452 :         if (error) {
    3422         795 :                 trace_xfs_alloc_vextent_nofix(args);
    3423         795 :                 if (need_pag)
    3424           0 :                         xfs_perag_put(args->pag);
    3425         795 :                 args->agbno = NULLAGBLOCK;
    3426         795 :                 return error;
    3427             :         }
    3428   101329657 :         if (!args->agbp) {
    3429             :                 /* cannot allocate in this AG at all */
    3430    37317723 :                 trace_xfs_alloc_vextent_noagbp(args);
    3431    37317878 :                 args->agbno = NULLAGBLOCK;
    3432    37317878 :                 return 0;
    3433             :         }
    3434    64011934 :         args->wasfromfl = 0;
    3435    64011934 :         return 0;
    3436             : }
    3437             : 
    3438             : /*
    3439             :  * Post-process allocation results to account for the allocation if it succeed
    3440             :  * and set the allocated block number correctly for the caller.
    3441             :  *
    3442             :  * XXX: we should really be returning ENOSPC for ENOSPC, not
    3443             :  * hiding it behind a "successful" NULLFSBLOCK allocation.
    3444             :  */
    3445             : static int
    3446    64585286 : xfs_alloc_vextent_finish(
    3447             :         struct xfs_alloc_arg    *args,
    3448             :         xfs_agnumber_t          minimum_agno,
    3449             :         int                     alloc_error,
    3450             :         bool                    drop_perag)
    3451             : {
    3452    64585286 :         struct xfs_mount        *mp = args->mp;
    3453    64585286 :         int                     error = 0;
    3454             : 
    3455             :         /*
    3456             :          * We can end up here with a locked AGF. If we failed, the caller is
    3457             :          * likely going to try to allocate again with different parameters, and
    3458             :          * that can widen the AGs that are searched for free space. If we have
    3459             :          * to do BMBT block allocation, we have to do a new allocation.
    3460             :          *
    3461             :          * Hence leaving this function with the AGF locked opens up potential
    3462             :          * ABBA AGF deadlocks because a future allocation attempt in this
    3463             :          * transaction may attempt to lock a lower number AGF.
    3464             :          *
    3465             :          * We can't release the AGF until the transaction is commited, so at
    3466             :          * this point we must update the "first allocation" tracker to point at
    3467             :          * this AG if the tracker is empty or points to a lower AG. This allows
    3468             :          * the next allocation attempt to be modified appropriately to avoid
    3469             :          * deadlocks.
    3470             :          */
    3471    64585286 :         if (args->agbp &&
    3472    64010454 :             (args->tp->t_highest_agno == NULLAGNUMBER ||
    3473      790956 :              args->agno > minimum_agno))
    3474    63266557 :                 args->tp->t_highest_agno = args->agno;
    3475             : 
    3476             :         /*
    3477             :          * If the allocation failed with an error or we had an ENOSPC result,
    3478             :          * preserve the returned error whilst also marking the allocation result
    3479             :          * as "no extent allocated". This ensures that callers that fail to
    3480             :          * capture the error will still treat it as a failed allocation.
    3481             :          */
    3482    64585286 :         if (alloc_error || args->agbno == NULLAGBLOCK) {
    3483      834768 :                 args->fsbno = NULLFSBLOCK;
    3484      834768 :                 error = alloc_error;
    3485      834768 :                 goto out_drop_perag;
    3486             :         }
    3487             : 
    3488    63750518 :         args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
    3489             : 
    3490    63750518 :         ASSERT(args->len >= args->minlen);
    3491    63750518 :         ASSERT(args->len <= args->maxlen);
    3492    63750518 :         ASSERT(args->agbno % args->alignment == 0);
    3493    63750518 :         XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
    3494             : 
    3495             :         /* if not file data, insert new block into the reverse map btree */
    3496    63750518 :         if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
    3497     2926960 :                 error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
    3498     2926960 :                                        args->agbno, args->len, &args->oinfo);
    3499     2926970 :                 if (error)
    3500          45 :                         goto out_drop_perag;
    3501             :         }
    3502             : 
    3503    63750483 :         if (!args->wasfromfl) {
    3504    63749062 :                 error = xfs_alloc_update_counters(args->tp, args->agbp,
    3505    63749062 :                                                   -((long)(args->len)));
    3506    63749741 :                 if (error)
    3507           0 :                         goto out_drop_perag;
    3508             : 
    3509    63749741 :                 ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
    3510             :                                 args->len));
    3511             :         }
    3512             : 
    3513    63750703 :         xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
    3514             : 
    3515    63748351 :         XFS_STATS_INC(mp, xs_allocx);
    3516    63748351 :         XFS_STATS_ADD(mp, xs_allocb, args->len);
    3517             : 
    3518    63748351 :         trace_xfs_alloc_vextent_finish(args);
    3519             : 
    3520    64584203 : out_drop_perag:
    3521    64584203 :         if (drop_perag && args->pag) {
    3522    63027744 :                 xfs_perag_rele(args->pag);
    3523    63028064 :                 args->pag = NULL;
    3524             :         }
    3525    64584523 :         return error;
    3526             : }
    3527             : 
    3528             : /*
    3529             :  * Allocate within a single AG only. This uses a best-fit length algorithm so if
    3530             :  * you need an exact sized allocation without locality constraints, this is the
    3531             :  * fastest way to do it.
    3532             :  *
    3533             :  * Caller is expected to hold a perag reference in args->pag.
    3534             :  */
    3535             : int
    3536           0 : xfs_alloc_vextent_this_ag(
    3537             :         struct xfs_alloc_arg    *args,
    3538             :         xfs_agnumber_t          agno)
    3539             : {
    3540           0 :         struct xfs_mount        *mp = args->mp;
    3541           0 :         xfs_agnumber_t          minimum_agno;
    3542           0 :         uint32_t                alloc_flags = 0;
    3543           0 :         int                     error;
    3544             : 
    3545           0 :         ASSERT(args->pag != NULL);
    3546           0 :         ASSERT(args->pag->pag_agno == agno);
    3547             : 
    3548           0 :         args->agno = agno;
    3549           0 :         args->agbno = 0;
    3550             : 
    3551           0 :         trace_xfs_alloc_vextent_this_ag(args);
    3552             : 
    3553           0 :         error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
    3554             :                         &minimum_agno);
    3555           0 :         if (error) {
    3556           0 :                 if (error == -ENOSPC)
    3557             :                         return 0;
    3558           0 :                 return error;
    3559             :         }
    3560             : 
    3561           0 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3562           0 :         if (!error && args->agbp)
    3563           0 :                 error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3564             : 
    3565           0 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3566             : }
    3567             : 
    3568             : /*
    3569             :  * Iterate all AGs trying to allocate an extent starting from @start_ag.
    3570             :  *
    3571             :  * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
    3572             :  * allocation attempts in @start_agno have locality information. If we fail to
    3573             :  * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
    3574             :  * we attempt to allocation in as there is no locality optimisation possible for
    3575             :  * those allocations.
    3576             :  *
    3577             :  * On return, args->pag may be left referenced if we finish before the "all
    3578             :  * failed" return point. The allocation finish still needs the perag, and
    3579             :  * so the caller will release it once they've finished the allocation.
    3580             :  *
    3581             :  * When we wrap the AG iteration at the end of the filesystem, we have to be
    3582             :  * careful not to wrap into AGs below ones we already have locked in the
    3583             :  * transaction if we are doing a blocking iteration. This will result in an
    3584             :  * out-of-order locking of AGFs and hence can cause deadlocks.
    3585             :  */
    3586             : static int
    3587    62861914 : xfs_alloc_vextent_iterate_ags(
    3588             :         struct xfs_alloc_arg    *args,
    3589             :         xfs_agnumber_t          minimum_agno,
    3590             :         xfs_agnumber_t          start_agno,
    3591             :         xfs_agblock_t           target_agbno,
    3592             :         uint32_t                alloc_flags)
    3593             : {
    3594    62861914 :         struct xfs_mount        *mp = args->mp;
    3595    62861914 :         xfs_agnumber_t          restart_agno = minimum_agno;
    3596    62861914 :         xfs_agnumber_t          agno;
    3597    62861914 :         int                     error = 0;
    3598             : 
    3599    62861914 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
    3600    62861914 :                 restart_agno = 0;
    3601           0 : restart:
    3602    99654470 :         for_each_perag_wrap_range(mp, start_agno, restart_agno,
    3603             :                         mp->m_sb.sb_agcount, agno, args->pag) {
    3604    99607657 :                 args->agno = agno;
    3605    99607657 :                 error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3606    99609773 :                 if (error)
    3607             :                         break;
    3608    99608980 :                 if (!args->agbp) {
    3609    36760737 :                         trace_xfs_alloc_vextent_loopfailed(args);
    3610    36760738 :                         continue;
    3611             :                 }
    3612             : 
    3613             :                 /*
    3614             :                  * Allocation is supposed to succeed now, so break out of the
    3615             :                  * loop regardless of whether we succeed or not.
    3616             :                  */
    3617    62848243 :                 if (args->agno == start_agno && target_agbno) {
    3618    60042504 :                         args->agbno = target_agbno;
    3619    60042504 :                         error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3620             :                 } else {
    3621     2805739 :                         args->agbno = 0;
    3622     2805739 :                         error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3623             :                 }
    3624             :                 break;
    3625             :         }
    3626    62894976 :         if (error) {
    3627        1943 :                 xfs_perag_rele(args->pag);
    3628        1943 :                 args->pag = NULL;
    3629        1943 :                 return error;
    3630             :         }
    3631    62893033 :         if (args->agbp)
    3632             :                 return 0;
    3633             : 
    3634             :         /*
    3635             :          * We didn't find an AG we can alloation from. If we were given
    3636             :          * constraining flags by the caller, drop them and retry the allocation
    3637             :          * without any constraints being set.
    3638             :          */
    3639       46889 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
    3640       31815 :                 alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
    3641       31815 :                 restart_agno = minimum_agno;
    3642       31815 :                 goto restart;
    3643             :         }
    3644             : 
    3645       15074 :         ASSERT(args->pag == NULL);
    3646       15074 :         trace_xfs_alloc_vextent_allfailed(args);
    3647       15074 :         return 0;
    3648             : }
    3649             : 
    3650             : /*
    3651             :  * Iterate from the AGs from the start AG to the end of the filesystem, trying
    3652             :  * to allocate blocks. It starts with a near allocation attempt in the initial
    3653             :  * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
    3654             :  * back to zero if allowed by previous allocations in this transaction,
    3655             :  * otherwise will wrap back to the start AG and run a second blocking pass to
    3656             :  * the end of the filesystem.
    3657             :  */
    3658             : int
    3659    62497678 : xfs_alloc_vextent_start_ag(
    3660             :         struct xfs_alloc_arg    *args,
    3661             :         xfs_fsblock_t           target)
    3662             : {
    3663    62497678 :         struct xfs_mount        *mp = args->mp;
    3664    62497678 :         xfs_agnumber_t          minimum_agno;
    3665    62497678 :         xfs_agnumber_t          start_agno;
    3666    62497678 :         xfs_agnumber_t          rotorstep = xfs_rotorstep;
    3667    62497678 :         bool                    bump_rotor = false;
    3668    62497678 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3669    62497678 :         int                     error;
    3670             : 
    3671    62497678 :         ASSERT(args->pag == NULL);
    3672             : 
    3673    62497678 :         args->agno = NULLAGNUMBER;
    3674    62497678 :         args->agbno = NULLAGBLOCK;
    3675             : 
    3676    62497678 :         trace_xfs_alloc_vextent_start_ag(args);
    3677             : 
    3678    62497689 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3679    62496545 :         if (error) {
    3680           0 :                 if (error == -ENOSPC)
    3681             :                         return 0;
    3682           0 :                 return error;
    3683             :         }
    3684             : 
    3685    64278829 :         if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
    3686             :             xfs_is_inode32(mp)) {
    3687           0 :                 target = XFS_AGB_TO_FSB(mp,
    3688             :                                 ((mp->m_agfrotor / rotorstep) %
    3689             :                                 mp->m_sb.sb_agcount), 0);
    3690           0 :                 bump_rotor = 1;
    3691             :         }
    3692             : 
    3693    62496545 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3694    62496545 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3695             :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3696             : 
    3697    62498692 :         if (bump_rotor) {
    3698           0 :                 if (args->agno == start_agno)
    3699           0 :                         mp->m_agfrotor = (mp->m_agfrotor + 1) %
    3700           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3701             :                 else
    3702           0 :                         mp->m_agfrotor = (args->agno * rotorstep + 1) %
    3703           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3704             :         }
    3705             : 
    3706    62498692 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3707             : }
    3708             : 
    3709             : /*
    3710             :  * Iterate from the agno indicated via @target through to the end of the
    3711             :  * filesystem attempting blocking allocation. This does not wrap or try a second
    3712             :  * pass, so will not recurse into AGs lower than indicated by the target.
    3713             :  */
    3714             : int
    3715      364191 : xfs_alloc_vextent_first_ag(
    3716             :         struct xfs_alloc_arg    *args,
    3717             :         xfs_fsblock_t           target)
    3718             :  {
    3719      364191 :         struct xfs_mount        *mp = args->mp;
    3720      364191 :         xfs_agnumber_t          minimum_agno;
    3721      364191 :         xfs_agnumber_t          start_agno;
    3722      364191 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3723      364191 :         int                     error;
    3724             : 
    3725      364191 :         ASSERT(args->pag == NULL);
    3726             : 
    3727      364191 :         args->agno = NULLAGNUMBER;
    3728      364191 :         args->agbno = NULLAGBLOCK;
    3729             : 
    3730      364191 :         trace_xfs_alloc_vextent_first_ag(args);
    3731             : 
    3732      364260 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3733      364247 :         if (error) {
    3734           0 :                 if (error == -ENOSPC)
    3735             :                         return 0;
    3736           0 :                 return error;
    3737             :         }
    3738             : 
    3739      364247 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3740      364247 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3741             :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3742      364260 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3743             : }
    3744             : 
    3745             : /*
    3746             :  * Allocate at the exact block target or fail. Caller is expected to hold a
    3747             :  * perag reference in args->pag.
    3748             :  */
    3749             : int
    3750      444736 : xfs_alloc_vextent_exact_bno(
    3751             :         struct xfs_alloc_arg    *args,
    3752             :         xfs_fsblock_t           target)
    3753             : {
    3754      444736 :         struct xfs_mount        *mp = args->mp;
    3755      444736 :         xfs_agnumber_t          minimum_agno;
    3756      444736 :         int                     error;
    3757             : 
    3758      444736 :         ASSERT(args->pag != NULL);
    3759      444736 :         ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3760             : 
    3761      444736 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3762      444736 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3763             : 
    3764      444736 :         trace_xfs_alloc_vextent_exact_bno(args);
    3765             : 
    3766      444734 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3767      444733 :         if (error) {
    3768           0 :                 if (error == -ENOSPC)
    3769             :                         return 0;
    3770           0 :                 return error;
    3771             :         }
    3772             : 
    3773      444733 :         error = xfs_alloc_vextent_prepare_ag(args, 0);
    3774      444734 :         if (!error && args->agbp)
    3775      315165 :                 error = xfs_alloc_ag_vextent_exact(args);
    3776             : 
    3777      444737 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3778             : }
    3779             : 
    3780             : /*
    3781             :  * Allocate an extent as close to the target as possible. If there are not
    3782             :  * viable candidates in the AG, then fail the allocation.
    3783             :  *
    3784             :  * Caller may or may not have a per-ag reference in args->pag.
    3785             :  */
    3786             : int
    3787     1277431 : xfs_alloc_vextent_near_bno(
    3788             :         struct xfs_alloc_arg    *args,
    3789             :         xfs_fsblock_t           target)
    3790             : {
    3791     1277431 :         struct xfs_mount        *mp = args->mp;
    3792     1277431 :         xfs_agnumber_t          minimum_agno;
    3793     1277431 :         bool                    needs_perag = args->pag == NULL;
    3794     1277431 :         uint32_t                alloc_flags = 0;
    3795     1277431 :         int                     error;
    3796             : 
    3797     1277431 :         if (!needs_perag)
    3798     1094272 :                 ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3799             : 
    3800     1277431 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3801     1277431 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3802             : 
    3803     1277431 :         trace_xfs_alloc_vextent_near_bno(args);
    3804             : 
    3805     1277411 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3806     1277403 :         if (error) {
    3807           0 :                 if (error == -ENOSPC)
    3808             :                         return 0;
    3809           0 :                 return error;
    3810             :         }
    3811             : 
    3812     1277403 :         if (needs_perag)
    3813      183133 :                 args->pag = xfs_perag_grab(mp, args->agno);
    3814             : 
    3815     1277423 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3816     1277445 :         if (!error && args->agbp)
    3817      849873 :                 error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3818             : 
    3819     1277444 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
    3820             : }
    3821             : 
    3822             : /* Ensure that the freelist is at full capacity. */
    3823             : int
    3824   308797037 : xfs_free_extent_fix_freelist(
    3825             :         struct xfs_trans        *tp,
    3826             :         struct xfs_perag        *pag,
    3827             :         struct xfs_buf          **agbp)
    3828             : {
    3829   308797037 :         struct xfs_alloc_arg    args;
    3830   308797037 :         int                     error;
    3831             : 
    3832   308797037 :         memset(&args, 0, sizeof(struct xfs_alloc_arg));
    3833   308797037 :         args.tp = tp;
    3834   308797037 :         args.mp = tp->t_mountp;
    3835   308797037 :         args.agno = pag->pag_agno;
    3836   308797037 :         args.pag = pag;
    3837             : 
    3838             :         /*
    3839             :          * validate that the block number is legal - the enables us to detect
    3840             :          * and handle a silent filesystem corruption rather than crashing.
    3841             :          */
    3842   308797037 :         if (args.agno >= args.mp->m_sb.sb_agcount)
    3843             :                 return -EFSCORRUPTED;
    3844             : 
    3845   308797037 :         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
    3846   308796329 :         if (error)
    3847             :                 return error;
    3848             : 
    3849   308795348 :         *agbp = args.agbp;
    3850   308795348 :         return 0;
    3851             : }
    3852             : 
    3853             : /*
    3854             :  * Free an extent.
    3855             :  * Just break up the extent address and hand off to xfs_free_ag_extent
    3856             :  * after fixing up the freelist.
    3857             :  */
    3858             : int
    3859    52824169 : __xfs_free_extent(
    3860             :         struct xfs_trans                *tp,
    3861             :         struct xfs_perag                *pag,
    3862             :         xfs_agblock_t                   agbno,
    3863             :         xfs_extlen_t                    len,
    3864             :         const struct xfs_owner_info     *oinfo,
    3865             :         enum xfs_ag_resv_type           type,
    3866             :         bool                            skip_discard)
    3867             : {
    3868    52824169 :         struct xfs_mount                *mp = tp->t_mountp;
    3869    52824169 :         struct xfs_buf                  *agbp;
    3870    52824169 :         struct xfs_agf                  *agf;
    3871    52824169 :         int                             error;
    3872    52824169 :         unsigned int                    busy_flags = 0;
    3873             : 
    3874    52824169 :         ASSERT(len != 0);
    3875    52824169 :         ASSERT(type != XFS_AG_RESV_AGFL);
    3876             : 
    3877    52824169 :         if (XFS_TEST_ERROR(false, mp,
    3878             :                         XFS_ERRTAG_FREE_EXTENT))
    3879             :                 return -EIO;
    3880             : 
    3881    52824208 :         error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
    3882    52824131 :         if (error) {
    3883          15 :                 if (xfs_metadata_is_sick(error))
    3884           0 :                         xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3885          15 :                 return error;
    3886             :         }
    3887             : 
    3888    52824116 :         agf = agbp->b_addr;
    3889             : 
    3890    52824116 :         if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
    3891           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3892           0 :                 error = -EFSCORRUPTED;
    3893           0 :                 goto err_release;
    3894             :         }
    3895             : 
    3896             :         /* validate the extent size is legal now we have the agf locked */
    3897   105648232 :         if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
    3898           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3899           0 :                 error = -EFSCORRUPTED;
    3900           0 :                 goto err_release;
    3901             :         }
    3902             : 
    3903    52824116 :         error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
    3904             :                         type);
    3905    52823870 :         if (error)
    3906         276 :                 goto err_release;
    3907             : 
    3908    52823594 :         if (skip_discard)
    3909     1852746 :                 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
    3910    52823594 :         xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
    3911    52823594 :         return 0;
    3912             : 
    3913         276 : err_release:
    3914         276 :         xfs_trans_brelse(tp, agbp);
    3915         276 :         return error;
    3916             : }
    3917             : 
    3918             : struct xfs_alloc_query_range_info {
    3919             :         xfs_alloc_query_range_fn        fn;
    3920             :         void                            *priv;
    3921             : };
    3922             : 
    3923             : /* Format btree record and pass to our callback. */
    3924             : STATIC int
    3925   941459424 : xfs_alloc_query_range_helper(
    3926             :         struct xfs_btree_cur            *cur,
    3927             :         const union xfs_btree_rec       *rec,
    3928             :         void                            *priv)
    3929             : {
    3930   941459424 :         struct xfs_alloc_query_range_info       *query = priv;
    3931   941459424 :         struct xfs_alloc_rec_incore             irec;
    3932   941459424 :         xfs_failaddr_t                          fa;
    3933             : 
    3934   941459424 :         xfs_alloc_btrec_to_irec(rec, &irec);
    3935   941461480 :         fa = xfs_alloc_check_irec(cur, &irec);
    3936   941464878 :         if (fa)
    3937           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
    3938             : 
    3939   941464878 :         return query->fn(cur, &irec, query->priv);
    3940             : }
    3941             : 
    3942             : /* Find all free space within a given range of blocks. */
    3943             : int
    3944        1762 : xfs_alloc_query_range(
    3945             :         struct xfs_btree_cur                    *cur,
    3946             :         const struct xfs_alloc_rec_incore       *low_rec,
    3947             :         const struct xfs_alloc_rec_incore       *high_rec,
    3948             :         xfs_alloc_query_range_fn                fn,
    3949             :         void                                    *priv)
    3950             : {
    3951        1762 :         union xfs_btree_irec                    low_brec = { .a = *low_rec };
    3952        1762 :         union xfs_btree_irec                    high_brec = { .a = *high_rec };
    3953        1762 :         struct xfs_alloc_query_range_info       query = { .priv = priv, .fn = fn };
    3954             : 
    3955        1762 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3956        1762 :         return xfs_btree_query_range(cur, &low_brec, &high_brec,
    3957             :                         xfs_alloc_query_range_helper, &query);
    3958             : }
    3959             : 
    3960             : /* Find all free space records. */
    3961             : int
    3962      572498 : xfs_alloc_query_all(
    3963             :         struct xfs_btree_cur                    *cur,
    3964             :         xfs_alloc_query_range_fn                fn,
    3965             :         void                                    *priv)
    3966             : {
    3967      572498 :         struct xfs_alloc_query_range_info       query;
    3968             : 
    3969      572498 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3970      572498 :         query.priv = priv;
    3971      572498 :         query.fn = fn;
    3972      572498 :         return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
    3973             : }
    3974             : 
    3975             : /*
    3976             :  * Scan part of the keyspace of the free space and tell us if the area has no
    3977             :  * records, is fully mapped by records, or is partially filled.
    3978             :  */
    3979             : int
    3980  1854883470 : xfs_alloc_has_records(
    3981             :         struct xfs_btree_cur    *cur,
    3982             :         xfs_agblock_t           bno,
    3983             :         xfs_extlen_t            len,
    3984             :         enum xbtree_recpacking  *outcome)
    3985             : {
    3986  1854883470 :         union xfs_btree_irec    low;
    3987  1854883470 :         union xfs_btree_irec    high;
    3988             : 
    3989  1854883470 :         memset(&low, 0, sizeof(low));
    3990  1854883470 :         low.a.ar_startblock = bno;
    3991  1854883470 :         memset(&high, 0xFF, sizeof(high));
    3992  1854883470 :         high.a.ar_startblock = bno + len - 1;
    3993             : 
    3994  1854883470 :         return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
    3995             : }
    3996             : 
    3997             : /*
    3998             :  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
    3999             :  * error code or XFS_ITER_*.
    4000             :  */
    4001             : int
    4002    58330447 : xfs_agfl_walk(
    4003             :         struct xfs_mount        *mp,
    4004             :         struct xfs_agf          *agf,
    4005             :         struct xfs_buf          *agflbp,
    4006             :         xfs_agfl_walk_fn        walk_fn,
    4007             :         void                    *priv)
    4008             : {
    4009    58330447 :         __be32                  *agfl_bno;
    4010    58330447 :         unsigned int            i;
    4011    58330447 :         int                     error;
    4012             : 
    4013    58330447 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    4014    58330447 :         i = be32_to_cpu(agf->agf_flfirst);
    4015             : 
    4016             :         /* Nothing to walk in an empty AGFL. */
    4017    58330447 :         if (agf->agf_flcount == cpu_to_be32(0))
    4018             :                 return 0;
    4019             : 
    4020             :         /* Otherwise, walk from first to last, wrapping as needed. */
    4021   531704372 :         for (;;) {
    4022  1063408744 :                 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
    4023   531704387 :                 if (error)
    4024     2960742 :                         return error;
    4025  1057487290 :                 if (i == be32_to_cpu(agf->agf_fllast))
    4026             :                         break;
    4027   946767160 :                 if (++i == xfs_agfl_size(mp))
    4028           0 :                         i = 0;
    4029             :         }
    4030             : 
    4031             :         return 0;
    4032             : }
    4033             : 
    4034             : int __init
    4035          12 : xfs_extfree_intent_init_cache(void)
    4036             : {
    4037          12 :         xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
    4038             :                         sizeof(struct xfs_extent_free_item),
    4039             :                         0, 0, NULL);
    4040             : 
    4041          12 :         return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
    4042             : }
    4043             : 
    4044             : void
    4045          12 : xfs_extfree_intent_destroy_cache(void)
    4046             : {
    4047          12 :         kmem_cache_destroy(xfs_extfree_item_cache);
    4048          12 :         xfs_extfree_item_cache = NULL;
    4049          12 : }
    4050             : 
    4051             : /*
    4052             :  * Find the next chunk of free space in @pag starting at @agbno and going no
    4053             :  * higher than @end_agbno.  Set @agbno and @len to whatever free space we find,
    4054             :  * or to @end_agbno if we find no space.
    4055             :  */
    4056             : int
    4057         226 : xfs_alloc_find_freesp(
    4058             :         struct xfs_trans        *tp,
    4059             :         struct xfs_perag        *pag,
    4060             :         xfs_agblock_t           *agbno,
    4061             :         xfs_agblock_t           end_agbno,
    4062             :         xfs_extlen_t            *len)
    4063             : {
    4064         226 :         struct xfs_mount        *mp = pag->pag_mount;
    4065         226 :         struct xfs_btree_cur    *cur;
    4066         226 :         struct xfs_buf          *agf_bp = NULL;
    4067         226 :         xfs_agblock_t           found_agbno;
    4068         226 :         xfs_extlen_t            found_len;
    4069         226 :         int                     found;
    4070         226 :         int                     error;
    4071             : 
    4072         226 :         trace_xfs_alloc_find_freesp(mp, pag->pag_agno, *agbno,
    4073             :                         end_agbno - *agbno);
    4074             : 
    4075         226 :         error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
    4076         226 :         if (error)
    4077             :                 return error;
    4078             : 
    4079         226 :         cur = xfs_allocbt_init_cursor(mp, tp, agf_bp, pag, XFS_BTNUM_BNO);
    4080             : 
    4081             :         /* Try to find a free extent that starts before here. */
    4082         226 :         error = xfs_alloc_lookup_le(cur, *agbno, 0, &found);
    4083         226 :         if (error)
    4084           0 :                 goto out_cur;
    4085         226 :         if (found) {
    4086          24 :                 error = xfs_alloc_get_rec(cur, &found_agbno, &found_len,
    4087             :                                 &found);
    4088          24 :                 if (error)
    4089           0 :                         goto out_cur;
    4090          24 :                 if (XFS_IS_CORRUPT(mp, !found)) {
    4091           0 :                         xfs_btree_mark_sick(cur);
    4092           0 :                         error = -EFSCORRUPTED;
    4093           0 :                         goto out_cur;
    4094             :                 }
    4095             : 
    4096          24 :                 if (found_agbno + found_len > *agbno)
    4097          24 :                         goto found;
    4098             :         }
    4099             : 
    4100             :         /* Examine the next record if free extent not in range. */
    4101         202 :         error = xfs_btree_increment(cur, 0, &found);
    4102         202 :         if (error)
    4103           0 :                 goto out_cur;
    4104         202 :         if (!found)
    4105           0 :                 goto next_ag;
    4106             : 
    4107         202 :         error = xfs_alloc_get_rec(cur, &found_agbno, &found_len, &found);
    4108         202 :         if (error)
    4109           0 :                 goto out_cur;
    4110         202 :         if (XFS_IS_CORRUPT(mp, !found)) {
    4111           0 :                 xfs_btree_mark_sick(cur);
    4112           0 :                 error = -EFSCORRUPTED;
    4113           0 :                 goto out_cur;
    4114             :         }
    4115             : 
    4116         202 :         if (found_agbno >= end_agbno)
    4117         182 :                 goto next_ag;
    4118             : 
    4119          20 : found:
    4120             :         /* Found something, so update the mapping. */
    4121          44 :         trace_xfs_alloc_find_freesp_done(mp, pag->pag_agno, found_agbno,
    4122             :                         found_len);
    4123          44 :         if (found_agbno < *agbno) {
    4124           0 :                 found_len -= *agbno - found_agbno;
    4125           0 :                 found_agbno = *agbno;
    4126             :         }
    4127          44 :         *len = found_len;
    4128          44 :         *agbno = found_agbno;
    4129          44 :         goto out_cur;
    4130         182 : next_ag:
    4131             :         /* Found nothing, so advance the cursor beyond the end of the range. */
    4132         182 :         *agbno = end_agbno;
    4133         182 :         *len = 0;
    4134         226 : out_cur:
    4135         226 :         xfs_btree_del_cursor(cur, error);
    4136         226 :         return error;
    4137             : }

Generated by: LCOV version 1.14