LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_alloc.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 1488 1805 82.4 %
Date: 2023-07-31 20:08:07 Functions: 72 74 97.3 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_format.h"
       9             : #include "xfs_log_format.h"
      10             : #include "xfs_shared.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_defer.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_rmap.h"
      17             : #include "xfs_alloc_btree.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_extent_busy.h"
      20             : #include "xfs_errortag.h"
      21             : #include "xfs_error.h"
      22             : #include "xfs_trace.h"
      23             : #include "xfs_trans.h"
      24             : #include "xfs_buf_item.h"
      25             : #include "xfs_log.h"
      26             : #include "xfs_ag.h"
      27             : #include "xfs_ag_resv.h"
      28             : #include "xfs_bmap.h"
      29             : #include "xfs_health.h"
      30             : 
      31             : struct kmem_cache       *xfs_extfree_item_cache;
      32             : 
      33             : struct workqueue_struct *xfs_alloc_wq;
      34             : 
      35             : #define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
      36             : 
      37             : #define XFSA_FIXUP_BNO_OK       1
      38             : #define XFSA_FIXUP_CNT_OK       2
      39             : 
      40             : /*
      41             :  * Size of the AGFL.  For CRC-enabled filesystes we steal a couple of slots in
      42             :  * the beginning of the block for a proper header with the location information
      43             :  * and CRC.
      44             :  */
      45             : unsigned int
      46     7705953 : xfs_agfl_size(
      47             :         struct xfs_mount        *mp)
      48             : {
      49   852444707 :         unsigned int            size = mp->m_sb.sb_sectsize;
      50             : 
      51    10967103 :         if (xfs_has_crc(mp))
      52   850244287 :                 size -= sizeof(struct xfs_agfl);
      53             : 
      54   852444707 :         return size / sizeof(xfs_agblock_t);
      55             : }
      56             : 
      57             : unsigned int
      58      149913 : xfs_refc_block(
      59             :         struct xfs_mount        *mp)
      60             : {
      61      149913 :         if (xfs_has_rmapbt(mp))
      62      147631 :                 return XFS_RMAP_BLOCK(mp) + 1;
      63        2282 :         if (xfs_has_finobt(mp))
      64           0 :                 return XFS_FIBT_BLOCK(mp) + 1;
      65        2282 :         return XFS_IBT_BLOCK(mp) + 1;
      66             : }
      67             : 
      68             : xfs_extlen_t
      69       35484 : xfs_prealloc_blocks(
      70             :         struct xfs_mount        *mp)
      71             : {
      72       35484 :         if (xfs_has_reflink(mp))
      73       35224 :                 return xfs_refc_block(mp) + 1;
      74         260 :         if (xfs_has_rmapbt(mp))
      75           5 :                 return XFS_RMAP_BLOCK(mp) + 1;
      76         255 :         if (xfs_has_finobt(mp))
      77         207 :                 return XFS_FIBT_BLOCK(mp) + 1;
      78          48 :         return XFS_IBT_BLOCK(mp) + 1;
      79             : }
      80             : 
      81             : /*
      82             :  * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
      83             :  * guarantee that we can refill the AGFL prior to allocating space in a nearly
      84             :  * full AG.  Although the space described by the free space btrees, the
      85             :  * blocks used by the freesp btrees themselves, and the blocks owned by the
      86             :  * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
      87             :  * free space in the AG drop so low that the free space btrees cannot refill an
      88             :  * empty AGFL up to the minimum level.  Rather than grind through empty AGs
      89             :  * until the fs goes down, we subtract this many AG blocks from the incore
      90             :  * fdblocks to ensure user allocation does not overcommit the space the
      91             :  * filesystem needs for the AGFLs.  The rmap btree uses a per-AG reservation to
      92             :  * withhold space from xfs_mod_fdblocks, so we do not account for that here.
      93             :  */
      94             : #define XFS_ALLOCBT_AGFL_RESERVE        4
      95             : 
      96             : /*
      97             :  * Compute the number of blocks that we set aside to guarantee the ability to
      98             :  * refill the AGFL and handle a full bmap btree split.
      99             :  *
     100             :  * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
     101             :  * AGF buffer (PV 947395), we place constraints on the relationship among
     102             :  * actual allocations for data blocks, freelist blocks, and potential file data
     103             :  * bmap btree blocks. However, these restrictions may result in no actual space
     104             :  * allocated for a delayed extent, for example, a data block in a certain AG is
     105             :  * allocated but there is no additional block for the additional bmap btree
     106             :  * block due to a split of the bmap btree of the file. The result of this may
     107             :  * lead to an infinite loop when the file gets flushed to disk and all delayed
     108             :  * extents need to be actually allocated. To get around this, we explicitly set
     109             :  * aside a few blocks which will not be reserved in delayed allocation.
     110             :  *
     111             :  * For each AG, we need to reserve enough blocks to replenish a totally empty
     112             :  * AGFL and 4 more to handle a potential split of the file's bmap btree.
     113             :  */
     114             : unsigned int
     115       35534 : xfs_alloc_set_aside(
     116             :         struct xfs_mount        *mp)
     117             : {
     118       35534 :         return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
     119             : }
     120             : 
     121             : /*
     122             :  * When deciding how much space to allocate out of an AG, we limit the
     123             :  * allocation maximum size to the size the AG. However, we cannot use all the
     124             :  * blocks in the AG - some are permanently used by metadata. These
     125             :  * blocks are generally:
     126             :  *      - the AG superblock, AGF, AGI and AGFL
     127             :  *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
     128             :  *        the AGI free inode and rmap btree root blocks.
     129             :  *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
     130             :  *      - the rmapbt root block
     131             :  *
     132             :  * The AG headers are sector sized, so the amount of space they take up is
     133             :  * dependent on filesystem geometry. The others are all single blocks.
     134             :  */
     135             : unsigned int
     136       24125 : xfs_alloc_ag_max_usable(
     137             :         struct xfs_mount        *mp)
     138             : {
     139       24125 :         unsigned int            blocks;
     140             : 
     141       24125 :         blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
     142       24125 :         blocks += XFS_ALLOCBT_AGFL_RESERVE;
     143       24125 :         blocks += 3;                    /* AGF, AGI btree root blocks */
     144       24125 :         if (xfs_has_finobt(mp))
     145       24075 :                 blocks++;               /* finobt root block */
     146       24125 :         if (xfs_has_rmapbt(mp))
     147       23872 :                 blocks++;               /* rmap root block */
     148       24125 :         if (xfs_has_reflink(mp))
     149       23867 :                 blocks++;               /* refcount root block */
     150             : 
     151       24125 :         return mp->m_sb.sb_agblocks - blocks;
     152             : }
     153             : 
     154             : /*
     155             :  * Lookup the record equal to [bno, len] in the btree given by cur.
     156             :  */
     157             : STATIC int                              /* error */
     158   245026396 : xfs_alloc_lookup_eq(
     159             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     160             :         xfs_agblock_t           bno,    /* starting block of extent */
     161             :         xfs_extlen_t            len,    /* length of extent */
     162             :         int                     *stat)  /* success/failure */
     163             : {
     164   245026396 :         int                     error;
     165             : 
     166   245026396 :         cur->bc_rec.a.ar_startblock = bno;
     167   245026396 :         cur->bc_rec.a.ar_blockcount = len;
     168   245026396 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
     169   245047076 :         cur->bc_ag.abt.active = (*stat == 1);
     170   245047076 :         return error;
     171             : }
     172             : 
     173             : /*
     174             :  * Lookup the first record greater than or equal to [bno, len]
     175             :  * in the btree given by cur.
     176             :  */
     177             : int                             /* error */
     178        7634 : xfs_alloc_lookup_ge(
     179             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     180             :         xfs_agblock_t           bno,    /* starting block of extent */
     181             :         xfs_extlen_t            len,    /* length of extent */
     182             :         int                     *stat)  /* success/failure */
     183             : {
     184   349963065 :         int                     error;
     185             : 
     186   349963065 :         cur->bc_rec.a.ar_startblock = bno;
     187   349963065 :         cur->bc_rec.a.ar_blockcount = len;
     188        7634 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
     189   349965677 :         cur->bc_ag.abt.active = (*stat == 1);
     190   349965677 :         return error;
     191             : }
     192             : 
     193             : /*
     194             :  * Lookup the first record less than or equal to [bno, len]
     195             :  * in the btree given by cur.
     196             :  */
     197             : int                                     /* error */
     198   383564800 : xfs_alloc_lookup_le(
     199             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     200             :         xfs_agblock_t           bno,    /* starting block of extent */
     201             :         xfs_extlen_t            len,    /* length of extent */
     202             :         int                     *stat)  /* success/failure */
     203             : {
     204   468434934 :         int                     error;
     205   468434934 :         cur->bc_rec.a.ar_startblock = bno;
     206   468434934 :         cur->bc_rec.a.ar_blockcount = len;
     207   383564800 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
     208   468434455 :         cur->bc_ag.abt.active = (*stat == 1);
     209   468434455 :         return error;
     210             : }
     211             : 
     212             : static inline bool
     213             : xfs_alloc_cur_active(
     214             :         struct xfs_btree_cur    *cur)
     215             : {
     216  9167794351 :         return cur && cur->bc_ag.abt.active;
     217             : }
     218             : 
     219             : /*
     220             :  * Update the record referred to by cur to the value given
     221             :  * by [bno, len].
     222             :  * This either works (return 0) or gets an EFSCORRUPTED error.
     223             :  */
     224             : STATIC int                              /* error */
     225    67266676 : xfs_alloc_update(
     226             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     227             :         xfs_agblock_t           bno,    /* starting block of extent */
     228             :         xfs_extlen_t            len)    /* length of extent */
     229             : {
     230    67266676 :         union xfs_btree_rec     rec;
     231             : 
     232    67266676 :         rec.alloc.ar_startblock = cpu_to_be32(bno);
     233    67266676 :         rec.alloc.ar_blockcount = cpu_to_be32(len);
     234    67266676 :         return xfs_btree_update(cur, &rec);
     235             : }
     236             : 
     237             : /* Convert the ondisk btree record to its incore representation. */
     238             : void
     239  6693932717 : xfs_alloc_btrec_to_irec(
     240             :         const union xfs_btree_rec       *rec,
     241             :         struct xfs_alloc_rec_incore     *irec)
     242             : {
     243  6693932717 :         irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
     244  6693932717 :         irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
     245  6693932717 : }
     246             : 
     247             : inline xfs_failaddr_t
     248  6737538018 : xfs_alloc_check_perag_irec(
     249             :         struct xfs_perag                *pag,
     250             :         const struct xfs_alloc_rec_incore *irec)
     251             : {
     252  6737538018 :         if (irec->ar_blockcount == 0)
     253           0 :                 return __this_address;
     254             : 
     255             :         /* check for valid extent range, including overflow */
     256  6737538018 :         if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
     257           0 :                 return __this_address;
     258             : 
     259             :         return NULL;
     260             : }
     261             : 
     262             : /* Simple checks for free space records. */
     263             : xfs_failaddr_t
     264   383153224 : xfs_alloc_check_irec(
     265             :         struct xfs_btree_cur            *cur,
     266             :         const struct xfs_alloc_rec_incore *irec)
     267             : {
     268   383153224 :         return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
     269             : }
     270             : 
     271             : static inline int
     272           0 : xfs_alloc_complain_bad_rec(
     273             :         struct xfs_btree_cur            *cur,
     274             :         xfs_failaddr_t                  fa,
     275             :         const struct xfs_alloc_rec_incore *irec)
     276             : {
     277           0 :         struct xfs_mount                *mp = cur->bc_mp;
     278             : 
     279           0 :         xfs_warn(mp,
     280             :                 "%s Freespace BTree record corruption in AG %d detected at %pS!",
     281             :                 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
     282             :                 cur->bc_ag.pag->pag_agno, fa);
     283           0 :         xfs_warn(mp,
     284             :                 "start block 0x%x block count 0x%x", irec->ar_startblock,
     285             :                 irec->ar_blockcount);
     286           0 :         xfs_btree_mark_sick(cur);
     287           0 :         return -EFSCORRUPTED;
     288             : }
     289             : 
     290             : /*
     291             :  * Get the data from the pointed-to record.
     292             :  */
     293             : int                                     /* error */
     294  5360766486 : xfs_alloc_get_rec(
     295             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     296             :         xfs_agblock_t           *bno,   /* output: starting block of extent */
     297             :         xfs_extlen_t            *len,   /* output: length of extent */
     298             :         int                     *stat)  /* output: success/failure */
     299             : {
     300  5360766486 :         struct xfs_alloc_rec_incore irec;
     301  5360766486 :         union xfs_btree_rec     *rec;
     302  5360766486 :         xfs_failaddr_t          fa;
     303  5360766486 :         int                     error;
     304             : 
     305  5360766486 :         error = xfs_btree_get_rec(cur, &rec, stat);
     306  5358013942 :         if (error || !(*stat))
     307             :                 return error;
     308             : 
     309  5359306543 :         xfs_alloc_btrec_to_irec(rec, &irec);
     310  5360720448 :         fa = xfs_alloc_check_irec(cur, &irec);
     311  5363198694 :         if (fa)
     312           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
     313             : 
     314  5363198694 :         *bno = irec.ar_startblock;
     315  5363198694 :         *len = irec.ar_blockcount;
     316  5363198694 :         return 0;
     317             : }
     318             : 
     319             : /*
     320             :  * Compute aligned version of the found extent.
     321             :  * Takes alignment and min length into account.
     322             :  */
     323             : STATIC bool
     324  4186222422 : xfs_alloc_compute_aligned(
     325             :         xfs_alloc_arg_t *args,          /* allocation argument structure */
     326             :         xfs_agblock_t   foundbno,       /* starting block in found extent */
     327             :         xfs_extlen_t    foundlen,       /* length in found extent */
     328             :         xfs_agblock_t   *resbno,        /* result block number */
     329             :         xfs_extlen_t    *reslen,        /* result length */
     330             :         unsigned        *busy_gen)
     331             : {
     332  4186222422 :         xfs_agblock_t   bno = foundbno;
     333  4186222422 :         xfs_extlen_t    len = foundlen;
     334  4186222422 :         xfs_extlen_t    diff;
     335  4186222422 :         bool            busy;
     336             : 
     337             :         /* Trim busy sections out of found extent */
     338  4186222422 :         busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
     339             : 
     340             :         /*
     341             :          * If we have a largish extent that happens to start before min_agbno,
     342             :          * see if we can shift it into range...
     343             :          */
     344  4190383617 :         if (bno < args->min_agbno && bno + len > args->min_agbno) {
     345         590 :                 diff = args->min_agbno - bno;
     346         590 :                 if (len > diff) {
     347         590 :                         bno += diff;
     348         590 :                         len -= diff;
     349             :                 }
     350             :         }
     351             : 
     352  4190383617 :         if (args->alignment > 1 && len >= args->minlen) {
     353    11205336 :                 xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);
     354             : 
     355    11205336 :                 diff = aligned_bno - bno;
     356             : 
     357    11205336 :                 *resbno = aligned_bno;
     358    11205336 :                 *reslen = diff >= len ? 0 : len - diff;
     359             :         } else {
     360  4179178281 :                 *resbno = bno;
     361  4179178281 :                 *reslen = len;
     362             :         }
     363             : 
     364  4190383617 :         return busy;
     365             : }
     366             : 
     367             : /*
     368             :  * Compute best start block and diff for "near" allocations.
     369             :  * freelen >= wantlen already checked by caller.
     370             :  */
     371             : STATIC xfs_extlen_t                     /* difference value (absolute) */
     372  2206898444 : xfs_alloc_compute_diff(
     373             :         xfs_agblock_t   wantbno,        /* target starting block */
     374             :         xfs_extlen_t    wantlen,        /* target length */
     375             :         xfs_extlen_t    alignment,      /* target alignment */
     376             :         int             datatype,       /* are we allocating data? */
     377             :         xfs_agblock_t   freebno,        /* freespace's starting block */
     378             :         xfs_extlen_t    freelen,        /* freespace's length */
     379             :         xfs_agblock_t   *newbnop)       /* result: best start block from free */
     380             : {
     381  2206898444 :         xfs_agblock_t   freeend;        /* end of freespace extent */
     382  2206898444 :         xfs_agblock_t   newbno1;        /* return block number */
     383  2206898444 :         xfs_agblock_t   newbno2;        /* other new block number */
     384  2206898444 :         xfs_extlen_t    newlen1=0;      /* length with newbno1 */
     385  2206898444 :         xfs_extlen_t    newlen2=0;      /* length with newbno2 */
     386  2206898444 :         xfs_agblock_t   wantend;        /* end of target extent */
     387  2206898444 :         bool            userdata = datatype & XFS_ALLOC_USERDATA;
     388             : 
     389  2206898444 :         ASSERT(freelen >= wantlen);
     390  2206898444 :         freeend = freebno + freelen;
     391  2206898444 :         wantend = wantbno + wantlen;
     392             :         /*
     393             :          * We want to allocate from the start of a free extent if it is past
     394             :          * the desired block or if we are allocating user data and the free
     395             :          * extent is before desired block. The second case is there to allow
     396             :          * for contiguous allocation from the remaining free space if the file
     397             :          * grows in the short term.
     398             :          */
     399  2206898444 :         if (freebno >= wantbno || (userdata && freeend < wantend)) {
     400  1787963537 :                 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
     401           0 :                         newbno1 = NULLAGBLOCK;
     402   418934907 :         } else if (freeend >= wantend && alignment > 1) {
     403           0 :                 newbno1 = roundup(wantbno, alignment);
     404           0 :                 newbno2 = newbno1 - alignment;
     405           0 :                 if (newbno1 >= freeend)
     406             :                         newbno1 = NULLAGBLOCK;
     407             :                 else
     408           0 :                         newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
     409           0 :                 if (newbno2 < freebno)
     410             :                         newbno2 = NULLAGBLOCK;
     411             :                 else
     412           0 :                         newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
     413           0 :                 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
     414           0 :                         if (newlen1 < newlen2 ||
     415           0 :                             (newlen1 == newlen2 &&
     416           0 :                              XFS_ABSDIFF(newbno1, wantbno) >
     417           0 :                              XFS_ABSDIFF(newbno2, wantbno)))
     418           0 :                                 newbno1 = newbno2;
     419           0 :                 } else if (newbno2 != NULLAGBLOCK)
     420           0 :                         newbno1 = newbno2;
     421   418934907 :         } else if (freeend >= wantend) {
     422             :                 newbno1 = wantbno;
     423   418439175 :         } else if (alignment > 1) {
     424       20110 :                 newbno1 = roundup(freeend - wantlen, alignment);
     425       20110 :                 if (newbno1 > freeend - wantlen &&
     426       12496 :                     newbno1 - alignment >= freebno)
     427             :                         newbno1 -= alignment;
     428        7614 :                 else if (newbno1 >= freeend)
     429           0 :                         newbno1 = NULLAGBLOCK;
     430             :         } else
     431   418419065 :                 newbno1 = freeend - wantlen;
     432  2206898444 :         *newbnop = newbno1;
     433  2206898444 :         return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
     434             : }
     435             : 
     436             : /*
     437             :  * Fix up the length, based on mod and prod.
     438             :  * len should be k * prod + mod for some k.
     439             :  * If len is too small it is returned unchanged.
     440             :  * If len hits maxlen it is left alone.
     441             :  */
     442             : STATIC void
     443  2209362052 : xfs_alloc_fix_len(
     444             :         xfs_alloc_arg_t *args)          /* allocation argument structure */
     445             : {
     446  2209362052 :         xfs_extlen_t    k;
     447  2209362052 :         xfs_extlen_t    rlen;
     448             : 
     449  2209362052 :         ASSERT(args->mod < args->prod);
     450  2209362052 :         rlen = args->len;
     451  2209362052 :         ASSERT(rlen >= args->minlen);
     452  2209362052 :         ASSERT(rlen <= args->maxlen);
     453  2209362052 :         if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
     454       17693 :             (args->mod == 0 && rlen < args->prod))
     455             :                 return;
     456       30383 :         k = rlen % args->prod;
     457       30383 :         if (k == args->mod)
     458             :                 return;
     459       24454 :         if (k > args->mod)
     460       12501 :                 rlen = rlen - (k - args->mod);
     461             :         else
     462       11953 :                 rlen = rlen - args->prod + (args->mod - k);
     463             :         /* casts to (int) catch length underflows */
     464       24454 :         if ((int)rlen < (int)args->minlen)
     465             :                 return;
     466        5923 :         ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
     467        5923 :         ASSERT(rlen % args->prod == args->mod);
     468        5923 :         ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
     469             :                 rlen + args->minleft);
     470        5923 :         args->len = rlen;
     471             : }
     472             : 
     473             : /*
     474             :  * Update the two btrees, logically removing from freespace the extent
     475             :  * starting at rbno, rlen blocks.  The extent is contained within the
     476             :  * actual (current) free extent fbno for flen blocks.
     477             :  * Flags are passed in indicating whether the cursors are set to the
     478             :  * relevant records.
     479             :  */
     480             : STATIC int                              /* error code */
     481    58281472 : xfs_alloc_fixup_trees(
     482             :         struct xfs_btree_cur *cnt_cur,  /* cursor for by-size btree */
     483             :         struct xfs_btree_cur *bno_cur,  /* cursor for by-block btree */
     484             :         xfs_agblock_t   fbno,           /* starting block of free extent */
     485             :         xfs_extlen_t    flen,           /* length of free extent */
     486             :         xfs_agblock_t   rbno,           /* starting block of returned extent */
     487             :         xfs_extlen_t    rlen,           /* length of returned extent */
     488             :         int             flags)          /* flags, XFSA_FIXUP_... */
     489             : {
     490    58281472 :         int             error;          /* error code */
     491    58281472 :         int             i;              /* operation results */
     492    58281472 :         xfs_agblock_t   nfbno1;         /* first new free startblock */
     493    58281472 :         xfs_agblock_t   nfbno2;         /* second new free startblock */
     494    58281472 :         xfs_extlen_t    nflen1=0;       /* first new free length */
     495    58281472 :         xfs_extlen_t    nflen2=0;       /* second new free length */
     496    58281472 :         struct xfs_mount *mp;
     497             : 
     498    58281472 :         mp = cnt_cur->bc_mp;
     499             : 
     500             :         /*
     501             :          * Look up the record in the by-size tree if necessary.
     502             :          */
     503    58281472 :         if (flags & XFSA_FIXUP_CNT_OK) {
     504             : #ifdef DEBUG
     505     2871536 :                 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
     506             :                         return error;
     507     2871526 :                 if (XFS_IS_CORRUPT(mp,
     508             :                                    i != 1 ||
     509             :                                    nfbno1 != fbno ||
     510             :                                    nflen1 != flen)) {
     511           0 :                         xfs_btree_mark_sick(cnt_cur);
     512           0 :                         return -EFSCORRUPTED;
     513             :                 }
     514             : #endif
     515             :         } else {
     516    55409936 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
     517             :                         return error;
     518    55410372 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     519           0 :                         xfs_btree_mark_sick(cnt_cur);
     520           0 :                         return -EFSCORRUPTED;
     521             :                 }
     522             :         }
     523             :         /*
     524             :          * Look up the record in the by-block tree if necessary.
     525             :          */
     526    58281898 :         if (flags & XFSA_FIXUP_BNO_OK) {
     527             : #ifdef DEBUG
     528       77488 :                 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
     529             :                         return error;
     530       77485 :                 if (XFS_IS_CORRUPT(mp,
     531             :                                    i != 1 ||
     532             :                                    nfbno1 != fbno ||
     533             :                                    nflen1 != flen)) {
     534           0 :                         xfs_btree_mark_sick(bno_cur);
     535           0 :                         return -EFSCORRUPTED;
     536             :                 }
     537             : #endif
     538             :         } else {
     539    58204410 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
     540             :                         return error;
     541    58205374 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     542           0 :                         xfs_btree_mark_sick(bno_cur);
     543           0 :                         return -EFSCORRUPTED;
     544             :                 }
     545             :         }
     546             : 
     547             : #ifdef DEBUG
     548    58282859 :         if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
     549    36367842 :                 struct xfs_btree_block  *bnoblock;
     550    36367842 :                 struct xfs_btree_block  *cntblock;
     551             : 
     552    36367842 :                 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
     553    36367842 :                 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
     554             : 
     555    36367842 :                 if (XFS_IS_CORRUPT(mp,
     556             :                                    bnoblock->bb_numrecs !=
     557             :                                    cntblock->bb_numrecs)) {
     558           0 :                         xfs_btree_mark_sick(bno_cur);
     559           0 :                         return -EFSCORRUPTED;
     560             :                 }
     561             :         }
     562             : #endif
     563             : 
     564             :         /*
     565             :          * Deal with all four cases: the allocated record is contained
     566             :          * within the freespace record, so we can have new freespace
     567             :          * at either (or both) end, or no freespace remaining.
     568             :          */
     569    58282859 :         if (rbno == fbno && rlen == flen)
     570    15226341 :                 nfbno1 = nfbno2 = NULLAGBLOCK;
     571    43056518 :         else if (rbno == fbno) {
     572    37061449 :                 nfbno1 = rbno + rlen;
     573    37061449 :                 nflen1 = flen - rlen;
     574    37061449 :                 nfbno2 = NULLAGBLOCK;
     575     5995069 :         } else if (rbno + rlen == fbno + flen) {
     576     3465232 :                 nfbno1 = fbno;
     577     3465232 :                 nflen1 = flen - rlen;
     578     3465232 :                 nfbno2 = NULLAGBLOCK;
     579             :         } else {
     580     2529837 :                 nfbno1 = fbno;
     581     2529837 :                 nflen1 = rbno - fbno;
     582     2529837 :                 nfbno2 = rbno + rlen;
     583     2529837 :                 nflen2 = (fbno + flen) - nfbno2;
     584             :         }
     585             :         /*
     586             :          * Delete the entry from the by-size btree.
     587             :          */
     588    58282859 :         if ((error = xfs_btree_delete(cnt_cur, &i)))
     589             :                 return error;
     590    58282096 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
     591           0 :                 xfs_btree_mark_sick(cnt_cur);
     592           0 :                 return -EFSCORRUPTED;
     593             :         }
     594             :         /*
     595             :          * Add new by-size btree entry(s).
     596             :          */
     597    58282096 :         if (nfbno1 != NULLAGBLOCK) {
     598    43055964 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
     599             :                         return error;
     600    43056118 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     601           0 :                         xfs_btree_mark_sick(cnt_cur);
     602           0 :                         return -EFSCORRUPTED;
     603             :                 }
     604    43056118 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     605             :                         return error;
     606    43055771 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     607           0 :                         xfs_btree_mark_sick(cnt_cur);
     608           0 :                         return -EFSCORRUPTED;
     609             :                 }
     610             :         }
     611    58281903 :         if (nfbno2 != NULLAGBLOCK) {
     612     2529827 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
     613             :                         return error;
     614     2529830 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     615           0 :                         xfs_btree_mark_sick(cnt_cur);
     616           0 :                         return -EFSCORRUPTED;
     617             :                 }
     618     2529830 :                 if ((error = xfs_btree_insert(cnt_cur, &i)))
     619             :                         return error;
     620     2529839 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     621           0 :                         xfs_btree_mark_sick(cnt_cur);
     622           0 :                         return -EFSCORRUPTED;
     623             :                 }
     624             :         }
     625             :         /*
     626             :          * Fix up the by-block btree entry(s).
     627             :          */
     628    58281915 :         if (nfbno1 == NULLAGBLOCK) {
     629             :                 /*
     630             :                  * No remaining freespace, just delete the by-block tree entry.
     631             :                  */
     632    15226173 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
     633             :                         return error;
     634    15226147 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     635           0 :                         xfs_btree_mark_sick(bno_cur);
     636           0 :                         return -EFSCORRUPTED;
     637             :                 }
     638             :         } else {
     639             :                 /*
     640             :                  * Update the by-block entry to start later|be shorter.
     641             :                  */
     642    43055742 :                 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
     643             :                         return error;
     644             :         }
     645    58282072 :         if (nfbno2 != NULLAGBLOCK) {
     646             :                 /*
     647             :                  * 2 resulting free entries, need to add one.
     648             :                  */
     649     2529841 :                 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
     650             :                         return error;
     651     2529841 :                 if (XFS_IS_CORRUPT(mp, i != 0)) {
     652           0 :                         xfs_btree_mark_sick(bno_cur);
     653           0 :                         return -EFSCORRUPTED;
     654             :                 }
     655     2529841 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
     656             :                         return error;
     657     2529835 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     658           0 :                         xfs_btree_mark_sick(bno_cur);
     659           0 :                         return -EFSCORRUPTED;
     660             :                 }
     661             :         }
     662             :         return 0;
     663             : }
     664             : 
     665             : /*
     666             :  * We do not verify the AGFL contents against AGF-based index counters here,
     667             :  * even though we may have access to the perag that contains shadow copies. We
     668             :  * don't know if the AGF based counters have been checked, and if they have they
     669             :  * still may be inconsistent because they haven't yet been reset on the first
     670             :  * allocation after the AGF has been read in.
     671             :  *
     672             :  * This means we can only check that all agfl entries contain valid or null
     673             :  * values because we can't reliably determine the active range to exclude
     674             :  * NULLAGBNO as a valid value.
     675             :  *
     676             :  * However, we can't even do that for v4 format filesystems because there are
     677             :  * old versions of mkfs out there that does not initialise the AGFL to known,
     678             :  * verifiable values. HEnce we can't tell the difference between a AGFL block
     679             :  * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
     680             :  *
     681             :  * As a result, we can only fully validate AGFL block numbers when we pull them
     682             :  * from the freelist in xfs_alloc_get_freelist().
     683             :  */
     684             : static xfs_failaddr_t
     685      477067 : xfs_agfl_verify(
     686             :         struct xfs_buf  *bp)
     687             : {
     688      477067 :         struct xfs_mount *mp = bp->b_mount;
     689      477067 :         struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
     690      477067 :         __be32          *agfl_bno = xfs_buf_to_agfl_bno(bp);
     691      477067 :         int             i;
     692             : 
     693      477067 :         if (!xfs_has_crc(mp))
     694             :                 return NULL;
     695             : 
     696      477063 :         if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
     697           0 :                 return __this_address;
     698      477061 :         if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
     699           0 :                 return __this_address;
     700             :         /*
     701             :          * during growfs operations, the perag is not fully initialised,
     702             :          * so we can't use it for any useful checking. growfs ensures we can't
     703             :          * use it by using uncached buffers that don't have the perag attached
     704             :          * so we can detect and avoid this problem.
     705             :          */
     706      948716 :         if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
     707           0 :                 return __this_address;
     708             : 
     709   957856009 :         for (i = 0; i < xfs_agfl_size(mp); i++) {
     710   478450373 :                 if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
     711   182977336 :                     be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
     712           0 :                         return __this_address;
     713             :         }
     714             : 
     715      477063 :         if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
     716           0 :                 return __this_address;
     717             :         return NULL;
     718             : }
     719             : 
     720             : static void
     721       63394 : xfs_agfl_read_verify(
     722             :         struct xfs_buf  *bp)
     723             : {
     724       63394 :         struct xfs_mount *mp = bp->b_mount;
     725       63394 :         xfs_failaddr_t  fa;
     726             : 
     727             :         /*
     728             :          * There is no verification of non-crc AGFLs because mkfs does not
     729             :          * initialise the AGFL to zero or NULL. Hence the only valid part of the
     730             :          * AGFL is what the AGF says is active. We can't get to the AGF, so we
     731             :          * can't verify just those entries are valid.
     732             :          */
     733       63394 :         if (!xfs_has_crc(mp))
     734             :                 return;
     735             : 
     736       63376 :         if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
     737         128 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
     738             :         else {
     739       63248 :                 fa = xfs_agfl_verify(bp);
     740       63248 :                 if (fa)
     741           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     742             :         }
     743             : }
     744             : 
     745             : static void
     746       68548 : xfs_agfl_write_verify(
     747             :         struct xfs_buf  *bp)
     748             : {
     749       68548 :         struct xfs_mount        *mp = bp->b_mount;
     750       68548 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     751       68548 :         xfs_failaddr_t          fa;
     752             : 
     753             :         /* no verification of non-crc AGFLs */
     754       68548 :         if (!xfs_has_crc(mp))
     755             :                 return;
     756             : 
     757       66266 :         fa = xfs_agfl_verify(bp);
     758       66266 :         if (fa) {
     759           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     760           0 :                 return;
     761             :         }
     762             : 
     763       66266 :         if (bip)
     764       60858 :                 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     765             : 
     766       66266 :         xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
     767             : }
     768             : 
     769             : const struct xfs_buf_ops xfs_agfl_buf_ops = {
     770             :         .name = "xfs_agfl",
     771             :         .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
     772             :         .verify_read = xfs_agfl_read_verify,
     773             :         .verify_write = xfs_agfl_write_verify,
     774             :         .verify_struct = xfs_agfl_verify,
     775             : };
     776             : 
     777             : /*
     778             :  * Read in the allocation group free block array.
     779             :  */
     780             : int
     781   325280921 : xfs_alloc_read_agfl(
     782             :         struct xfs_perag        *pag,
     783             :         struct xfs_trans        *tp,
     784             :         struct xfs_buf          **bpp)
     785             : {
     786   325280921 :         struct xfs_mount        *mp = pag->pag_mount;
     787   325280921 :         struct xfs_buf          *bp;
     788   325280921 :         int                     error;
     789             : 
     790   325280921 :         error = xfs_trans_read_buf(
     791   325280921 :                         mp, tp, mp->m_ddev_targp,
     792   325280921 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
     793             :                         XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
     794   325287710 :         if (xfs_metadata_is_sick(error))
     795         128 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
     796   325287710 :         if (error)
     797             :                 return error;
     798   325286756 :         xfs_buf_set_ref(bp, XFS_AGFL_REF);
     799   325288338 :         *bpp = bp;
     800   325288338 :         return 0;
     801             : }
     802             : 
     803             : STATIC int
     804   109652907 : xfs_alloc_update_counters(
     805             :         struct xfs_trans        *tp,
     806             :         struct xfs_buf          *agbp,
     807             :         long                    len)
     808             : {
     809   109652907 :         struct xfs_agf          *agf = agbp->b_addr;
     810             : 
     811   109652907 :         agbp->b_pag->pagf_freeblks += len;
     812   109652907 :         be32_add_cpu(&agf->agf_freeblks, len);
     813             : 
     814   328962261 :         if (unlikely(be32_to_cpu(agf->agf_freeblks) >
     815             :                      be32_to_cpu(agf->agf_length))) {
     816           0 :                 xfs_buf_mark_corrupt(agbp);
     817           0 :                 xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
     818           0 :                 return -EFSCORRUPTED;
     819             :         }
     820             : 
     821   109654087 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
     822   109654087 :         return 0;
     823             : }
     824             : 
     825             : /*
     826             :  * Block allocation algorithm and data structures.
     827             :  */
     828             : struct xfs_alloc_cur {
     829             :         struct xfs_btree_cur            *cnt;   /* btree cursors */
     830             :         struct xfs_btree_cur            *bnolt;
     831             :         struct xfs_btree_cur            *bnogt;
     832             :         xfs_extlen_t                    cur_len;/* current search length */
     833             :         xfs_agblock_t                   rec_bno;/* extent startblock */
     834             :         xfs_extlen_t                    rec_len;/* extent length */
     835             :         xfs_agblock_t                   bno;    /* alloc bno */
     836             :         xfs_extlen_t                    len;    /* alloc len */
     837             :         xfs_extlen_t                    diff;   /* diff from search bno */
     838             :         unsigned int                    busy_gen;/* busy state */
     839             :         bool                            busy;
     840             : };
     841             : 
     842             : /*
     843             :  * Set up cursors, etc. in the extent allocation cursor. This function can be
     844             :  * called multiple times to reset an initialized structure without having to
     845             :  * reallocate cursors.
     846             :  */
     847             : static int
     848    55365361 : xfs_alloc_cur_setup(
     849             :         struct xfs_alloc_arg    *args,
     850             :         struct xfs_alloc_cur    *acur)
     851             : {
     852    55365361 :         int                     error;
     853    55365361 :         int                     i;
     854             : 
     855    55365361 :         acur->cur_len = args->maxlen;
     856    55365361 :         acur->rec_bno = 0;
     857    55365361 :         acur->rec_len = 0;
     858    55365361 :         acur->bno = 0;
     859    55365361 :         acur->len = 0;
     860    55365361 :         acur->diff = -1;
     861    55365361 :         acur->busy = false;
     862    55365361 :         acur->busy_gen = 0;
     863             : 
     864             :         /*
     865             :          * Perform an initial cntbt lookup to check for availability of maxlen
     866             :          * extents. If this fails, we'll return -ENOSPC to signal the caller to
     867             :          * attempt a small allocation.
     868             :          */
     869    55365361 :         if (!acur->cnt)
     870    55333276 :                 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
     871             :                                         args->agbp, args->pag, XFS_BTNUM_CNT);
     872    55363182 :         error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
     873    55367627 :         if (error)
     874             :                 return error;
     875             : 
     876             :         /*
     877             :          * Allocate the bnobt left and right search cursors.
     878             :          */
     879    55367283 :         if (!acur->bnolt)
     880    55333875 :                 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
     881             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     882    55366716 :         if (!acur->bnogt)
     883    55334048 :                 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
     884             :                                         args->agbp, args->pag, XFS_BTNUM_BNO);
     885    55366021 :         return i == 1 ? 0 : -ENOSPC;
     886             : }
     887             : 
     888             : static void
     889    55333128 : xfs_alloc_cur_close(
     890             :         struct xfs_alloc_cur    *acur,
     891             :         bool                    error)
     892             : {
     893    55333128 :         int                     cur_error = XFS_BTREE_NOERROR;
     894             : 
     895    55333128 :         if (error)
     896         700 :                 cur_error = XFS_BTREE_ERROR;
     897             : 
     898    55333128 :         if (acur->cnt)
     899    55333128 :                 xfs_btree_del_cursor(acur->cnt, cur_error);
     900    55333784 :         if (acur->bnolt)
     901    55333440 :                 xfs_btree_del_cursor(acur->bnolt, cur_error);
     902    55333870 :         if (acur->bnogt)
     903    55333526 :                 xfs_btree_del_cursor(acur->bnogt, cur_error);
     904    55333910 :         acur->cnt = acur->bnolt = acur->bnogt = NULL;
     905    55333910 : }
     906             : 
     907             : /*
     908             :  * Check an extent for allocation and track the best available candidate in the
     909             :  * allocation structure. The cursor is deactivated if it has entered an out of
     910             :  * range state based on allocation arguments. Optionally return the extent
     911             :  * extent geometry and allocation status if requested by the caller.
     912             :  */
     913             : static int
     914  4229298031 : xfs_alloc_cur_check(
     915             :         struct xfs_alloc_arg    *args,
     916             :         struct xfs_alloc_cur    *acur,
     917             :         struct xfs_btree_cur    *cur,
     918             :         int                     *new)
     919             : {
     920  4229298031 :         int                     error, i;
     921  4229298031 :         xfs_agblock_t           bno, bnoa, bnew;
     922  4229298031 :         xfs_extlen_t            len, lena, diff = -1;
     923  4229298031 :         bool                    busy;
     924  4229298031 :         unsigned                busy_gen = 0;
     925  4229298031 :         bool                    deactivate = false;
     926  4229298031 :         bool                    isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
     927             : 
     928  4229298031 :         *new = 0;
     929             : 
     930  4229298031 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
     931  4233172770 :         if (error)
     932             :                 return error;
     933  4233172770 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
     934           0 :                 xfs_btree_mark_sick(cur);
     935           0 :                 return -EFSCORRUPTED;
     936             :         }
     937             : 
     938             :         /*
     939             :          * Check minlen and deactivate a cntbt cursor if out of acceptable size
     940             :          * range (i.e., walking backwards looking for a minlen extent).
     941             :          */
     942  4233172770 :         if (len < args->minlen) {
     943   232846464 :                 deactivate = !isbnobt;
     944   232846464 :                 goto out;
     945             :         }
     946             : 
     947  4000326306 :         busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
     948             :                                          &busy_gen);
     949  4008546637 :         acur->busy |= busy;
     950  4008546637 :         if (busy)
     951  1960792145 :                 acur->busy_gen = busy_gen;
     952             :         /* deactivate a bnobt cursor outside of locality range */
     953  4008546637 :         if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
     954        1167 :                 deactivate = isbnobt;
     955        1167 :                 goto out;
     956             :         }
     957  4008545470 :         if (lena < args->minlen)
     958  1802590566 :                 goto out;
     959             : 
     960  2205954904 :         args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
     961  2205954904 :         xfs_alloc_fix_len(args);
     962  2206413765 :         ASSERT(args->len >= args->minlen);
     963  2206413765 :         if (args->len < acur->len)
     964       31130 :                 goto out;
     965             : 
     966             :         /*
     967             :          * We have an aligned record that satisfies minlen and beats or matches
     968             :          * the candidate extent size. Compare locality for near allocation mode.
     969             :          */
     970  2206382635 :         diff = xfs_alloc_compute_diff(args->agbno, args->len,
     971             :                                       args->alignment, args->datatype,
     972             :                                       bnoa, lena, &bnew);
     973  2207261271 :         if (bnew == NULLAGBLOCK)
     974           0 :                 goto out;
     975             : 
     976             :         /*
     977             :          * Deactivate a bnobt cursor with worse locality than the current best.
     978             :          */
     979  2207261271 :         if (diff > acur->diff) {
     980  1822171988 :                 deactivate = isbnobt;
     981  1822171988 :                 goto out;
     982             :         }
     983             : 
     984   385089283 :         ASSERT(args->len > acur->len ||
     985             :                (args->len == acur->len && diff <= acur->diff));
     986   385089283 :         acur->rec_bno = bno;
     987   385089283 :         acur->rec_len = len;
     988   385089283 :         acur->bno = bnew;
     989   385089283 :         acur->len = args->len;
     990   385089283 :         acur->diff = diff;
     991   385089283 :         *new = 1;
     992             : 
     993             :         /*
     994             :          * We're done if we found a perfect allocation. This only deactivates
     995             :          * the current cursor, but this is just an optimization to terminate a
     996             :          * cntbt search that otherwise runs to the edge of the tree.
     997             :          */
     998   385089283 :         if (acur->diff == 0 && acur->len == args->maxlen)
     999             :                 deactivate = true;
    1000   377847446 : out:
    1001  3857641315 :         if (deactivate)
    1002    15160316 :                 cur->bc_ag.abt.active = false;
    1003  4242730598 :         trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
    1004  4242730598 :                                   *new);
    1005  4242730598 :         return 0;
    1006             : }
    1007             : 
    1008             : /*
    1009             :  * Complete an allocation of a candidate extent. Remove the extent from both
    1010             :  * trees and update the args structure.
    1011             :  */
    1012             : STATIC int
    1013    55332031 : xfs_alloc_cur_finish(
    1014             :         struct xfs_alloc_arg    *args,
    1015             :         struct xfs_alloc_cur    *acur)
    1016             : {
    1017    55332031 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1018    55332031 :         int                     error;
    1019             : 
    1020    55332031 :         ASSERT(acur->cnt && acur->bnolt);
    1021    55332031 :         ASSERT(acur->bno >= acur->rec_bno);
    1022    55332031 :         ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
    1023   110664062 :         ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
    1024             : 
    1025    55332031 :         error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
    1026             :                                       acur->rec_len, acur->bno, acur->len, 0);
    1027    55333273 :         if (error)
    1028             :                 return error;
    1029             : 
    1030    55333166 :         args->agbno = acur->bno;
    1031    55333166 :         args->len = acur->len;
    1032    55333166 :         args->wasfromfl = 0;
    1033             : 
    1034    55333166 :         trace_xfs_alloc_cur(args);
    1035    55333166 :         return 0;
    1036             : }
    1037             : 
    1038             : /*
    1039             :  * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
    1040             :  * bno optimized lookup to search for extents with ideal size and locality.
    1041             :  */
    1042             : STATIC int
    1043   224897159 : xfs_alloc_cntbt_iter(
    1044             :         struct xfs_alloc_arg            *args,
    1045             :         struct xfs_alloc_cur            *acur)
    1046             : {
    1047   224897159 :         struct xfs_btree_cur    *cur = acur->cnt;
    1048   224897159 :         xfs_agblock_t           bno;
    1049   224897159 :         xfs_extlen_t            len, cur_len;
    1050   224897159 :         int                     error;
    1051   224897159 :         int                     i;
    1052             : 
    1053   449794318 :         if (!xfs_alloc_cur_active(cur))
    1054             :                 return 0;
    1055             : 
    1056             :         /* locality optimized lookup */
    1057   224884049 :         cur_len = acur->cur_len;
    1058   224884049 :         error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
    1059   224881836 :         if (error)
    1060             :                 return error;
    1061   224881835 :         if (i == 0)
    1062             :                 return 0;
    1063   210864264 :         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1064   210877295 :         if (error)
    1065             :                 return error;
    1066             : 
    1067             :         /* check the current record and update search length from it */
    1068   210879680 :         error = xfs_alloc_cur_check(args, acur, cur, &i);
    1069   210758216 :         if (error)
    1070             :                 return error;
    1071   210758216 :         ASSERT(len >= acur->cur_len);
    1072   210758216 :         acur->cur_len = len;
    1073             : 
    1074             :         /*
    1075             :          * We looked up the first record >= [agbno, len] above. The agbno is a
    1076             :          * secondary key and so the current record may lie just before or after
    1077             :          * agbno. If it is past agbno, check the previous record too so long as
    1078             :          * the length matches as it may be closer. Don't check a smaller record
    1079             :          * because that could deactivate our cursor.
    1080             :          */
    1081   210758216 :         if (bno > args->agbno) {
    1082   206970400 :                 error = xfs_btree_decrement(cur, 0, &i);
    1083   207009281 :                 if (!error && i) {
    1084   205881770 :                         error = xfs_alloc_get_rec(cur, &bno, &len, &i);
    1085   205883059 :                         if (!error && i && len == acur->cur_len)
    1086    52854028 :                                 error = xfs_alloc_cur_check(args, acur, cur,
    1087             :                                                             &i);
    1088             :                 }
    1089   207009740 :                 if (error)
    1090             :                         return error;
    1091             :         }
    1092             : 
    1093             :         /*
    1094             :          * Increment the search key until we find at least one allocation
    1095             :          * candidate or if the extent we found was larger. Otherwise, double the
    1096             :          * search key to optimize the search. Efficiency is more important here
    1097             :          * than absolute best locality.
    1098             :          */
    1099   210797556 :         cur_len <<= 1;
    1100   210797556 :         if (!acur->len || acur->cur_len >= cur_len)
    1101   160572399 :                 acur->cur_len++;
    1102             :         else
    1103    50225157 :                 acur->cur_len = cur_len;
    1104             : 
    1105             :         return error;
    1106             : }
    1107             : 
    1108             : /*
    1109             :  * Deal with the case where only small freespaces remain. Either return the
    1110             :  * contents of the last freespace record, or allocate space from the freelist if
    1111             :  * there is nothing in the tree.
    1112             :  */
    1113             : STATIC int                      /* error */
    1114       87078 : xfs_alloc_ag_vextent_small(
    1115             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    1116             :         struct xfs_btree_cur    *ccur,  /* optional by-size cursor */
    1117             :         xfs_agblock_t           *fbnop, /* result block number */
    1118             :         xfs_extlen_t            *flenp, /* result length */
    1119             :         int                     *stat)  /* status: 0-freelist, 1-normal/none */
    1120             : {
    1121       87078 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1122       87078 :         int                     error = 0;
    1123       87078 :         xfs_agblock_t           fbno = NULLAGBLOCK;
    1124       87078 :         xfs_extlen_t            flen = 0;
    1125       87078 :         int                     i = 0;
    1126             : 
    1127             :         /*
    1128             :          * If a cntbt cursor is provided, try to allocate the largest record in
    1129             :          * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
    1130             :          * allocation. Make sure to respect minleft even when pulling from the
    1131             :          * freelist.
    1132             :          */
    1133       87078 :         if (ccur)
    1134       87078 :                 error = xfs_btree_decrement(ccur, 0, &i);
    1135       87078 :         if (error)
    1136           0 :                 goto error;
    1137       87078 :         if (i) {
    1138       87078 :                 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
    1139       87078 :                 if (error)
    1140           0 :                         goto error;
    1141       87078 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1142           0 :                         xfs_btree_mark_sick(ccur);
    1143           0 :                         error = -EFSCORRUPTED;
    1144           0 :                         goto error;
    1145             :                 }
    1146       87078 :                 goto out;
    1147             :         }
    1148             : 
    1149           0 :         if (args->minlen != 1 || args->alignment != 1 ||
    1150           0 :             args->resv == XFS_AG_RESV_AGFL ||
    1151           0 :             be32_to_cpu(agf->agf_flcount) <= args->minleft)
    1152           0 :                 goto out;
    1153             : 
    1154           0 :         error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
    1155             :                         &fbno, 0);
    1156           0 :         if (error)
    1157           0 :                 goto error;
    1158           0 :         if (fbno == NULLAGBLOCK)
    1159           0 :                 goto out;
    1160             : 
    1161           0 :         xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
    1162           0 :                               (args->datatype & XFS_ALLOC_NOBUSY));
    1163             : 
    1164           0 :         if (args->datatype & XFS_ALLOC_USERDATA) {
    1165           0 :                 struct xfs_buf  *bp;
    1166             : 
    1167           0 :                 error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
    1168           0 :                                 XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
    1169             :                                 args->mp->m_bsize, 0, &bp);
    1170           0 :                 if (error)
    1171           0 :                         goto error;
    1172           0 :                 xfs_trans_binval(args->tp, bp);
    1173             :         }
    1174           0 :         *fbnop = args->agbno = fbno;
    1175           0 :         *flenp = args->len = 1;
    1176           0 :         if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
    1177           0 :                 xfs_btree_mark_sick(ccur);
    1178           0 :                 error = -EFSCORRUPTED;
    1179           0 :                 goto error;
    1180             :         }
    1181           0 :         args->wasfromfl = 1;
    1182           0 :         trace_xfs_alloc_small_freelist(args);
    1183             : 
    1184             :         /*
    1185             :          * If we're feeding an AGFL block to something that doesn't live in the
    1186             :          * free space, we need to clear out the OWN_AG rmap.
    1187             :          */
    1188           0 :         error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
    1189             :                               &XFS_RMAP_OINFO_AG);
    1190           0 :         if (error)
    1191           0 :                 goto error;
    1192             : 
    1193           0 :         *stat = 0;
    1194           0 :         return 0;
    1195             : 
    1196       87078 : out:
    1197             :         /*
    1198             :          * Can't do the allocation, give up.
    1199             :          */
    1200       87078 :         if (flen < args->minlen) {
    1201           0 :                 args->agbno = NULLAGBLOCK;
    1202           0 :                 trace_xfs_alloc_small_notenough(args);
    1203           0 :                 flen = 0;
    1204             :         }
    1205       87078 :         *fbnop = fbno;
    1206       87078 :         *flenp = flen;
    1207       87078 :         *stat = 1;
    1208       87078 :         trace_xfs_alloc_small_done(args);
    1209       87078 :         return 0;
    1210             : 
    1211           0 : error:
    1212           0 :         trace_xfs_alloc_small_error(args);
    1213           0 :         return error;
    1214             : }
    1215             : 
    1216             : /*
    1217             :  * Allocate a variable extent at exactly agno/bno.
    1218             :  * Extent's length (returned in *len) will be between minlen and maxlen,
    1219             :  * and of the form k * prod + mod unless there's nothing that large.
    1220             :  * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
    1221             :  */
    1222             : STATIC int                      /* error */
    1223      274560 : xfs_alloc_ag_vextent_exact(
    1224             :         xfs_alloc_arg_t *args)  /* allocation argument structure */
    1225             : {
    1226      274560 :         struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
    1227      274560 :         struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
    1228      274560 :         struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
    1229      274560 :         int             error;
    1230      274560 :         xfs_agblock_t   fbno;   /* start block of found extent */
    1231      274560 :         xfs_extlen_t    flen;   /* length of found extent */
    1232      274560 :         xfs_agblock_t   tbno;   /* start block of busy extent */
    1233      274560 :         xfs_extlen_t    tlen;   /* length of busy extent */
    1234      274560 :         xfs_agblock_t   tend;   /* end block of busy extent */
    1235      274560 :         int             i;      /* success/failure of operation */
    1236      274560 :         unsigned        busy_gen;
    1237             : 
    1238      274560 :         ASSERT(args->alignment == 1);
    1239             : 
    1240             :         /*
    1241             :          * Allocate/initialize a cursor for the by-number freespace btree.
    1242             :          */
    1243      274560 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1244             :                                           args->pag, XFS_BTNUM_BNO);
    1245             : 
    1246             :         /*
    1247             :          * Lookup bno and minlen in the btree (minlen is irrelevant, really).
    1248             :          * Look for the closest free block <= bno, it must contain bno
    1249             :          * if any free block does.
    1250             :          */
    1251      274561 :         error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
    1252      274561 :         if (error)
    1253           2 :                 goto error0;
    1254      274559 :         if (!i)
    1255       21006 :                 goto not_found;
    1256             : 
    1257             :         /*
    1258             :          * Grab the freespace record.
    1259             :          */
    1260      253553 :         error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
    1261      253552 :         if (error)
    1262           0 :                 goto error0;
    1263      253552 :         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1264           0 :                 xfs_btree_mark_sick(bno_cur);
    1265           0 :                 error = -EFSCORRUPTED;
    1266           0 :                 goto error0;
    1267             :         }
    1268      253552 :         ASSERT(fbno <= args->agbno);
    1269             : 
    1270             :         /*
    1271             :          * Check for overlapping busy extents.
    1272             :          */
    1273      253552 :         tbno = fbno;
    1274      253552 :         tlen = flen;
    1275      253552 :         xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
    1276             : 
    1277             :         /*
    1278             :          * Give up if the start of the extent is busy, or the freespace isn't
    1279             :          * long enough for the minimum request.
    1280             :          */
    1281      253551 :         if (tbno > args->agbno)
    1282         316 :                 goto not_found;
    1283      253235 :         if (tlen < args->minlen)
    1284      170141 :                 goto not_found;
    1285       83094 :         tend = tbno + tlen;
    1286       83094 :         if (tend < args->agbno + args->minlen)
    1287        5609 :                 goto not_found;
    1288             : 
    1289             :         /*
    1290             :          * End of extent will be smaller of the freespace end and the
    1291             :          * maximal requested end.
    1292             :          *
    1293             :          * Fix the length according to mod and prod if given.
    1294             :          */
    1295       77485 :         args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
    1296       77485 :                                                 - args->agbno;
    1297       77485 :         xfs_alloc_fix_len(args);
    1298       77484 :         ASSERT(args->agbno + args->len <= tend);
    1299             : 
    1300             :         /*
    1301             :          * We are allocating agbno for args->len
    1302             :          * Allocate/initialize a cursor for the by-size btree.
    1303             :          */
    1304       77484 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1305             :                                         args->pag, XFS_BTNUM_CNT);
    1306      154966 :         ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
    1307       77483 :         error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
    1308             :                                       args->len, XFSA_FIXUP_BNO_OK);
    1309       77488 :         if (error) {
    1310           0 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1311           0 :                 goto error0;
    1312             :         }
    1313             : 
    1314       77488 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1315       77488 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1316             : 
    1317       77486 :         args->wasfromfl = 0;
    1318       77486 :         trace_xfs_alloc_exact_done(args);
    1319       77486 :         return 0;
    1320             : 
    1321      197072 : not_found:
    1322             :         /* Didn't find it, return null. */
    1323      197072 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1324      197072 :         args->agbno = NULLAGBLOCK;
    1325      197072 :         trace_xfs_alloc_exact_notfound(args);
    1326      197072 :         return 0;
    1327             : 
    1328           2 : error0:
    1329           2 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1330           2 :         trace_xfs_alloc_exact_error(args);
    1331           2 :         return error;
    1332             : }
    1333             : 
    1334             : /*
    1335             :  * Search a given number of btree records in a given direction. Check each
    1336             :  * record against the good extent we've already found.
    1337             :  */
    1338             : STATIC int
    1339   516593677 : xfs_alloc_walk_iter(
    1340             :         struct xfs_alloc_arg    *args,
    1341             :         struct xfs_alloc_cur    *acur,
    1342             :         struct xfs_btree_cur    *cur,
    1343             :         bool                    increment,
    1344             :         bool                    find_one, /* quit on first candidate */
    1345             :         int                     count,    /* rec count (-1 for infinite) */
    1346             :         int                     *stat)
    1347             : {
    1348   516593677 :         int                     error;
    1349   516593677 :         int                     i;
    1350             : 
    1351   516593677 :         *stat = 0;
    1352             : 
    1353             :         /*
    1354             :          * Search so long as the cursor is active or we find a better extent.
    1355             :          * The cursor is deactivated if it extends beyond the range of the
    1356             :          * current allocation candidate.
    1357             :          */
    1358  8899143180 :         while (xfs_alloc_cur_active(cur) && count) {
    1359  3969595154 :                 error = xfs_alloc_cur_check(args, acur, cur, &i);
    1360  3978241639 :                 if (error)
    1361           0 :                         return error;
    1362  3978241639 :                 if (i == 1) {
    1363   358970425 :                         *stat = 1;
    1364   358970425 :                         if (find_one)
    1365             :                                 break;
    1366             :                 }
    1367  7902938070 :                 if (!xfs_alloc_cur_active(cur))
    1368             :                         break;
    1369             : 
    1370  3941242747 :                 if (increment)
    1371  3470639029 :                         error = xfs_btree_increment(cur, 0, &i);
    1372             :                 else
    1373   470603718 :                         error = xfs_btree_decrement(cur, 0, &i);
    1374  3932977920 :                 if (error)
    1375           7 :                         return error;
    1376  3932977913 :                 if (i == 0)
    1377    23293853 :                         cur->bc_ag.abt.active = false;
    1378             : 
    1379  3932977913 :                 if (count > 0)
    1380   416049115 :                         count--;
    1381             :         }
    1382             : 
    1383             :         return 0;
    1384             : }
    1385             : 
    1386             : /*
    1387             :  * Search the by-bno and by-size btrees in parallel in search of an extent with
    1388             :  * ideal locality based on the NEAR mode ->agbno locality hint.
    1389             :  */
    1390             : STATIC int
    1391    33219495 : xfs_alloc_ag_vextent_locality(
    1392             :         struct xfs_alloc_arg    *args,
    1393             :         struct xfs_alloc_cur    *acur,
    1394             :         int                     *stat)
    1395             : {
    1396    33219495 :         struct xfs_btree_cur    *fbcur = NULL;
    1397    33219495 :         int                     error;
    1398    33219495 :         int                     i;
    1399    33219495 :         bool                    fbinc;
    1400             : 
    1401    33219495 :         ASSERT(acur->len == 0);
    1402             : 
    1403    33219495 :         *stat = 0;
    1404             : 
    1405    33219495 :         error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
    1406    33219828 :         if (error)
    1407             :                 return error;
    1408    33219994 :         error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
    1409    33219656 :         if (error)
    1410             :                 return error;
    1411    33220228 :         error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
    1412    33220302 :         if (error)
    1413             :                 return error;
    1414             : 
    1415             :         /*
    1416             :          * Search the bnobt and cntbt in parallel. Search the bnobt left and
    1417             :          * right and lookup the closest extent to the locality hint for each
    1418             :          * extent size key in the cntbt. The entire search terminates
    1419             :          * immediately on a bnobt hit because that means we've found best case
    1420             :          * locality. Otherwise the search continues until the cntbt cursor runs
    1421             :          * off the end of the tree. If no allocation candidate is found at this
    1422             :          * point, give up on locality, walk backwards from the end of the cntbt
    1423             :          * and take the first available extent.
    1424             :          *
    1425             :          * The parallel tree searches balance each other out to provide fairly
    1426             :          * consistent performance for various situations. The bnobt search can
    1427             :          * have pathological behavior in the worst case scenario of larger
    1428             :          * allocation requests and fragmented free space. On the other hand, the
    1429             :          * bnobt is able to satisfy most smaller allocation requests much more
    1430             :          * quickly than the cntbt. The cntbt search can sift through fragmented
    1431             :          * free space and sets of free extents for larger allocation requests
    1432             :          * more quickly than the bnobt. Since the locality hint is just a hint
    1433             :          * and we don't want to scan the entire bnobt for perfect locality, the
    1434             :          * cntbt search essentially bounds the bnobt search such that we can
    1435             :          * find good enough locality at reasonable performance in most cases.
    1436             :          */
    1437    39878596 :         while (xfs_alloc_cur_active(acur->bnolt) ||
    1438   487955450 :                xfs_alloc_cur_active(acur->bnogt) ||
    1439       11626 :                xfs_alloc_cur_active(acur->cnt)) {
    1440             : 
    1441   243971912 :                 trace_xfs_alloc_cur_lookup(args);
    1442             : 
    1443             :                 /*
    1444             :                  * Search the bnobt left and right. In the case of a hit, finish
    1445             :                  * the search in the opposite direction and we're done.
    1446             :                  */
    1447   243940006 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
    1448             :                                             true, 1, &i);
    1449   244024331 :                 if (error)
    1450           0 :                         return error;
    1451   244024331 :                 if (i == 1) {
    1452    12192912 :                         trace_xfs_alloc_cur_left(args);
    1453    12193192 :                         fbcur = acur->bnogt;
    1454    12193192 :                         fbinc = true;
    1455    12193192 :                         break;
    1456             :                 }
    1457   231831419 :                 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
    1458             :                                             1, &i);
    1459   231885585 :                 if (error)
    1460           0 :                         return error;
    1461   231885585 :                 if (i == 1) {
    1462     7004171 :                         trace_xfs_alloc_cur_right(args);
    1463     7004198 :                         fbcur = acur->bnolt;
    1464     7004198 :                         fbinc = false;
    1465     7004198 :                         break;
    1466             :                 }
    1467             : 
    1468             :                 /*
    1469             :                  * Check the extent with best locality based on the current
    1470             :                  * extent size search key and keep track of the best candidate.
    1471             :                  */
    1472   224881414 :                 error = xfs_alloc_cntbt_iter(args, acur);
    1473   224774294 :                 if (error)
    1474           1 :                         return error;
    1475   449548586 :                 if (!xfs_alloc_cur_active(acur->cnt)) {
    1476    14022681 :                         trace_xfs_alloc_cur_lookup_done(args);
    1477    14022681 :                         break;
    1478             :                 }
    1479             :         }
    1480             : 
    1481             :         /*
    1482             :          * If we failed to find anything due to busy extents, return empty
    1483             :          * handed so the caller can flush and retry. If no busy extents were
    1484             :          * found, walk backwards from the end of the cntbt as a last resort.
    1485             :          */
    1486    66440280 :         if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
    1487       22241 :                 error = xfs_btree_decrement(acur->cnt, 0, &i);
    1488       22241 :                 if (error)
    1489             :                         return error;
    1490       22241 :                 if (i) {
    1491       22241 :                         acur->cnt->bc_ag.abt.active = true;
    1492       22241 :                         fbcur = acur->cnt;
    1493       22241 :                         fbinc = false;
    1494             :                 }
    1495             :         }
    1496             : 
    1497             :         /*
    1498             :          * Search in the opposite direction for a better entry in the case of
    1499             :          * a bnobt hit or walk backwards from the end of the cntbt.
    1500             :          */
    1501    33220140 :         if (fbcur) {
    1502    19219529 :                 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
    1503             :                                             &i);
    1504    19219614 :                 if (error)
    1505             :                         return error;
    1506             :         }
    1507             : 
    1508    33220218 :         if (acur->len)
    1509    33186007 :                 *stat = 1;
    1510             : 
    1511             :         return 0;
    1512             : }
    1513             : 
    1514             : /* Check the last block of the cnt btree for allocations. */
    1515             : static int
    1516    44383625 : xfs_alloc_ag_vextent_lastblock(
    1517             :         struct xfs_alloc_arg    *args,
    1518             :         struct xfs_alloc_cur    *acur,
    1519             :         xfs_agblock_t           *bno,
    1520             :         xfs_extlen_t            *len,
    1521             :         bool                    *allocated)
    1522             : {
    1523    44383625 :         int                     error;
    1524    44383625 :         int                     i;
    1525             : 
    1526             : #ifdef DEBUG
    1527             :         /* Randomly don't execute the first algorithm. */
    1528    44383625 :         if (get_random_u32_below(2))
    1529             :                 return 0;
    1530             : #endif
    1531             : 
    1532             :         /*
    1533             :          * Start from the entry that lookup found, sequence through all larger
    1534             :          * free blocks.  If we're actually pointing at a record smaller than
    1535             :          * maxlen, go to the start of this block, and skip all those smaller
    1536             :          * than minlen.
    1537             :          */
    1538    22191019 :         if (*len || args->alignment > 1) {
    1539      225299 :                 acur->cnt->bc_levels[0].ptr = 1;
    1540    30695280 :                 do {
    1541    30695280 :                         error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
    1542    30695256 :                         if (error)
    1543           0 :                                 return error;
    1544    30695256 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1545           0 :                                 xfs_btree_mark_sick(acur->cnt);
    1546           0 :                                 return -EFSCORRUPTED;
    1547             :                         }
    1548    30695256 :                         if (*len >= args->minlen)
    1549             :                                 break;
    1550    30469959 :                         error = xfs_btree_increment(acur->cnt, 0, &i);
    1551    30469981 :                         if (error)
    1552           0 :                                 return error;
    1553    30469981 :                 } while (i);
    1554      225297 :                 ASSERT(*len >= args->minlen);
    1555      225297 :                 if (!i)
    1556             :                         return 0;
    1557             :         }
    1558             : 
    1559    22191017 :         error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
    1560    22191218 :         if (error)
    1561             :                 return error;
    1562             : 
    1563             :         /*
    1564             :          * It didn't work.  We COULD be in a case where there's a good record
    1565             :          * somewhere, so try again.
    1566             :          */
    1567    22191218 :         if (acur->len == 0)
    1568             :                 return 0;
    1569             : 
    1570    22147667 :         trace_xfs_alloc_near_first(args);
    1571    22147729 :         *allocated = true;
    1572    22147729 :         return 0;
    1573             : }
    1574             : 
    1575             : /*
    1576             :  * Allocate a variable extent near bno in the allocation group agno.
    1577             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1578             :  * and of the form k * prod + mod unless there's nothing that large.
    1579             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1580             :  */
    1581             : STATIC int
    1582    55333512 : xfs_alloc_ag_vextent_near(
    1583             :         struct xfs_alloc_arg    *args,
    1584             :         uint32_t                alloc_flags)
    1585             : {
    1586    55333512 :         struct xfs_alloc_cur    acur = {};
    1587    55333512 :         int                     error;          /* error code */
    1588    55333512 :         int                     i;              /* result code, temporary */
    1589    55333512 :         xfs_agblock_t           bno;
    1590    55333512 :         xfs_extlen_t            len;
    1591             : 
    1592             :         /* handle uninitialized agbno range so caller doesn't have to */
    1593    55333512 :         if (!args->min_agbno && !args->max_agbno)
    1594    55058534 :                 args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
    1595    55333512 :         ASSERT(args->min_agbno <= args->max_agbno);
    1596             : 
    1597             :         /* clamp agbno to the range if it's outside */
    1598    55333512 :         if (args->agbno < args->min_agbno)
    1599       76947 :                 args->agbno = args->min_agbno;
    1600    55333512 :         if (args->agbno > args->max_agbno)
    1601           0 :                 args->agbno = args->max_agbno;
    1602             : 
    1603             :         /* Retry once quickly if we find busy extents before blocking. */
    1604    55333512 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1605    55367095 : restart:
    1606    55367095 :         len = 0;
    1607             : 
    1608             :         /*
    1609             :          * Set up cursors and see if there are any free extents as big as
    1610             :          * maxlen. If not, pick the last entry in the tree unless the tree is
    1611             :          * empty.
    1612             :          */
    1613    55367095 :         error = xfs_alloc_cur_setup(args, &acur);
    1614    55366952 :         if (error == -ENOSPC) {
    1615       61249 :                 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
    1616             :                                 &len, &i);
    1617       61249 :                 if (error)
    1618           0 :                         goto out;
    1619       61249 :                 if (i == 0 || len == 0) {
    1620           0 :                         trace_xfs_alloc_near_noentry(args);
    1621           0 :                         goto out;
    1622             :                 }
    1623       61249 :                 ASSERT(i == 1);
    1624    55305703 :         } else if (error) {
    1625         344 :                 goto out;
    1626             :         }
    1627             : 
    1628             :         /*
    1629             :          * First algorithm.
    1630             :          * If the requested extent is large wrt the freespaces available
    1631             :          * in this a.g., then the cursor will be pointing to a btree entry
    1632             :          * near the right edge of the tree.  If it's in the last btree leaf
    1633             :          * block, then we just examine all the entries in that block
    1634             :          * that are big enough, and pick the best one.
    1635             :          */
    1636    55366608 :         if (xfs_btree_islastblock(acur.cnt, 0)) {
    1637    44383640 :                 bool            allocated = false;
    1638             : 
    1639    44383640 :                 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
    1640             :                                 &allocated);
    1641    44384043 :                 if (error)
    1642           0 :                         goto out;
    1643    44384043 :                 if (allocated)
    1644    22147707 :                         goto alloc_finish;
    1645             :         }
    1646             : 
    1647             :         /*
    1648             :          * Second algorithm. Combined cntbt and bnobt search to find ideal
    1649             :          * locality.
    1650             :          */
    1651    33219755 :         error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
    1652    33219531 :         if (error)
    1653         164 :                 goto out;
    1654             : 
    1655             :         /*
    1656             :          * If we couldn't get anything, give up.
    1657             :          */
    1658    33219367 :         if (!acur.len) {
    1659       33584 :                 if (acur.busy) {
    1660             :                         /*
    1661             :                          * Our only valid extents must have been busy. Flush and
    1662             :                          * retry the allocation again. If we get an -EAGAIN
    1663             :                          * error, we're being told that a deadlock was avoided
    1664             :                          * and the current transaction needs committing before
    1665             :                          * the allocation can be retried.
    1666             :                          */
    1667       33584 :                         trace_xfs_alloc_near_busy(args);
    1668       33584 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1669             :                                         acur.busy_gen, alloc_flags);
    1670       33584 :                         if (error)
    1671           1 :                                 goto out;
    1672             : 
    1673       33583 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1674       33583 :                         goto restart;
    1675             :                 }
    1676           0 :                 trace_xfs_alloc_size_neither(args);
    1677           0 :                 args->agbno = NULLAGBLOCK;
    1678           0 :                 goto out;
    1679             :         }
    1680             : 
    1681    33185783 : alloc_finish:
    1682             :         /* fix up btrees on a successful allocation */
    1683    55333490 :         error = xfs_alloc_cur_finish(args, &acur);
    1684             : 
    1685    55333421 : out:
    1686    55333421 :         xfs_alloc_cur_close(&acur, error);
    1687    55334054 :         return error;
    1688             : }
    1689             : 
    1690             : /*
    1691             :  * Allocate a variable extent anywhere in the allocation group agno.
    1692             :  * Extent's length (returned in len) will be between minlen and maxlen,
    1693             :  * and of the form k * prod + mod unless there's nothing that large.
    1694             :  * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
    1695             :  */
    1696             : static int
    1697     2886321 : xfs_alloc_ag_vextent_size(
    1698             :         struct xfs_alloc_arg    *args,
    1699             :         uint32_t                alloc_flags)
    1700             : {
    1701     2886321 :         struct xfs_agf          *agf = args->agbp->b_addr;
    1702     2886321 :         struct xfs_btree_cur    *bno_cur;
    1703     2886321 :         struct xfs_btree_cur    *cnt_cur;
    1704     2886321 :         xfs_agblock_t           fbno;           /* start of found freespace */
    1705     2886321 :         xfs_extlen_t            flen;           /* length of found freespace */
    1706     2886321 :         xfs_agblock_t           rbno;           /* returned block number */
    1707     2886321 :         xfs_extlen_t            rlen;           /* length of returned extent */
    1708     2886321 :         bool                    busy;
    1709     2886321 :         unsigned                busy_gen;
    1710     2886321 :         int                     error;
    1711     2886321 :         int                     i;
    1712             : 
    1713             :         /* Retry once quickly if we find busy extents before blocking. */
    1714     2886321 :         alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
    1715             : restart:
    1716             :         /*
    1717             :          * Allocate and initialize a cursor for the by-size btree.
    1718             :          */
    1719     2894804 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1720             :                                         args->pag, XFS_BTNUM_CNT);
    1721     2894891 :         bno_cur = NULL;
    1722             : 
    1723             :         /*
    1724             :          * Look for an entry >= maxlen+alignment-1 blocks.
    1725             :          */
    1726     2894898 :         if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
    1727     2894891 :                         args->maxlen + args->alignment - 1, &i)))
    1728          39 :                 goto error0;
    1729             : 
    1730             :         /*
    1731             :          * If none then we have to settle for a smaller extent. In the case that
    1732             :          * there are no large extents, this will return the last entry in the
    1733             :          * tree unless the tree is empty. In the case that there are only busy
    1734             :          * large extents, this will return the largest small extent unless there
    1735             :          * are no smaller extents available.
    1736             :          */
    1737     2894859 :         if (!i) {
    1738       25829 :                 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
    1739             :                                                    &fbno, &flen, &i);
    1740       25829 :                 if (error)
    1741           0 :                         goto error0;
    1742       25829 :                 if (i == 0 || flen == 0) {
    1743           0 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1744           0 :                         trace_xfs_alloc_size_noentry(args);
    1745           0 :                         return 0;
    1746             :                 }
    1747       25829 :                 ASSERT(i == 1);
    1748       25829 :                 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
    1749             :                                 &rlen, &busy_gen);
    1750             :         } else {
    1751             :                 /*
    1752             :                  * Search for a non-busy extent that is large enough.
    1753             :                  */
    1754   363985624 :                 for (;;) {
    1755   183427327 :                         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
    1756   183441289 :                         if (error)
    1757           0 :                                 goto error0;
    1758   183441289 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1759           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1760           0 :                                 error = -EFSCORRUPTED;
    1761           0 :                                 goto error0;
    1762             :                         }
    1763             : 
    1764   183441289 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1765             :                                         &rbno, &rlen, &busy_gen);
    1766             : 
    1767   183442226 :                         if (rlen >= args->maxlen)
    1768             :                                 break;
    1769             : 
    1770   180581535 :                         error = xfs_btree_increment(cnt_cur, 0, &i);
    1771   180566625 :                         if (error)
    1772           0 :                                 goto error0;
    1773   180566625 :                         if (i)
    1774   180558297 :                                 continue;
    1775             : 
    1776             :                         /*
    1777             :                          * Our only valid extents must have been busy. Flush and
    1778             :                          * retry the allocation again. If we get an -EAGAIN
    1779             :                          * error, we're being told that a deadlock was avoided
    1780             :                          * and the current transaction needs committing before
    1781             :                          * the allocation can be retried.
    1782             :                          */
    1783        8328 :                         trace_xfs_alloc_size_busy(args);
    1784        8328 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1785             :                                         busy_gen, alloc_flags);
    1786        8328 :                         if (error)
    1787           0 :                                 goto error0;
    1788             : 
    1789        8328 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1790        8328 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1791        8328 :                         goto restart;
    1792             :                 }
    1793             :         }
    1794             : 
    1795             :         /*
    1796             :          * In the first case above, we got the last entry in the
    1797             :          * by-size btree.  Now we check to see if the space hits maxlen
    1798             :          * once aligned; if not, we search left for something better.
    1799             :          * This can't happen in the second case above.
    1800             :          */
    1801     2886504 :         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1802     2886504 :         if (XFS_IS_CORRUPT(args->mp,
    1803             :                            rlen != 0 &&
    1804             :                            (rlen > flen ||
    1805             :                             rbno + rlen > fbno + flen))) {
    1806           0 :                 xfs_btree_mark_sick(cnt_cur);
    1807           0 :                 error = -EFSCORRUPTED;
    1808           0 :                 goto error0;
    1809             :         }
    1810     2886504 :         if (rlen < args->maxlen) {
    1811       25829 :                 xfs_agblock_t   bestfbno;
    1812       25829 :                 xfs_extlen_t    bestflen;
    1813       25829 :                 xfs_agblock_t   bestrbno;
    1814       25829 :                 xfs_extlen_t    bestrlen;
    1815             : 
    1816       25829 :                 bestrlen = rlen;
    1817       25829 :                 bestrbno = rbno;
    1818       25829 :                 bestflen = flen;
    1819       25829 :                 bestfbno = fbno;
    1820     3587419 :                 for (;;) {
    1821     3587419 :                         if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
    1822           0 :                                 goto error0;
    1823     3587419 :                         if (i == 0)
    1824             :                                 break;
    1825     3587259 :                         if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
    1826             :                                         &i)))
    1827           0 :                                 goto error0;
    1828     3587259 :                         if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1829           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1830           0 :                                 error = -EFSCORRUPTED;
    1831           0 :                                 goto error0;
    1832             :                         }
    1833     3587259 :                         if (flen < bestrlen)
    1834             :                                 break;
    1835     3561590 :                         busy = xfs_alloc_compute_aligned(args, fbno, flen,
    1836             :                                         &rbno, &rlen, &busy_gen);
    1837     3561590 :                         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
    1838     3561590 :                         if (XFS_IS_CORRUPT(args->mp,
    1839             :                                            rlen != 0 &&
    1840             :                                            (rlen > flen ||
    1841             :                                             rbno + rlen > fbno + flen))) {
    1842           0 :                                 xfs_btree_mark_sick(cnt_cur);
    1843           0 :                                 error = -EFSCORRUPTED;
    1844           0 :                                 goto error0;
    1845             :                         }
    1846     3561590 :                         if (rlen > bestrlen) {
    1847       14989 :                                 bestrlen = rlen;
    1848       14989 :                                 bestrbno = rbno;
    1849       14989 :                                 bestflen = flen;
    1850       14989 :                                 bestfbno = fbno;
    1851       14989 :                                 if (rlen == args->maxlen)
    1852             :                                         break;
    1853             :                         }
    1854             :                 }
    1855       25829 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
    1856             :                                 &i)))
    1857           0 :                         goto error0;
    1858       25829 :                 if (XFS_IS_CORRUPT(args->mp, i != 1)) {
    1859           0 :                         xfs_btree_mark_sick(cnt_cur);
    1860           0 :                         error = -EFSCORRUPTED;
    1861           0 :                         goto error0;
    1862             :                 }
    1863       25829 :                 rlen = bestrlen;
    1864       25829 :                 rbno = bestrbno;
    1865       25829 :                 flen = bestflen;
    1866       25829 :                 fbno = bestfbno;
    1867             :         }
    1868     2886504 :         args->wasfromfl = 0;
    1869             :         /*
    1870             :          * Fix up the length.
    1871             :          */
    1872     2886504 :         args->len = rlen;
    1873     2886504 :         if (rlen < args->minlen) {
    1874       14985 :                 if (busy) {
    1875             :                         /*
    1876             :                          * Our only valid extents must have been busy. Flush and
    1877             :                          * retry the allocation again. If we get an -EAGAIN
    1878             :                          * error, we're being told that a deadlock was avoided
    1879             :                          * and the current transaction needs committing before
    1880             :                          * the allocation can be retried.
    1881             :                          */
    1882         155 :                         trace_xfs_alloc_size_busy(args);
    1883         155 :                         error = xfs_extent_busy_flush(args->tp, args->pag,
    1884             :                                         busy_gen, alloc_flags);
    1885         155 :                         if (error)
    1886           0 :                                 goto error0;
    1887             : 
    1888         155 :                         alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
    1889         155 :                         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1890         155 :                         goto restart;
    1891             :                 }
    1892       14830 :                 goto out_nominleft;
    1893             :         }
    1894     2871519 :         xfs_alloc_fix_len(args);
    1895             : 
    1896     2871507 :         rlen = args->len;
    1897     2871507 :         if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
    1898           0 :                 xfs_btree_mark_sick(cnt_cur);
    1899           0 :                 error = -EFSCORRUPTED;
    1900           0 :                 goto error0;
    1901             :         }
    1902             :         /*
    1903             :          * Allocate and initialize a cursor for the by-block tree.
    1904             :          */
    1905     2871507 :         bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
    1906             :                                         args->pag, XFS_BTNUM_BNO);
    1907     2871516 :         if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
    1908             :                         rbno, rlen, XFSA_FIXUP_CNT_OK)))
    1909          33 :                 goto error0;
    1910     2871498 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1911     2871492 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    1912     2871487 :         cnt_cur = bno_cur = NULL;
    1913     2871487 :         args->len = rlen;
    1914     2871487 :         args->agbno = rbno;
    1915     5742974 :         if (XFS_IS_CORRUPT(args->mp,
    1916             :                            args->agbno + args->len >
    1917             :                            be32_to_cpu(agf->agf_length))) {
    1918           0 :                 xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
    1919           0 :                 error = -EFSCORRUPTED;
    1920           0 :                 goto error0;
    1921             :         }
    1922     2871487 :         trace_xfs_alloc_size_done(args);
    1923     2871487 :         return 0;
    1924             : 
    1925          72 : error0:
    1926          72 :         trace_xfs_alloc_size_error(args);
    1927          72 :         if (cnt_cur)
    1928          72 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    1929          72 :         if (bno_cur)
    1930          33 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    1931             :         return error;
    1932             : 
    1933             : out_nominleft:
    1934       14830 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    1935       14830 :         trace_xfs_alloc_size_nominleft(args);
    1936       14830 :         args->agbno = NULLAGBLOCK;
    1937       14830 :         return 0;
    1938             : }
    1939             : 
    1940             : /*
    1941             :  * Free the extent starting at agno/bno for length.
    1942             :  */
    1943             : STATIC int
    1944    51375887 : xfs_free_ag_extent(
    1945             :         struct xfs_trans                *tp,
    1946             :         struct xfs_buf                  *agbp,
    1947             :         xfs_agnumber_t                  agno,
    1948             :         xfs_agblock_t                   bno,
    1949             :         xfs_extlen_t                    len,
    1950             :         const struct xfs_owner_info     *oinfo,
    1951             :         enum xfs_ag_resv_type           type)
    1952             : {
    1953    51375887 :         struct xfs_mount                *mp;
    1954    51375887 :         struct xfs_btree_cur            *bno_cur;
    1955    51375887 :         struct xfs_btree_cur            *cnt_cur;
    1956    51375887 :         xfs_agblock_t                   gtbno; /* start of right neighbor */
    1957    51375887 :         xfs_extlen_t                    gtlen; /* length of right neighbor */
    1958    51375887 :         xfs_agblock_t                   ltbno; /* start of left neighbor */
    1959    51375887 :         xfs_extlen_t                    ltlen; /* length of left neighbor */
    1960    51375887 :         xfs_agblock_t                   nbno; /* new starting block of freesp */
    1961    51375887 :         xfs_extlen_t                    nlen; /* new length of freespace */
    1962    51375887 :         int                             haveleft; /* have a left neighbor */
    1963    51375887 :         int                             haveright; /* have a right neighbor */
    1964    51375887 :         int                             i;
    1965    51375887 :         int                             error;
    1966    51375887 :         struct xfs_perag                *pag = agbp->b_pag;
    1967             : 
    1968    51375887 :         bno_cur = cnt_cur = NULL;
    1969    51375887 :         mp = tp->t_mountp;
    1970             : 
    1971    51375887 :         if (!xfs_rmap_should_skip_owner_update(oinfo)) {
    1972     1048119 :                 error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
    1973     1048091 :                 if (error)
    1974          20 :                         goto error0;
    1975             :         }
    1976             : 
    1977             :         /*
    1978             :          * Allocate and initialize a cursor for the by-block btree.
    1979             :          */
    1980    51375839 :         bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
    1981             :         /*
    1982             :          * Look for a neighboring block on the left (lower block numbers)
    1983             :          * that is contiguous with this space.
    1984             :          */
    1985    51375579 :         if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
    1986         126 :                 goto error0;
    1987    51375689 :         if (haveleft) {
    1988             :                 /*
    1989             :                  * There is a block to our left.
    1990             :                  */
    1991    50455099 :                 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
    1992           0 :                         goto error0;
    1993    50455534 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1994           0 :                         xfs_btree_mark_sick(bno_cur);
    1995           0 :                         error = -EFSCORRUPTED;
    1996           0 :                         goto error0;
    1997             :                 }
    1998             :                 /*
    1999             :                  * It's not contiguous, though.
    2000             :                  */
    2001    50455534 :                 if (ltbno + ltlen < bno)
    2002    36742095 :                         haveleft = 0;
    2003             :                 else {
    2004             :                         /*
    2005             :                          * If this failure happens the request to free this
    2006             :                          * space was invalid, it's (partly) already free.
    2007             :                          * Very bad.
    2008             :                          */
    2009    13713439 :                         if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
    2010           0 :                                 xfs_btree_mark_sick(bno_cur);
    2011           0 :                                 error = -EFSCORRUPTED;
    2012           0 :                                 goto error0;
    2013             :                         }
    2014             :                 }
    2015             :         }
    2016             :         /*
    2017             :          * Look for a neighboring block on the right (higher block numbers)
    2018             :          * that is contiguous with this space.
    2019             :          */
    2020    51376124 :         if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
    2021           0 :                 goto error0;
    2022    51375362 :         if (haveright) {
    2023             :                 /*
    2024             :                  * There is a block to our right.
    2025             :                  */
    2026    51305013 :                 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
    2027           0 :                         goto error0;
    2028    51306434 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2029           0 :                         xfs_btree_mark_sick(bno_cur);
    2030           0 :                         error = -EFSCORRUPTED;
    2031           0 :                         goto error0;
    2032             :                 }
    2033             :                 /*
    2034             :                  * It's not contiguous, though.
    2035             :                  */
    2036    51306434 :                 if (bno + len < gtbno)
    2037    33087925 :                         haveright = 0;
    2038             :                 else {
    2039             :                         /*
    2040             :                          * If this failure happens the request to free this
    2041             :                          * space was invalid, it's (partly) already free.
    2042             :                          * Very bad.
    2043             :                          */
    2044    18218509 :                         if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
    2045           0 :                                 xfs_btree_mark_sick(bno_cur);
    2046           0 :                                 error = -EFSCORRUPTED;
    2047           0 :                                 goto error0;
    2048             :                         }
    2049             :                 }
    2050             :         }
    2051             :         /*
    2052             :          * Now allocate and initialize a cursor for the by-size tree.
    2053             :          */
    2054    51376783 :         cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
    2055             :         /*
    2056             :          * Have both left and right contiguous neighbors.
    2057             :          * Merge all three into a single free block.
    2058             :          */
    2059    51376120 :         if (haveleft && haveright) {
    2060             :                 /*
    2061             :                  * Delete the old by-size entry on the left.
    2062             :                  */
    2063     7719796 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2064          10 :                         goto error0;
    2065     7719795 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2066           0 :                         xfs_btree_mark_sick(cnt_cur);
    2067           0 :                         error = -EFSCORRUPTED;
    2068           0 :                         goto error0;
    2069             :                 }
    2070     7719795 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2071           0 :                         goto error0;
    2072     7719785 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2073           0 :                         xfs_btree_mark_sick(cnt_cur);
    2074           0 :                         error = -EFSCORRUPTED;
    2075           0 :                         goto error0;
    2076             :                 }
    2077             :                 /*
    2078             :                  * Delete the old by-size entry on the right.
    2079             :                  */
    2080     7719785 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2081           7 :                         goto error0;
    2082     7719776 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2083           0 :                         xfs_btree_mark_sick(cnt_cur);
    2084           0 :                         error = -EFSCORRUPTED;
    2085           0 :                         goto error0;
    2086             :                 }
    2087     7719776 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2088           0 :                         goto error0;
    2089     7719788 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2090           0 :                         xfs_btree_mark_sick(cnt_cur);
    2091           0 :                         error = -EFSCORRUPTED;
    2092           0 :                         goto error0;
    2093             :                 }
    2094             :                 /*
    2095             :                  * Delete the old by-block entry for the right block.
    2096             :                  */
    2097     7719788 :                 if ((error = xfs_btree_delete(bno_cur, &i)))
    2098           0 :                         goto error0;
    2099     7719786 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2100           0 :                         xfs_btree_mark_sick(bno_cur);
    2101           0 :                         error = -EFSCORRUPTED;
    2102           0 :                         goto error0;
    2103             :                 }
    2104             :                 /*
    2105             :                  * Move the by-block cursor back to the left neighbor.
    2106             :                  */
    2107     7719786 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2108           0 :                         goto error0;
    2109     7719767 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2110           0 :                         xfs_btree_mark_sick(bno_cur);
    2111           0 :                         error = -EFSCORRUPTED;
    2112           0 :                         goto error0;
    2113             :                 }
    2114             : #ifdef DEBUG
    2115             :                 /*
    2116             :                  * Check that this is the right record: delete didn't
    2117             :                  * mangle the cursor.
    2118             :                  */
    2119             :                 {
    2120     7719767 :                         xfs_agblock_t   xxbno;
    2121     7719767 :                         xfs_extlen_t    xxlen;
    2122             : 
    2123     7719767 :                         if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
    2124             :                                         &i)))
    2125           0 :                                 goto error0;
    2126     7719779 :                         if (XFS_IS_CORRUPT(mp,
    2127             :                                            i != 1 ||
    2128             :                                            xxbno != ltbno ||
    2129             :                                            xxlen != ltlen)) {
    2130           0 :                                 xfs_btree_mark_sick(bno_cur);
    2131           0 :                                 error = -EFSCORRUPTED;
    2132           0 :                                 goto error0;
    2133             :                         }
    2134             :                 }
    2135             : #endif
    2136             :                 /*
    2137             :                  * Update remaining by-block entry to the new, joined block.
    2138             :                  */
    2139     7719779 :                 nbno = ltbno;
    2140     7719779 :                 nlen = len + ltlen + gtlen;
    2141     7719779 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2142           0 :                         goto error0;
    2143             :         }
    2144             :         /*
    2145             :          * Have only a left contiguous neighbor.
    2146             :          * Merge it together with the new freespace.
    2147             :          */
    2148    43656324 :         else if (haveleft) {
    2149             :                 /*
    2150             :                  * Delete the old by-size entry on the left.
    2151             :                  */
    2152     5993668 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
    2153           2 :                         goto error0;
    2154     5993677 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2155           0 :                         xfs_btree_mark_sick(cnt_cur);
    2156           0 :                         error = -EFSCORRUPTED;
    2157           0 :                         goto error0;
    2158             :                 }
    2159     5993677 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2160           0 :                         goto error0;
    2161     5993664 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2162           0 :                         xfs_btree_mark_sick(cnt_cur);
    2163           0 :                         error = -EFSCORRUPTED;
    2164           0 :                         goto error0;
    2165             :                 }
    2166             :                 /*
    2167             :                  * Back up the by-block cursor to the left neighbor, and
    2168             :                  * update its length.
    2169             :                  */
    2170     5993664 :                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
    2171           0 :                         goto error0;
    2172     5993661 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2173           0 :                         xfs_btree_mark_sick(bno_cur);
    2174           0 :                         error = -EFSCORRUPTED;
    2175           0 :                         goto error0;
    2176             :                 }
    2177     5993661 :                 nbno = ltbno;
    2178     5993661 :                 nlen = len + ltlen;
    2179     5993661 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2180           0 :                         goto error0;
    2181             :         }
    2182             :         /*
    2183             :          * Have only a right contiguous neighbor.
    2184             :          * Merge it together with the new freespace.
    2185             :          */
    2186    37662656 :         else if (haveright) {
    2187             :                 /*
    2188             :                  * Delete the old by-size entry on the right.
    2189             :                  */
    2190    10498736 :                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
    2191          15 :                         goto error0;
    2192    10498738 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2193           0 :                         xfs_btree_mark_sick(cnt_cur);
    2194           0 :                         error = -EFSCORRUPTED;
    2195           0 :                         goto error0;
    2196             :                 }
    2197    10498738 :                 if ((error = xfs_btree_delete(cnt_cur, &i)))
    2198           0 :                         goto error0;
    2199    10498730 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2200           0 :                         xfs_btree_mark_sick(cnt_cur);
    2201           0 :                         error = -EFSCORRUPTED;
    2202           0 :                         goto error0;
    2203             :                 }
    2204             :                 /*
    2205             :                  * Update the starting block and length of the right
    2206             :                  * neighbor in the by-block tree.
    2207             :                  */
    2208    10498730 :                 nbno = bno;
    2209    10498730 :                 nlen = len + gtlen;
    2210    10498730 :                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
    2211           0 :                         goto error0;
    2212             :         }
    2213             :         /*
    2214             :          * No contiguous neighbors.
    2215             :          * Insert the new freespace into the by-block tree.
    2216             :          */
    2217             :         else {
    2218    27163920 :                 nbno = bno;
    2219    27163920 :                 nlen = len;
    2220    27163920 :                 if ((error = xfs_btree_insert(bno_cur, &i)))
    2221           0 :                         goto error0;
    2222    27164062 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    2223           0 :                         xfs_btree_mark_sick(bno_cur);
    2224           0 :                         error = -EFSCORRUPTED;
    2225           0 :                         goto error0;
    2226             :                 }
    2227             :         }
    2228    51376230 :         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
    2229    51375591 :         bno_cur = NULL;
    2230             :         /*
    2231             :          * In all cases we need to insert the new freespace in the by-size tree.
    2232             :          */
    2233    51375591 :         if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
    2234          22 :                 goto error0;
    2235    51376482 :         if (XFS_IS_CORRUPT(mp, i != 0)) {
    2236           0 :                 xfs_btree_mark_sick(cnt_cur);
    2237           0 :                 error = -EFSCORRUPTED;
    2238           0 :                 goto error0;
    2239             :         }
    2240    51376482 :         if ((error = xfs_btree_insert(cnt_cur, &i)))
    2241           0 :                 goto error0;
    2242    51376654 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    2243           0 :                 xfs_btree_mark_sick(cnt_cur);
    2244           0 :                 error = -EFSCORRUPTED;
    2245           0 :                 goto error0;
    2246             :         }
    2247    51376654 :         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
    2248    51376520 :         cnt_cur = NULL;
    2249             : 
    2250             :         /*
    2251             :          * Update the freespace totals in the ag and superblock.
    2252             :          */
    2253    51376520 :         error = xfs_alloc_update_counters(tp, agbp, len);
    2254    51375799 :         xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
    2255    51374970 :         if (error)
    2256           0 :                 goto error0;
    2257             : 
    2258    51374970 :         XFS_STATS_INC(mp, xs_freex);
    2259    51374970 :         XFS_STATS_ADD(mp, xs_freeb, len);
    2260             : 
    2261    51374970 :         trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
    2262             : 
    2263    51374970 :         return 0;
    2264             : 
    2265         202 :  error0:
    2266         202 :         trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
    2267         202 :         if (bno_cur)
    2268         160 :                 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
    2269         202 :         if (cnt_cur)
    2270          56 :                 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
    2271             :         return error;
    2272             : }
    2273             : 
    2274             : /*
    2275             :  * Visible (exported) allocation/free functions.
    2276             :  * Some of these are used just by xfs_alloc_btree.c and this file.
    2277             :  */
    2278             : 
    2279             : /*
    2280             :  * Compute and fill in value of m_alloc_maxlevels.
    2281             :  */
    2282             : void
    2283       24119 : xfs_alloc_compute_maxlevels(
    2284             :         xfs_mount_t     *mp)    /* file system mount structure */
    2285             : {
    2286       48238 :         mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
    2287       24119 :                         (mp->m_sb.sb_agblocks + 1) / 2);
    2288       24119 :         ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
    2289       24119 : }
    2290             : 
    2291             : /*
    2292             :  * Find the length of the longest extent in an AG.  The 'need' parameter
    2293             :  * specifies how much space we're going to need for the AGFL and the
    2294             :  * 'reserved' parameter tells us how many blocks in this AG are reserved for
    2295             :  * other callers.
    2296             :  */
    2297             : xfs_extlen_t
    2298   209380583 : xfs_alloc_longest_free_extent(
    2299             :         struct xfs_perag        *pag,
    2300             :         xfs_extlen_t            need,
    2301             :         xfs_extlen_t            reserved)
    2302             : {
    2303   209380583 :         xfs_extlen_t            delta = 0;
    2304             : 
    2305             :         /*
    2306             :          * If the AGFL needs a recharge, we'll have to subtract that from the
    2307             :          * longest extent.
    2308             :          */
    2309   209380583 :         if (need > pag->pagf_flcount)
    2310      349023 :                 delta = need - pag->pagf_flcount;
    2311             : 
    2312             :         /*
    2313             :          * If we cannot maintain others' reservations with space from the
    2314             :          * not-longest freesp extents, we'll have to subtract /that/ from
    2315             :          * the longest extent too.
    2316             :          */
    2317   209380583 :         if (pag->pagf_freeblks - pag->pagf_longest < reserved)
    2318   157269874 :                 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
    2319             : 
    2320             :         /*
    2321             :          * If the longest extent is long enough to satisfy all the
    2322             :          * reservations and AGFL rules in place, we can return this extent.
    2323             :          */
    2324   209380583 :         if (pag->pagf_longest > delta)
    2325   209348980 :                 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
    2326             :                                 pag->pagf_longest - delta);
    2327             : 
    2328             :         /* Otherwise, let the caller try for 1 block if there's space. */
    2329       31603 :         return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
    2330             : }
    2331             : 
    2332             : /*
    2333             :  * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
    2334             :  * return the largest possible minimum length.
    2335             :  */
    2336             : unsigned int
    2337   740937690 : xfs_alloc_min_freelist(
    2338             :         struct xfs_mount        *mp,
    2339             :         struct xfs_perag        *pag)
    2340             : {
    2341             :         /* AG btrees have at least 1 level. */
    2342   740937690 :         static const uint8_t    fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
    2343   740937690 :         const uint8_t           *levels = pag ? pag->pagf_levels : fake_levels;
    2344   740937690 :         unsigned int            min_free;
    2345             : 
    2346   740937690 :         ASSERT(mp->m_alloc_maxlevels > 0);
    2347             : 
    2348             :         /* space needed by-bno freespace btree */
    2349   740937690 :         min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
    2350             :                                        mp->m_alloc_maxlevels);
    2351             :         /* space needed by-size freespace btree */
    2352   740937690 :         min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
    2353             :                                        mp->m_alloc_maxlevels);
    2354             :         /* space needed reverse mapping used space btree */
    2355   740937690 :         if (xfs_has_rmapbt(mp))
    2356   675853284 :                 min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
    2357             :                                                 mp->m_rmap_maxlevels);
    2358             : 
    2359   740937690 :         return min_free;
    2360             : }
    2361             : 
    2362             : /*
    2363             :  * Check if the operation we are fixing up the freelist for should go ahead or
    2364             :  * not. If we are freeing blocks, we always allow it, otherwise the allocation
    2365             :  * is dependent on whether the size and shape of free space available will
    2366             :  * permit the requested allocation to take place.
    2367             :  */
    2368             : static bool
    2369   684913698 : xfs_alloc_space_available(
    2370             :         struct xfs_alloc_arg    *args,
    2371             :         xfs_extlen_t            min_free,
    2372             :         int                     flags)
    2373             : {
    2374   684913698 :         struct xfs_perag        *pag = args->pag;
    2375   684913698 :         xfs_extlen_t            alloc_len, longest;
    2376   684913698 :         xfs_extlen_t            reservation; /* blocks that are still reserved */
    2377   684913698 :         int                     available;
    2378   684913698 :         xfs_extlen_t            agflcount;
    2379             : 
    2380   684913698 :         if (flags & XFS_ALLOC_FLAG_FREEING)
    2381             :                 return true;
    2382             : 
    2383   153299209 :         reservation = xfs_ag_resv_needed(pag, args->resv);
    2384             : 
    2385             :         /* do we have enough contiguous free space for the allocation? */
    2386   153299296 :         alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
    2387   153299296 :         longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
    2388   153299296 :         if (longest < alloc_len)
    2389             :                 return false;
    2390             : 
    2391             :         /*
    2392             :          * Do we have enough free space remaining for the allocation? Don't
    2393             :          * account extra agfl blocks because we are about to defer free them,
    2394             :          * making them unavailable until the current transaction commits.
    2395             :          */
    2396   138776976 :         agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
    2397   138776976 :         available = (int)(pag->pagf_freeblks + agflcount -
    2398   138776976 :                           reservation - min_free - args->minleft);
    2399   138776976 :         if (available < (int)max(args->total, alloc_len))
    2400             :                 return false;
    2401             : 
    2402             :         /*
    2403             :          * Clamp maxlen to the amount of free space available for the actual
    2404             :          * extent allocation.
    2405             :          */
    2406   119094909 :         if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
    2407       17542 :                 args->maxlen = available;
    2408       17542 :                 ASSERT(args->maxlen > 0);
    2409       17542 :                 ASSERT(args->maxlen >= args->minlen);
    2410             :         }
    2411             : 
    2412             :         return true;
    2413             : }
    2414             : 
    2415             : int
    2416      198096 : xfs_free_agfl_block(
    2417             :         struct xfs_trans        *tp,
    2418             :         xfs_agnumber_t          agno,
    2419             :         xfs_agblock_t           agbno,
    2420             :         struct xfs_buf          *agbp,
    2421             :         struct xfs_owner_info   *oinfo)
    2422             : {
    2423      198096 :         int                     error;
    2424      198096 :         struct xfs_buf          *bp;
    2425             : 
    2426      198096 :         error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
    2427             :                                    XFS_AG_RESV_AGFL);
    2428      198097 :         if (error)
    2429             :                 return error;
    2430             : 
    2431      198093 :         error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
    2432      198093 :                         XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
    2433             :                         tp->t_mountp->m_bsize, 0, &bp);
    2434      198093 :         if (error)
    2435             :                 return error;
    2436      198093 :         xfs_trans_binval(tp, bp);
    2437             : 
    2438      198093 :         return 0;
    2439             : }
    2440             : 
    2441             : /*
    2442             :  * Check the agfl fields of the agf for inconsistency or corruption.
    2443             :  *
    2444             :  * The original purpose was to detect an agfl header padding mismatch between
    2445             :  * current and early v5 kernels. This problem manifests as a 1-slot size
    2446             :  * difference between the on-disk flcount and the active [first, last] range of
    2447             :  * a wrapped agfl.
    2448             :  *
    2449             :  * However, we need to use these same checks to catch agfl count corruptions
    2450             :  * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
    2451             :  * way, we need to reset the agfl and warn the user.
    2452             :  *
    2453             :  * Return true if a reset is required before the agfl can be used, false
    2454             :  * otherwise.
    2455             :  */
    2456             : static bool
    2457      173139 : xfs_agfl_needs_reset(
    2458             :         struct xfs_mount        *mp,
    2459             :         struct xfs_agf          *agf)
    2460             : {
    2461      173139 :         uint32_t                f = be32_to_cpu(agf->agf_flfirst);
    2462      173139 :         uint32_t                l = be32_to_cpu(agf->agf_fllast);
    2463      173139 :         uint32_t                c = be32_to_cpu(agf->agf_flcount);
    2464      173139 :         int                     agfl_size = xfs_agfl_size(mp);
    2465      173139 :         int                     active;
    2466             : 
    2467             :         /*
    2468             :          * The agf read verifier catches severe corruption of these fields.
    2469             :          * Repeat some sanity checks to cover a packed -> unpacked mismatch if
    2470             :          * the verifier allows it.
    2471             :          */
    2472      173139 :         if (f >= agfl_size || l >= agfl_size)
    2473             :                 return true;
    2474      173140 :         if (c > agfl_size)
    2475             :                 return true;
    2476             : 
    2477             :         /*
    2478             :          * Check consistency between the on-disk count and the active range. An
    2479             :          * agfl padding mismatch manifests as an inconsistent flcount.
    2480             :          */
    2481      173140 :         if (c && l >= f)
    2482      162676 :                 active = l - f + 1;
    2483       10464 :         else if (c)
    2484         234 :                 active = agfl_size - f + l + 1;
    2485             :         else
    2486             :                 active = 0;
    2487             : 
    2488      173140 :         return active != c;
    2489             : }
    2490             : 
    2491             : /*
    2492             :  * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
    2493             :  * agfl content cannot be trusted. Warn the user that a repair is required to
    2494             :  * recover leaked blocks.
    2495             :  *
    2496             :  * The purpose of this mechanism is to handle filesystems affected by the agfl
    2497             :  * header padding mismatch problem. A reset keeps the filesystem online with a
    2498             :  * relatively minor free space accounting inconsistency rather than suffer the
    2499             :  * inevitable crash from use of an invalid agfl block.
    2500             :  */
    2501             : static void
    2502           4 : xfs_agfl_reset(
    2503             :         struct xfs_trans        *tp,
    2504             :         struct xfs_buf          *agbp,
    2505             :         struct xfs_perag        *pag)
    2506             : {
    2507           4 :         struct xfs_mount        *mp = tp->t_mountp;
    2508           4 :         struct xfs_agf          *agf = agbp->b_addr;
    2509             : 
    2510           8 :         ASSERT(xfs_perag_agfl_needs_reset(pag));
    2511           4 :         trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);
    2512             : 
    2513           4 :         xfs_warn(mp,
    2514             :                "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
    2515             :                "Please unmount and run xfs_repair.",
    2516             :                  pag->pag_agno, pag->pagf_flcount);
    2517             : 
    2518           4 :         agf->agf_flfirst = 0;
    2519           8 :         agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
    2520           4 :         agf->agf_flcount = 0;
    2521           4 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
    2522             :                                     XFS_AGF_FLCOUNT);
    2523             : 
    2524           4 :         pag->pagf_flcount = 0;
    2525           4 :         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    2526           4 : }
    2527             : 
    2528             : /*
    2529             :  * Defer an AGFL block free. This is effectively equivalent to
    2530             :  * xfs_free_extent_later() with some special handling particular to AGFL blocks.
    2531             :  *
    2532             :  * Deferring AGFL frees helps prevent log reservation overruns due to too many
    2533             :  * allocation operations in a transaction. AGFL frees are prone to this problem
    2534             :  * because for one they are always freed one at a time. Further, an immediate
    2535             :  * AGFL block free can cause a btree join and require another block free before
    2536             :  * the real allocation can proceed. Deferring the free disconnects freeing up
    2537             :  * the AGFL slot from freeing the block.
    2538             :  */
    2539             : static int
    2540      198100 : xfs_defer_agfl_block(
    2541             :         struct xfs_trans                *tp,
    2542             :         xfs_agnumber_t                  agno,
    2543             :         xfs_agblock_t                   agbno,
    2544             :         struct xfs_owner_info           *oinfo)
    2545             : {
    2546      198100 :         struct xfs_mount                *mp = tp->t_mountp;
    2547      198100 :         struct xfs_extent_free_item     *xefi;
    2548      198100 :         xfs_fsblock_t                   fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
    2549             : 
    2550      198100 :         ASSERT(xfs_extfree_item_cache != NULL);
    2551      198100 :         ASSERT(oinfo != NULL);
    2552             : 
    2553      198100 :         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
    2554           0 :                 return -EFSCORRUPTED;
    2555             : 
    2556      198100 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2557             :                                GFP_KERNEL | __GFP_NOFAIL);
    2558      198100 :         xefi->xefi_startblock = fsbno;
    2559      198100 :         xefi->xefi_blockcount = 1;
    2560      198100 :         xefi->xefi_owner = oinfo->oi_owner;
    2561      198100 :         xefi->xefi_agresv = XFS_AG_RESV_AGFL;
    2562             : 
    2563      198100 :         trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
    2564             : 
    2565      198100 :         xfs_extent_free_get_group(mp, xefi);
    2566      198100 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
    2567      198100 :         return 0;
    2568             : }
    2569             : 
    2570             : /*
    2571             :  * Add the extent to the list of extents to be free at transaction end.
    2572             :  * The list is maintained sorted (by block number).
    2573             :  */
    2574             : int
    2575    51173398 : __xfs_free_extent_later(
    2576             :         struct xfs_trans                *tp,
    2577             :         xfs_fsblock_t                   bno,
    2578             :         xfs_filblks_t                   len,
    2579             :         const struct xfs_owner_info     *oinfo,
    2580             :         enum xfs_ag_resv_type           type,
    2581             :         bool                            skip_discard)
    2582             : {
    2583    51173398 :         struct xfs_extent_free_item     *xefi;
    2584    51173398 :         struct xfs_mount                *mp = tp->t_mountp;
    2585             : #ifdef DEBUG
    2586    51173398 :         xfs_agnumber_t                  agno;
    2587    51173398 :         xfs_agblock_t                   agbno;
    2588             : 
    2589    51173398 :         ASSERT(bno != NULLFSBLOCK);
    2590    51173398 :         ASSERT(len > 0);
    2591    51173398 :         ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
    2592    51173398 :         ASSERT(!isnullstartblock(bno));
    2593    51173398 :         agno = XFS_FSB_TO_AGNO(mp, bno);
    2594    51173398 :         agbno = XFS_FSB_TO_AGBNO(mp, bno);
    2595    51173398 :         ASSERT(agno < mp->m_sb.sb_agcount);
    2596    51173398 :         ASSERT(agbno < mp->m_sb.sb_agblocks);
    2597    51173398 :         ASSERT(len < mp->m_sb.sb_agblocks);
    2598    51173398 :         ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
    2599             : #endif
    2600    51173398 :         ASSERT(xfs_extfree_item_cache != NULL);
    2601    51173398 :         ASSERT(type != XFS_AG_RESV_AGFL);
    2602             : 
    2603    51173398 :         if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
    2604           0 :                 return -EFSCORRUPTED;
    2605             : 
    2606    51175954 :         xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
    2607             :                                GFP_KERNEL | __GFP_NOFAIL);
    2608    51176075 :         xefi->xefi_startblock = bno;
    2609    51176075 :         xefi->xefi_blockcount = (xfs_extlen_t)len;
    2610    51176075 :         xefi->xefi_agresv = type;
    2611    51176075 :         if (skip_discard)
    2612     1571224 :                 xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
    2613    51176075 :         if (oinfo) {
    2614      849505 :                 ASSERT(oinfo->oi_offset == 0);
    2615             : 
    2616      849505 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
    2617          25 :                         xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
    2618      849505 :                 if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
    2619      607606 :                         xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
    2620      849505 :                 xefi->xefi_owner = oinfo->oi_owner;
    2621             :         } else {
    2622    50326570 :                 xefi->xefi_owner = XFS_RMAP_OWN_NULL;
    2623             :         }
    2624    51176075 :         trace_xfs_bmap_free_defer(mp,
    2625    51176075 :                         XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
    2626             :                         XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
    2627             : 
    2628    51172627 :         xfs_extent_free_get_group(mp, xefi);
    2629    51169320 :         xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
    2630    51169320 :         return 0;
    2631             : }
    2632             : 
    2633             : #ifdef DEBUG
    2634             : /*
    2635             :  * Check if an AGF has a free extent record whose length is equal to
    2636             :  * args->minlen.
    2637             :  */
    2638             : STATIC int
    2639      373585 : xfs_exact_minlen_extent_available(
    2640             :         struct xfs_alloc_arg    *args,
    2641             :         struct xfs_buf          *agbp,
    2642             :         int                     *stat)
    2643             : {
    2644      373585 :         struct xfs_btree_cur    *cnt_cur;
    2645      373585 :         xfs_agblock_t           fbno;
    2646      373585 :         xfs_extlen_t            flen;
    2647      373585 :         int                     error = 0;
    2648             : 
    2649      373585 :         cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
    2650             :                                         args->pag, XFS_BTNUM_CNT);
    2651      373586 :         error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
    2652      373552 :         if (error)
    2653           0 :                 goto out;
    2654             : 
    2655      373552 :         if (*stat == 0) {
    2656           0 :                 xfs_btree_mark_sick(cnt_cur);
    2657           0 :                 error = -EFSCORRUPTED;
    2658           0 :                 goto out;
    2659             :         }
    2660             : 
    2661      373552 :         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
    2662      373574 :         if (error)
    2663           0 :                 goto out;
    2664             : 
    2665      373574 :         if (*stat == 1 && flen != args->minlen)
    2666        3456 :                 *stat = 0;
    2667             : 
    2668      370118 : out:
    2669      373574 :         xfs_btree_del_cursor(cnt_cur, error);
    2670             : 
    2671      373587 :         return error;
    2672             : }
    2673             : #endif
    2674             : 
    2675             : /*
    2676             :  * Decide whether to use this allocation group for this allocation.
    2677             :  * If so, fix up the btree freelist's size.
    2678             :  */
    2679             : int                     /* error */
    2680   361022854 : xfs_alloc_fix_freelist(
    2681             :         struct xfs_alloc_arg    *args,  /* allocation argument structure */
    2682             :         uint32_t                alloc_flags)
    2683             : {
    2684   361022854 :         struct xfs_mount        *mp = args->mp;
    2685   361022854 :         struct xfs_perag        *pag = args->pag;
    2686   361022854 :         struct xfs_trans        *tp = args->tp;
    2687   361022854 :         struct xfs_buf          *agbp = NULL;
    2688   361022854 :         struct xfs_buf          *agflbp = NULL;
    2689   361022854 :         struct xfs_alloc_arg    targs;  /* local allocation arguments */
    2690   361022854 :         xfs_agblock_t           bno;    /* freelist block */
    2691   361022854 :         xfs_extlen_t            need;   /* total blocks needed in freelist */
    2692   361022854 :         int                     error = 0;
    2693             : 
    2694             :         /* deferred ops (AGFL block frees) require permanent transactions */
    2695   361022854 :         ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
    2696             : 
    2697   722045708 :         if (!xfs_perag_initialised_agf(pag)) {
    2698        2576 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2699        2576 :                 if (error) {
    2700             :                         /* Couldn't lock the AGF so skip this AG. */
    2701        1644 :                         if (error == -EAGAIN)
    2702        1644 :                                 error = 0;
    2703        1644 :                         goto out_no_agbp;
    2704             :                 }
    2705             :         }
    2706             : 
    2707             :         /*
    2708             :          * If this is a metadata preferred pag and we are user data then try
    2709             :          * somewhere else if we are not being asked to try harder at this
    2710             :          * point
    2711             :          */
    2712   722042420 :         if (xfs_perag_prefers_metadata(pag) &&
    2713           0 :             (args->datatype & XFS_ALLOC_USERDATA) &&
    2714           0 :             (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
    2715           0 :                 ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
    2716           0 :                 goto out_agbp_relse;
    2717             :         }
    2718             : 
    2719   361021210 :         need = xfs_alloc_min_freelist(mp, pag);
    2720   361021866 :         if (!xfs_alloc_space_available(args, need, alloc_flags |
    2721             :                         XFS_ALLOC_FLAG_CHECK))
    2722    34203970 :                 goto out_agbp_relse;
    2723             : 
    2724             :         /*
    2725             :          * Get the a.g. freespace buffer.
    2726             :          * Can fail if we're not blocking on locks, and it's held.
    2727             :          */
    2728   326820992 :         if (!agbp) {
    2729   326819888 :                 error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
    2730   326829531 :                 if (error) {
    2731             :                         /* Couldn't lock the AGF so skip this AG. */
    2732     2812530 :                         if (error == -EAGAIN)
    2733     2811500 :                                 error = 0;
    2734     2812530 :                         goto out_no_agbp;
    2735             :                 }
    2736             :         }
    2737             : 
    2738             :         /* reset a padding mismatched agfl before final free space check */
    2739   648036210 :         if (xfs_perag_agfl_needs_reset(pag))
    2740           4 :                 xfs_agfl_reset(tp, agbp, pag);
    2741             : 
    2742             :         /* If there isn't enough total space or single-extent, reject it. */
    2743   324018105 :         need = xfs_alloc_min_freelist(mp, pag);
    2744   324020199 :         if (!xfs_alloc_space_available(args, need, alloc_flags))
    2745         415 :                 goto out_agbp_relse;
    2746             : 
    2747             : #ifdef DEBUG
    2748   324009957 :         if (args->alloc_minlen_only) {
    2749      373496 :                 int stat;
    2750             : 
    2751      373496 :                 error = xfs_exact_minlen_extent_available(args, agbp, &stat);
    2752      373581 :                 if (error || !stat)
    2753        3456 :                         goto out_agbp_relse;
    2754             :         }
    2755             : #endif
    2756             :         /*
    2757             :          * Make the freelist shorter if it's too long.
    2758             :          *
    2759             :          * Note that from this point onwards, we will always release the agf and
    2760             :          * agfl buffers on error. This handles the case where we error out and
    2761             :          * the buffers are clean or may not have been joined to the transaction
    2762             :          * and hence need to be released manually. If they have been joined to
    2763             :          * the transaction, then xfs_trans_brelse() will handle them
    2764             :          * appropriately based on the recursion count and dirty state of the
    2765             :          * buffer.
    2766             :          *
    2767             :          * XXX (dgc): When we have lots of free space, does this buy us
    2768             :          * anything other than extra overhead when we need to put more blocks
    2769             :          * back on the free list? Maybe we should only do this when space is
    2770             :          * getting low or the AGFL is more than half full?
    2771             :          *
    2772             :          * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
    2773             :          * big; the NORMAP flag prevents AGFL expand/shrink operations from
    2774             :          * updating the rmapbt.  Both flags are used in xfs_repair while we're
    2775             :          * rebuilding the rmapbt, and neither are used by the kernel.  They're
    2776             :          * both required to ensure that rmaps are correctly recorded for the
    2777             :          * regenerated AGFL, bnobt, and cntbt.  See repair/phase5.c and
    2778             :          * repair/rmap.c in xfsprogs for details.
    2779             :          */
    2780   324006586 :         memset(&targs, 0, sizeof(targs));
    2781             :         /* struct copy below */
    2782   324006586 :         if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
    2783        5480 :                 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
    2784             :         else
    2785   324001106 :                 targs.oinfo = XFS_RMAP_OINFO_AG;
    2786   324204686 :         while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
    2787   324204686 :                         pag->pagf_flcount > need) {
    2788      192917 :                 error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
    2789      198100 :                 if (error)
    2790           0 :                         goto out_agbp_relse;
    2791             : 
    2792             :                 /* defer agfl frees */
    2793      198100 :                 error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
    2794      198100 :                 if (error)
    2795           0 :                         goto out_agbp_relse;
    2796             :         }
    2797             : 
    2798   324011769 :         targs.tp = tp;
    2799   324011769 :         targs.mp = mp;
    2800   324011769 :         targs.agbp = agbp;
    2801   324011769 :         targs.agno = args->agno;
    2802   324011769 :         targs.alignment = targs.minlen = targs.prod = 1;
    2803   324011769 :         targs.pag = pag;
    2804   324011769 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2805   324016986 :         if (error)
    2806         737 :                 goto out_agbp_relse;
    2807             : 
    2808             :         /* Make the freelist longer if it's too short. */
    2809   324374847 :         while (pag->pagf_flcount < need) {
    2810      361465 :                 targs.agbno = 0;
    2811      361465 :                 targs.maxlen = need - pag->pagf_flcount;
    2812      361465 :                 targs.resv = XFS_AG_RESV_AGFL;
    2813             : 
    2814             :                 /* Allocate as many blocks as possible at once. */
    2815      361465 :                 error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
    2816      358615 :                 if (error)
    2817           5 :                         goto out_agflbp_relse;
    2818             : 
    2819             :                 /*
    2820             :                  * Stop if we run out.  Won't happen if callers are obeying
    2821             :                  * the restrictions correctly.  Can happen for free calls
    2822             :                  * on a completely full ag.
    2823             :                  */
    2824      358610 :                 if (targs.agbno == NULLAGBLOCK) {
    2825           0 :                         if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
    2826             :                                 break;
    2827           0 :                         goto out_agflbp_relse;
    2828             :                 }
    2829             : 
    2830      358610 :                 if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
    2831      358605 :                         error = xfs_rmap_alloc(tp, agbp, pag,
    2832             :                                        targs.agbno, targs.len, &targs.oinfo);
    2833      358605 :                         if (error)
    2834          12 :                                 goto out_agflbp_relse;
    2835             :                 }
    2836      358598 :                 error = xfs_alloc_update_counters(tp, agbp,
    2837      358598 :                                                   -((long)(targs.len)));
    2838      358598 :                 if (error)
    2839           0 :                         goto out_agflbp_relse;
    2840             : 
    2841             :                 /*
    2842             :                  * Put each allocated block on the list.
    2843             :                  */
    2844      778353 :                 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
    2845      419755 :                         error = xfs_alloc_put_freelist(pag, tp, agbp,
    2846             :                                                         agflbp, bno, 0);
    2847      419755 :                         if (error)
    2848           0 :                                 goto out_agflbp_relse;
    2849             :                 }
    2850             :         }
    2851   324013382 :         xfs_trans_brelse(tp, agflbp);
    2852   324018813 :         args->agbp = agbp;
    2853   324018813 :         return 0;
    2854             : 
    2855          17 : out_agflbp_relse:
    2856          17 :         xfs_trans_brelse(tp, agflbp);
    2857    34208595 : out_agbp_relse:
    2858    34208595 :         if (agbp)
    2859        4628 :                 xfs_trans_brelse(tp, agbp);
    2860    34203967 : out_no_agbp:
    2861    37022766 :         args->agbp = NULL;
    2862    37022766 :         return error;
    2863             : }
    2864             : 
    2865             : /*
    2866             :  * Get a block from the freelist.
    2867             :  * Returns with the buffer for the block gotten.
    2868             :  */
    2869             : int
    2870      596243 : xfs_alloc_get_freelist(
    2871             :         struct xfs_perag        *pag,
    2872             :         struct xfs_trans        *tp,
    2873             :         struct xfs_buf          *agbp,
    2874             :         xfs_agblock_t           *bnop,
    2875             :         int                     btreeblk)
    2876             : {
    2877      596243 :         struct xfs_agf          *agf = agbp->b_addr;
    2878      596243 :         struct xfs_buf          *agflbp;
    2879      596243 :         xfs_agblock_t           bno;
    2880      596243 :         __be32                  *agfl_bno;
    2881      596243 :         int                     error;
    2882      596243 :         uint32_t                logflags;
    2883      596243 :         struct xfs_mount        *mp = tp->t_mountp;
    2884             : 
    2885             :         /*
    2886             :          * Freelist is empty, give up.
    2887             :          */
    2888      596243 :         if (!agf->agf_flcount) {
    2889           0 :                 *bnop = NULLAGBLOCK;
    2890           0 :                 return 0;
    2891             :         }
    2892             :         /*
    2893             :          * Read the array of free blocks.
    2894             :          */
    2895      596243 :         error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2896      596243 :         if (error)
    2897             :                 return error;
    2898             : 
    2899             : 
    2900             :         /*
    2901             :          * Get the block number and update the data structures.
    2902             :          */
    2903      596243 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    2904      596243 :         bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
    2905     1192486 :         if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
    2906           0 :                 return -EFSCORRUPTED;
    2907             : 
    2908      596243 :         be32_add_cpu(&agf->agf_flfirst, 1);
    2909      596243 :         xfs_trans_brelse(tp, agflbp);
    2910     1192486 :         if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
    2911         405 :                 agf->agf_flfirst = 0;
    2912             : 
    2913     1192486 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    2914      596243 :         be32_add_cpu(&agf->agf_flcount, -1);
    2915      596243 :         pag->pagf_flcount--;
    2916             : 
    2917      596243 :         logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
    2918      596243 :         if (btreeblk) {
    2919      398143 :                 be32_add_cpu(&agf->agf_btreeblks, 1);
    2920      398143 :                 pag->pagf_btreeblks++;
    2921      398143 :                 logflags |= XFS_AGF_BTREEBLKS;
    2922             :         }
    2923             : 
    2924      596243 :         xfs_alloc_log_agf(tp, agbp, logflags);
    2925      596243 :         *bnop = bno;
    2926             : 
    2927      596243 :         return 0;
    2928             : }
    2929             : 
    2930             : /*
    2931             :  * Log the given fields from the agf structure.
    2932             :  */
    2933             : void
    2934   137733233 : xfs_alloc_log_agf(
    2935             :         struct xfs_trans        *tp,
    2936             :         struct xfs_buf          *bp,
    2937             :         uint32_t                fields)
    2938             : {
    2939   137733233 :         int     first;          /* first byte offset */
    2940   137733233 :         int     last;           /* last byte offset */
    2941   137733233 :         static const short      offsets[] = {
    2942             :                 offsetof(xfs_agf_t, agf_magicnum),
    2943             :                 offsetof(xfs_agf_t, agf_versionnum),
    2944             :                 offsetof(xfs_agf_t, agf_seqno),
    2945             :                 offsetof(xfs_agf_t, agf_length),
    2946             :                 offsetof(xfs_agf_t, agf_roots[0]),
    2947             :                 offsetof(xfs_agf_t, agf_levels[0]),
    2948             :                 offsetof(xfs_agf_t, agf_flfirst),
    2949             :                 offsetof(xfs_agf_t, agf_fllast),
    2950             :                 offsetof(xfs_agf_t, agf_flcount),
    2951             :                 offsetof(xfs_agf_t, agf_freeblks),
    2952             :                 offsetof(xfs_agf_t, agf_longest),
    2953             :                 offsetof(xfs_agf_t, agf_btreeblks),
    2954             :                 offsetof(xfs_agf_t, agf_uuid),
    2955             :                 offsetof(xfs_agf_t, agf_rmap_blocks),
    2956             :                 offsetof(xfs_agf_t, agf_refcount_blocks),
    2957             :                 offsetof(xfs_agf_t, agf_refcount_root),
    2958             :                 offsetof(xfs_agf_t, agf_refcount_level),
    2959             :                 /* needed so that we don't log the whole rest of the structure: */
    2960             :                 offsetof(xfs_agf_t, agf_spare64),
    2961             :                 sizeof(xfs_agf_t)
    2962             :         };
    2963             : 
    2964   137733233 :         trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);
    2965             : 
    2966   137734516 :         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);
    2967             : 
    2968   137734279 :         xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
    2969   137734939 :         xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
    2970   137734674 : }
    2971             : 
    2972             : /*
    2973             :  * Put the block on the freelist for the allocation group.
    2974             :  */
    2975             : int
    2976      598341 : xfs_alloc_put_freelist(
    2977             :         struct xfs_perag        *pag,
    2978             :         struct xfs_trans        *tp,
    2979             :         struct xfs_buf          *agbp,
    2980             :         struct xfs_buf          *agflbp,
    2981             :         xfs_agblock_t           bno,
    2982             :         int                     btreeblk)
    2983             : {
    2984      598341 :         struct xfs_mount        *mp = tp->t_mountp;
    2985      598341 :         struct xfs_agf          *agf = agbp->b_addr;
    2986      598341 :         __be32                  *blockp;
    2987      598341 :         int                     error;
    2988      598341 :         uint32_t                logflags;
    2989      598341 :         __be32                  *agfl_bno;
    2990      598341 :         int                     startoff;
    2991             : 
    2992      598341 :         if (!agflbp) {
    2993      178586 :                 error = xfs_alloc_read_agfl(pag, tp, &agflbp);
    2994      178586 :                 if (error)
    2995             :                         return error;
    2996             :         }
    2997             : 
    2998      598341 :         be32_add_cpu(&agf->agf_fllast, 1);
    2999     1196682 :         if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
    3000         411 :                 agf->agf_fllast = 0;
    3001             : 
    3002     1196682 :         ASSERT(!xfs_perag_agfl_needs_reset(pag));
    3003      598341 :         be32_add_cpu(&agf->agf_flcount, 1);
    3004      598341 :         pag->pagf_flcount++;
    3005             : 
    3006      598341 :         logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
    3007      598341 :         if (btreeblk) {
    3008      178586 :                 be32_add_cpu(&agf->agf_btreeblks, -1);
    3009      178586 :                 pag->pagf_btreeblks--;
    3010      178586 :                 logflags |= XFS_AGF_BTREEBLKS;
    3011             :         }
    3012             : 
    3013      598341 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3014             : 
    3015     1196682 :         ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
    3016             : 
    3017      598341 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    3018      598341 :         blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
    3019      598341 :         *blockp = cpu_to_be32(bno);
    3020      598341 :         startoff = (char *)blockp - (char *)agflbp->b_addr;
    3021             : 
    3022      598341 :         xfs_alloc_log_agf(tp, agbp, logflags);
    3023             : 
    3024      598341 :         xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
    3025      598341 :         xfs_trans_log_buf(tp, agflbp, startoff,
    3026             :                           startoff + sizeof(xfs_agblock_t) - 1);
    3027      598341 :         return 0;
    3028             : }
    3029             : 
    3030             : /*
    3031             :  * Check that this AGF/AGI header's sequence number and length matches the AG
    3032             :  * number and size in fsblocks.
    3033             :  */
    3034             : xfs_failaddr_t
    3035     2597994 : xfs_validate_ag_length(
    3036             :         struct xfs_buf          *bp,
    3037             :         uint32_t                seqno,
    3038             :         uint32_t                length)
    3039             : {
    3040     2597994 :         struct xfs_mount        *mp = bp->b_mount;
    3041             :         /*
    3042             :          * During growfs operations, the perag is not fully initialised,
    3043             :          * so we can't use it for any useful checking. growfs ensures we can't
    3044             :          * use it by using uncached buffers that don't have the perag attached
    3045             :          * so we can detect and avoid this problem.
    3046             :          */
    3047     2597994 :         if (bp->b_pag && seqno != bp->b_pag->pag_agno)
    3048           0 :                 return __this_address;
    3049             : 
    3050             :         /*
    3051             :          * Only the last AG in the filesystem is allowed to be shorter
    3052             :          * than the AG size recorded in the superblock.
    3053             :          */
    3054     2597994 :         if (length != mp->m_sb.sb_agblocks) {
    3055             :                 /*
    3056             :                  * During growfs, the new last AG can get here before we
    3057             :                  * have updated the superblock. Give it a pass on the seqno
    3058             :                  * check.
    3059             :                  */
    3060        1093 :                 if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
    3061           0 :                         return __this_address;
    3062        1093 :                 if (length < XFS_MIN_AG_BLOCKS)
    3063           0 :                         return __this_address;
    3064        1093 :                 if (length > mp->m_sb.sb_agblocks)
    3065           2 :                         return __this_address;
    3066             :         }
    3067             : 
    3068             :         return NULL;
    3069             : }
    3070             : 
    3071             : /*
    3072             :  * Verify the AGF is consistent.
    3073             :  *
    3074             :  * We do not verify the AGFL indexes in the AGF are fully consistent here
    3075             :  * because of issues with variable on-disk structure sizes. Instead, we check
    3076             :  * the agfl indexes for consistency when we initialise the perag from the AGF
    3077             :  * information after a read completes.
    3078             :  *
    3079             :  * If the index is inconsistent, then we mark the perag as needing an AGFL
    3080             :  * reset. The first AGFL update performed then resets the AGFL indexes and
    3081             :  * refills the AGFL with known good free blocks, allowing the filesystem to
    3082             :  * continue operating normally at the cost of a few leaked free space blocks.
    3083             :  */
    3084             : static xfs_failaddr_t
    3085     1468229 : xfs_agf_verify(
    3086             :         struct xfs_buf          *bp)
    3087             : {
    3088     1468229 :         struct xfs_mount        *mp = bp->b_mount;
    3089     1468229 :         struct xfs_agf          *agf = bp->b_addr;
    3090     1468229 :         xfs_failaddr_t          fa;
    3091     1468229 :         uint32_t                agf_seqno = be32_to_cpu(agf->agf_seqno);
    3092     1468229 :         uint32_t                agf_length = be32_to_cpu(agf->agf_length);
    3093             : 
    3094     1468229 :         if (xfs_has_crc(mp)) {
    3095     1465913 :                 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
    3096           0 :                         return __this_address;
    3097     1465911 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
    3098           0 :                         return __this_address;
    3099             :         }
    3100             : 
    3101     1468229 :         if (!xfs_verify_magic(bp, agf->agf_magicnum))
    3102           0 :                 return __this_address;
    3103             : 
    3104     1468231 :         if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
    3105           5 :                 return __this_address;
    3106             : 
    3107             :         /*
    3108             :          * Both agf_seqno and agf_length need to validated before anything else
    3109             :          * block number related in the AGF or AGFL can be checked.
    3110             :          */
    3111     1468226 :         fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
    3112     1468224 :         if (fa)
    3113             :                 return fa;
    3114             : 
    3115     2936450 :         if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
    3116           2 :                 return __this_address;
    3117     2936446 :         if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
    3118           0 :                 return __this_address;
    3119     2936446 :         if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
    3120           0 :                 return __this_address;
    3121             : 
    3122     4404669 :         if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
    3123             :             be32_to_cpu(agf->agf_freeblks) > agf_length)
    3124           6 :                 return __this_address;
    3125             : 
    3126     1468220 :         if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
    3127     1468220 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
    3128     1468220 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
    3129     2936436 :                                                 mp->m_alloc_maxlevels ||
    3130     1468216 :             be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
    3131             :                                                 mp->m_alloc_maxlevels)
    3132           4 :                 return __this_address;
    3133             : 
    3134     2936426 :         if (xfs_has_lazysbcount(mp) &&
    3135     1468209 :             be32_to_cpu(agf->agf_btreeblks) > agf_length)
    3136           0 :                 return __this_address;
    3137             : 
    3138     1468217 :         if (xfs_has_rmapbt(mp)) {
    3139     2647298 :                 if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
    3140           2 :                         return __this_address;
    3141             : 
    3142     1323647 :                 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
    3143     1323647 :                     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
    3144     1323647 :                                                         mp->m_rmap_maxlevels)
    3145           0 :                         return __this_address;
    3146             :         }
    3147             : 
    3148     1468215 :         if (xfs_has_reflink(mp)) {
    3149     2647172 :                 if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
    3150           2 :                         return __this_address;
    3151             : 
    3152     1323584 :                 if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
    3153     2647166 :                     be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
    3154           1 :                         return __this_address;
    3155             :         }
    3156             : 
    3157             :         return NULL;
    3158             : }
    3159             : 
    3160             : static void
    3161      218910 : xfs_agf_read_verify(
    3162             :         struct xfs_buf  *bp)
    3163             : {
    3164      218910 :         struct xfs_mount *mp = bp->b_mount;
    3165      218910 :         xfs_failaddr_t  fa;
    3166             : 
    3167      437802 :         if (xfs_has_crc(mp) &&
    3168             :             !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
    3169           8 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    3170             :         else {
    3171      218902 :                 fa = xfs_agf_verify(bp);
    3172      218902 :                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
    3173          12 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3174             :         }
    3175      218910 : }
    3176             : 
    3177             : static void
    3178      893492 : xfs_agf_write_verify(
    3179             :         struct xfs_buf  *bp)
    3180             : {
    3181      893492 :         struct xfs_mount        *mp = bp->b_mount;
    3182      893492 :         struct xfs_buf_log_item *bip = bp->b_log_item;
    3183      893492 :         struct xfs_agf          *agf = bp->b_addr;
    3184      893492 :         xfs_failaddr_t          fa;
    3185             : 
    3186      893492 :         fa = xfs_agf_verify(bp);
    3187      893492 :         if (fa) {
    3188           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    3189           0 :                 return;
    3190             :         }
    3191             : 
    3192      893492 :         if (!xfs_has_crc(mp))
    3193             :                 return;
    3194             : 
    3195      891198 :         if (bip)
    3196      885790 :                 agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    3197             : 
    3198      891198 :         xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
    3199             : }
    3200             : 
    3201             : const struct xfs_buf_ops xfs_agf_buf_ops = {
    3202             :         .name = "xfs_agf",
    3203             :         .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
    3204             :         .verify_read = xfs_agf_read_verify,
    3205             :         .verify_write = xfs_agf_write_verify,
    3206             :         .verify_struct = xfs_agf_verify,
    3207             : };
    3208             : 
    3209             : /*
    3210             :  * Read in the allocation group header (free/alloc section).
    3211             :  */
    3212             : int
    3213   983666099 : xfs_read_agf(
    3214             :         struct xfs_perag        *pag,
    3215             :         struct xfs_trans        *tp,
    3216             :         int                     flags,
    3217             :         struct xfs_buf          **agfbpp)
    3218             : {
    3219   983666099 :         struct xfs_mount        *mp = pag->pag_mount;
    3220   983666099 :         int                     error;
    3221             : 
    3222   983666099 :         trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
    3223             : 
    3224   983669373 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
    3225   983669373 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
    3226             :                         XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
    3227   983713860 :         if (xfs_metadata_is_sick(error))
    3228          20 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
    3229   983713860 :         if (error)
    3230             :                 return error;
    3231             : 
    3232   980898573 :         xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
    3233   980898573 :         return 0;
    3234             : }
    3235             : 
    3236             : /*
    3237             :  * Read in the allocation group header (free/alloc section) and initialise the
    3238             :  * perag structure if necessary. If the caller provides @agfbpp, then return the
    3239             :  * locked buffer to the caller, otherwise free it.
    3240             :  */
    3241             : int
    3242   983654881 : xfs_alloc_read_agf(
    3243             :         struct xfs_perag        *pag,
    3244             :         struct xfs_trans        *tp,
    3245             :         int                     flags,
    3246             :         struct xfs_buf          **agfbpp)
    3247             : {
    3248   983654881 :         struct xfs_buf          *agfbp;
    3249   983654881 :         struct xfs_agf          *agf;
    3250   983654881 :         int                     error;
    3251   983654881 :         int                     allocbt_blks;
    3252             : 
    3253   983654881 :         trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
    3254             : 
    3255             :         /* We don't support trylock when freeing. */
    3256   983674097 :         ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
    3257             :                         (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
    3258   983674097 :         error = xfs_read_agf(pag, tp,
    3259   983674097 :                         (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
    3260             :                         &agfbp);
    3261   983709566 :         if (error)
    3262             :                 return error;
    3263             : 
    3264   980893376 :         agf = agfbp->b_addr;
    3265  1961786752 :         if (!xfs_perag_initialised_agf(pag)) {
    3266      173139 :                 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
    3267      173139 :                 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
    3268      173139 :                 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
    3269      173139 :                 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
    3270      173139 :                 pag->pagf_levels[XFS_BTNUM_BNOi] =
    3271      173139 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
    3272      173139 :                 pag->pagf_levels[XFS_BTNUM_CNTi] =
    3273      173139 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
    3274      173139 :                 pag->pagf_levels[XFS_BTNUM_RMAPi] =
    3275      173139 :                         be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
    3276      173139 :                 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
    3277      173139 :                 if (xfs_agfl_needs_reset(pag->pag_mount, agf))
    3278           4 :                         set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3279             :                 else
    3280      173135 :                         clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
    3281             : 
    3282             :                 /*
    3283             :                  * Update the in-core allocbt counter. Filter out the rmapbt
    3284             :                  * subset of the btreeblks counter because the rmapbt is managed
    3285             :                  * by perag reservation. Subtract one for the rmapbt root block
    3286             :                  * because the rmap counter includes it while the btreeblks
    3287             :                  * counter only tracks non-root blocks.
    3288             :                  */
    3289      173140 :                 allocbt_blks = pag->pagf_btreeblks;
    3290      173140 :                 if (xfs_has_rmapbt(pag->pag_mount))
    3291      172266 :                         allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
    3292      173140 :                 if (allocbt_blks > 0)
    3293       54236 :                         atomic64_add(allocbt_blks,
    3294             :                                         &pag->pag_mount->m_allocbt_blks);
    3295             : 
    3296      173140 :                 set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
    3297             :         }
    3298             : #ifdef DEBUG
    3299  1961440474 :         else if (!xfs_is_shutdown(pag->pag_mount)) {
    3300  1961422820 :                 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
    3301  1961422820 :                 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
    3302  1961422820 :                 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
    3303  1961422820 :                 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
    3304  1961422820 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
    3305             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
    3306  1961422820 :                 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
    3307             :                        be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
    3308             :         }
    3309             : #endif
    3310   980893377 :         if (agfbpp)
    3311   980548850 :                 *agfbpp = agfbp;
    3312             :         else
    3313      344527 :                 xfs_trans_brelse(tp, agfbp);
    3314             :         return 0;
    3315             : }
    3316             : 
    3317             : /*
    3318             :  * Pre-proces allocation arguments to set initial state that we don't require
    3319             :  * callers to set up correctly, as well as bounds check the allocation args
    3320             :  * that are set up.
    3321             :  */
    3322             : static int
    3323    58704099 : xfs_alloc_vextent_check_args(
    3324             :         struct xfs_alloc_arg    *args,
    3325             :         xfs_fsblock_t           target,
    3326             :         xfs_agnumber_t          *minimum_agno)
    3327             : {
    3328    58704099 :         struct xfs_mount        *mp = args->mp;
    3329    58704099 :         xfs_agblock_t           agsize;
    3330             : 
    3331    58704099 :         args->fsbno = NULLFSBLOCK;
    3332             : 
    3333    58704099 :         *minimum_agno = 0;
    3334    58704099 :         if (args->tp->t_highest_agno != NULLAGNUMBER)
    3335      676332 :                 *minimum_agno = args->tp->t_highest_agno;
    3336             : 
    3337             :         /*
    3338             :          * Just fix this up, for the case where the last a.g. is shorter
    3339             :          * (or there's only one a.g.) and the caller couldn't easily figure
    3340             :          * that out (xfs_bmap_alloc).
    3341             :          */
    3342    58704099 :         agsize = mp->m_sb.sb_agblocks;
    3343    58704099 :         if (args->maxlen > agsize)
    3344           0 :                 args->maxlen = agsize;
    3345    58704099 :         if (args->alignment == 0)
    3346     1251908 :                 args->alignment = 1;
    3347             : 
    3348    58704099 :         ASSERT(args->minlen > 0);
    3349    58704099 :         ASSERT(args->maxlen > 0);
    3350    58704099 :         ASSERT(args->alignment > 0);
    3351    58704099 :         ASSERT(args->resv != XFS_AG_RESV_AGFL);
    3352             : 
    3353    58704099 :         ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
    3354    58704099 :         ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
    3355    58704099 :         ASSERT(args->minlen <= args->maxlen);
    3356    58704099 :         ASSERT(args->minlen <= agsize);
    3357    58704099 :         ASSERT(args->mod < args->prod);
    3358             : 
    3359    58704099 :         if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
    3360    58704099 :             XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
    3361    58704099 :             args->minlen > args->maxlen || args->minlen > agsize ||
    3362    58704099 :             args->mod >= args->prod) {
    3363           0 :                 trace_xfs_alloc_vextent_badargs(args);
    3364           0 :                 return -ENOSPC;
    3365             :         }
    3366             : 
    3367    58704099 :         if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
    3368           0 :                 trace_xfs_alloc_vextent_skip_deadlock(args);
    3369           0 :                 return -ENOSPC;
    3370             :         }
    3371             :         return 0;
    3372             : 
    3373             : }
    3374             : 
    3375             : /*
    3376             :  * Prepare an AG for allocation. If the AG is not prepared to accept the
    3377             :  * allocation, return failure.
    3378             :  *
    3379             :  * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
    3380             :  * modified to hold their own perag references.
    3381             :  */
    3382             : static int
    3383    95154440 : xfs_alloc_vextent_prepare_ag(
    3384             :         struct xfs_alloc_arg    *args,
    3385             :         uint32_t                alloc_flags)
    3386             : {
    3387    95154440 :         bool                    need_pag = !args->pag;
    3388    95154440 :         int                     error;
    3389             : 
    3390    95154440 :         if (need_pag)
    3391           0 :                 args->pag = xfs_perag_get(args->mp, args->agno);
    3392             : 
    3393    95154440 :         args->agbp = NULL;
    3394    95154440 :         error = xfs_alloc_fix_freelist(args, alloc_flags);
    3395    95157284 :         if (error) {
    3396         738 :                 trace_xfs_alloc_vextent_nofix(args);
    3397         738 :                 if (need_pag)
    3398           0 :                         xfs_perag_put(args->pag);
    3399         738 :                 args->agbno = NULLAGBLOCK;
    3400         738 :                 return error;
    3401             :         }
    3402    95156546 :         if (!args->agbp) {
    3403             :                 /* cannot allocate in this AG at all */
    3404    37020973 :                 trace_xfs_alloc_vextent_noagbp(args);
    3405    37020949 :                 args->agbno = NULLAGBLOCK;
    3406    37020949 :                 return 0;
    3407             :         }
    3408    58135573 :         args->wasfromfl = 0;
    3409    58135573 :         return 0;
    3410             : }
    3411             : 
    3412             : /*
    3413             :  * Post-process allocation results to account for the allocation if it succeed
    3414             :  * and set the allocated block number correctly for the caller.
    3415             :  *
    3416             :  * XXX: we should really be returning ENOSPC for ENOSPC, not
    3417             :  * hiding it behind a "successful" NULLFSBLOCK allocation.
    3418             :  */
    3419             : static int
    3420    58708875 : xfs_alloc_vextent_finish(
    3421             :         struct xfs_alloc_arg    *args,
    3422             :         xfs_agnumber_t          minimum_agno,
    3423             :         int                     alloc_error,
    3424             :         bool                    drop_perag)
    3425             : {
    3426    58708875 :         struct xfs_mount        *mp = args->mp;
    3427    58708875 :         int                     error = 0;
    3428             : 
    3429             :         /*
    3430             :          * We can end up here with a locked AGF. If we failed, the caller is
    3431             :          * likely going to try to allocate again with different parameters, and
    3432             :          * that can widen the AGs that are searched for free space. If we have
    3433             :          * to do BMBT block allocation, we have to do a new allocation.
    3434             :          *
    3435             :          * Hence leaving this function with the AGF locked opens up potential
    3436             :          * ABBA AGF deadlocks because a future allocation attempt in this
    3437             :          * transaction may attempt to lock a lower number AGF.
    3438             :          *
    3439             :          * We can't release the AGF until the transaction is commited, so at
    3440             :          * this point we must update the "first allocation" tracker to point at
    3441             :          * this AG if the tracker is empty or points to a lower AG. This allows
    3442             :          * the next allocation attempt to be modified appropriately to avoid
    3443             :          * deadlocks.
    3444             :          */
    3445    58708875 :         if (args->agbp &&
    3446    58135305 :             (args->tp->t_highest_agno == NULLAGNUMBER ||
    3447      676329 :              args->agno > minimum_agno))
    3448    57498352 :                 args->tp->t_highest_agno = args->agno;
    3449             : 
    3450             :         /*
    3451             :          * If the allocation failed with an error or we had an ENOSPC result,
    3452             :          * preserve the returned error whilst also marking the allocation result
    3453             :          * as "no extent allocated". This ensures that callers that fail to
    3454             :          * capture the error will still treat it as a failed allocation.
    3455             :          */
    3456    58708875 :         if (alloc_error || args->agbno == NULLAGBLOCK) {
    3457      785994 :                 args->fsbno = NULLFSBLOCK;
    3458      785994 :                 error = alloc_error;
    3459      785994 :                 goto out_drop_perag;
    3460             :         }
    3461             : 
    3462    57922881 :         args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
    3463             : 
    3464    57922881 :         ASSERT(args->len >= args->minlen);
    3465    57922881 :         ASSERT(args->len <= args->maxlen);
    3466    57922881 :         ASSERT(args->agbno % args->alignment == 0);
    3467    57922881 :         XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);
    3468             : 
    3469             :         /* if not file data, insert new block into the reverse map btree */
    3470    57922881 :         if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
    3471     1793988 :                 error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
    3472     1793988 :                                        args->agbno, args->len, &args->oinfo);
    3473     1793984 :                 if (error)
    3474          55 :                         goto out_drop_perag;
    3475             :         }
    3476             : 
    3477    57922822 :         if (!args->wasfromfl) {
    3478    57922245 :                 error = xfs_alloc_update_counters(args->tp, args->agbp,
    3479    57922245 :                                                   -((long)(args->len)));
    3480    57922101 :                 if (error)
    3481           0 :                         goto out_drop_perag;
    3482             : 
    3483    57922101 :                 ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
    3484             :                                 args->len));
    3485             :         }
    3486             : 
    3487    57922292 :         xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
    3488             : 
    3489    57920785 :         XFS_STATS_INC(mp, xs_allocx);
    3490    57920785 :         XFS_STATS_ADD(mp, xs_allocb, args->len);
    3491             : 
    3492    57920785 :         trace_xfs_alloc_vextent_finish(args);
    3493             : 
    3494    58706903 : out_drop_perag:
    3495    58706903 :         if (drop_perag && args->pag) {
    3496    57293049 :                 xfs_perag_rele(args->pag);
    3497    57293934 :                 args->pag = NULL;
    3498             :         }
    3499    58707788 :         return error;
    3500             : }
    3501             : 
    3502             : /*
    3503             :  * Allocate within a single AG only. This uses a best-fit length algorithm so if
    3504             :  * you need an exact sized allocation without locality constraints, this is the
    3505             :  * fastest way to do it.
    3506             :  *
    3507             :  * Caller is expected to hold a perag reference in args->pag.
    3508             :  */
    3509             : int
    3510           0 : xfs_alloc_vextent_this_ag(
    3511             :         struct xfs_alloc_arg    *args,
    3512             :         xfs_agnumber_t          agno)
    3513             : {
    3514           0 :         struct xfs_mount        *mp = args->mp;
    3515           0 :         xfs_agnumber_t          minimum_agno;
    3516           0 :         uint32_t                alloc_flags = 0;
    3517           0 :         int                     error;
    3518             : 
    3519           0 :         ASSERT(args->pag != NULL);
    3520           0 :         ASSERT(args->pag->pag_agno == agno);
    3521             : 
    3522           0 :         args->agno = agno;
    3523           0 :         args->agbno = 0;
    3524             : 
    3525           0 :         trace_xfs_alloc_vextent_this_ag(args);
    3526             : 
    3527           0 :         error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
    3528             :                         &minimum_agno);
    3529           0 :         if (error) {
    3530           0 :                 if (error == -ENOSPC)
    3531             :                         return 0;
    3532           0 :                 return error;
    3533             :         }
    3534             : 
    3535           0 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3536           0 :         if (!error && args->agbp)
    3537           0 :                 error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3538             : 
    3539           0 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3540             : }
    3541             : 
    3542             : /*
    3543             :  * Iterate all AGs trying to allocate an extent starting from @start_ag.
    3544             :  *
    3545             :  * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
    3546             :  * allocation attempts in @start_agno have locality information. If we fail to
    3547             :  * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
    3548             :  * we attempt to allocation in as there is no locality optimisation possible for
    3549             :  * those allocations.
    3550             :  *
    3551             :  * On return, args->pag may be left referenced if we finish before the "all
    3552             :  * failed" return point. The allocation finish still needs the perag, and
    3553             :  * so the caller will release it once they've finished the allocation.
    3554             :  *
    3555             :  * When we wrap the AG iteration at the end of the filesystem, we have to be
    3556             :  * careful not to wrap into AGs below ones we already have locked in the
    3557             :  * transaction if we are doing a blocking iteration. This will result in an
    3558             :  * out-of-order locking of AGFs and hence can cause deadlocks.
    3559             :  */
    3560             : static int
    3561    57225737 : xfs_alloc_vextent_iterate_ags(
    3562             :         struct xfs_alloc_arg    *args,
    3563             :         xfs_agnumber_t          minimum_agno,
    3564             :         xfs_agnumber_t          start_agno,
    3565             :         xfs_agblock_t           target_agbno,
    3566             :         uint32_t                alloc_flags)
    3567             : {
    3568    57225737 :         struct xfs_mount        *mp = args->mp;
    3569    57225737 :         xfs_agnumber_t          restart_agno = minimum_agno;
    3570    57225737 :         xfs_agnumber_t          agno;
    3571    57225737 :         int                     error = 0;
    3572             : 
    3573    57225737 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
    3574    57225737 :                 restart_agno = 0;
    3575           0 : restart:
    3576    93723433 :         for_each_perag_wrap_range(mp, start_agno, restart_agno,
    3577             :                         mp->m_sb.sb_agcount, agno, args->pag) {
    3578    93672951 :                 args->agno = agno;
    3579    93672951 :                 error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3580    93676711 :                 if (error)
    3581             :                         break;
    3582    93675974 :                 if (!args->agbp) {
    3583    36464807 :                         trace_xfs_alloc_vextent_loopfailed(args);
    3584    36464810 :                         continue;
    3585             :                 }
    3586             : 
    3587             :                 /*
    3588             :                  * Allocation is supposed to succeed now, so break out of the
    3589             :                  * loop regardless of whether we succeed or not.
    3590             :                  */
    3591    57211167 :                 if (args->agno == start_agno && target_agbno) {
    3592    54682977 :                         args->agbno = target_agbno;
    3593    54682977 :                         error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3594             :                 } else {
    3595     2528190 :                         args->agbno = 0;
    3596     2528190 :                         error = xfs_alloc_ag_vextent_size(args, alloc_flags);
    3597             :                 }
    3598             :                 break;
    3599             :         }
    3600    57259520 :         if (error) {
    3601        1494 :                 xfs_perag_rele(args->pag);
    3602        1494 :                 args->pag = NULL;
    3603        1494 :                 return error;
    3604             :         }
    3605    57258026 :         if (args->agbp)
    3606             :                 return 0;
    3607             : 
    3608             :         /*
    3609             :          * We didn't find an AG we can alloation from. If we were given
    3610             :          * constraining flags by the caller, drop them and retry the allocation
    3611             :          * without any constraints being set.
    3612             :          */
    3613       49303 :         if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
    3614       32870 :                 alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
    3615       32870 :                 restart_agno = minimum_agno;
    3616       32870 :                 goto restart;
    3617             :         }
    3618             : 
    3619       16433 :         ASSERT(args->pag == NULL);
    3620       16433 :         trace_xfs_alloc_vextent_allfailed(args);
    3621       16433 :         return 0;
    3622             : }
    3623             : 
    3624             : /*
    3625             :  * Iterate from the AGs from the start AG to the end of the filesystem, trying
    3626             :  * to allocate blocks. It starts with a near allocation attempt in the initial
    3627             :  * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
    3628             :  * back to zero if allowed by previous allocations in this transaction,
    3629             :  * otherwise will wrap back to the start AG and run a second blocking pass to
    3630             :  * the end of the filesystem.
    3631             :  */
    3632             : int
    3633    56854289 : xfs_alloc_vextent_start_ag(
    3634             :         struct xfs_alloc_arg    *args,
    3635             :         xfs_fsblock_t           target)
    3636             : {
    3637    56854289 :         struct xfs_mount        *mp = args->mp;
    3638    56854289 :         xfs_agnumber_t          minimum_agno;
    3639    56854289 :         xfs_agnumber_t          start_agno;
    3640    56854289 :         xfs_agnumber_t          rotorstep = xfs_rotorstep;
    3641    56854289 :         bool                    bump_rotor = false;
    3642    56854289 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3643    56854289 :         int                     error;
    3644             : 
    3645    56854289 :         ASSERT(args->pag == NULL);
    3646             : 
    3647    56854289 :         args->agno = NULLAGNUMBER;
    3648    56854289 :         args->agbno = NULLAGBLOCK;
    3649             : 
    3650    56854289 :         trace_xfs_alloc_vextent_start_ag(args);
    3651             : 
    3652    56853443 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3653    56852217 :         if (error) {
    3654           0 :                 if (error == -ENOSPC)
    3655             :                         return 0;
    3656           0 :                 return error;
    3657             :         }
    3658             : 
    3659    59320227 :         if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
    3660             :             xfs_is_inode32(mp)) {
    3661           0 :                 target = XFS_AGB_TO_FSB(mp,
    3662             :                                 ((mp->m_agfrotor / rotorstep) %
    3663             :                                 mp->m_sb.sb_agcount), 0);
    3664           0 :                 bump_rotor = 1;
    3665             :         }
    3666             : 
    3667    56852217 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3668    56852217 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3669             :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3670             : 
    3671    56856628 :         if (bump_rotor) {
    3672           0 :                 if (args->agno == start_agno)
    3673           0 :                         mp->m_agfrotor = (mp->m_agfrotor + 1) %
    3674           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3675             :                 else
    3676           0 :                         mp->m_agfrotor = (args->agno * rotorstep + 1) %
    3677           0 :                                 (mp->m_sb.sb_agcount * rotorstep);
    3678             :         }
    3679             : 
    3680    56856628 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3681             : }
    3682             : 
    3683             : /*
    3684             :  * Iterate from the agno indicated via @target through to the end of the
    3685             :  * filesystem attempting blocking allocation. This does not wrap or try a second
    3686             :  * pass, so will not recurse into AGs lower than indicated by the target.
    3687             :  */
    3688             : int
    3689      370156 : xfs_alloc_vextent_first_ag(
    3690             :         struct xfs_alloc_arg    *args,
    3691             :         xfs_fsblock_t           target)
    3692             :  {
    3693      370156 :         struct xfs_mount        *mp = args->mp;
    3694      370156 :         xfs_agnumber_t          minimum_agno;
    3695      370156 :         xfs_agnumber_t          start_agno;
    3696      370156 :         uint32_t                alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
    3697      370156 :         int                     error;
    3698             : 
    3699      370156 :         ASSERT(args->pag == NULL);
    3700             : 
    3701      370156 :         args->agno = NULLAGNUMBER;
    3702      370156 :         args->agbno = NULLAGBLOCK;
    3703             : 
    3704      370156 :         trace_xfs_alloc_vextent_first_ag(args);
    3705             : 
    3706      370151 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3707      370139 :         if (error) {
    3708           0 :                 if (error == -ENOSPC)
    3709             :                         return 0;
    3710           0 :                 return error;
    3711             :         }
    3712             : 
    3713      370139 :         start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
    3714      370139 :         error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
    3715             :                         XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
    3716      370163 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
    3717             : }
    3718             : 
    3719             : /*
    3720             :  * Allocate at the exact block target or fail. Caller is expected to hold a
    3721             :  * perag reference in args->pag.
    3722             :  */
    3723             : int
    3724      403398 : xfs_alloc_vextent_exact_bno(
    3725             :         struct xfs_alloc_arg    *args,
    3726             :         xfs_fsblock_t           target)
    3727             : {
    3728      403398 :         struct xfs_mount        *mp = args->mp;
    3729      403398 :         xfs_agnumber_t          minimum_agno;
    3730      403398 :         int                     error;
    3731             : 
    3732      403398 :         ASSERT(args->pag != NULL);
    3733      403398 :         ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3734             : 
    3735      403398 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3736      403398 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3737             : 
    3738      403398 :         trace_xfs_alloc_vextent_exact_bno(args);
    3739             : 
    3740      403398 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3741      403396 :         if (error) {
    3742           0 :                 if (error == -ENOSPC)
    3743             :                         return 0;
    3744           0 :                 return error;
    3745             :         }
    3746             : 
    3747      403396 :         error = xfs_alloc_vextent_prepare_ag(args, 0);
    3748      403399 :         if (!error && args->agbp)
    3749      274562 :                 error = xfs_alloc_ag_vextent_exact(args);
    3750             : 
    3751      403395 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
    3752             : }
    3753             : 
    3754             : /*
    3755             :  * Allocate an extent as close to the target as possible. If there are not
    3756             :  * viable candidates in the AG, then fail the allocation.
    3757             :  *
    3758             :  * Caller may or may not have a per-ag reference in args->pag.
    3759             :  */
    3760             : int
    3761     1078601 : xfs_alloc_vextent_near_bno(
    3762             :         struct xfs_alloc_arg    *args,
    3763             :         xfs_fsblock_t           target)
    3764             : {
    3765     1078601 :         struct xfs_mount        *mp = args->mp;
    3766     1078601 :         xfs_agnumber_t          minimum_agno;
    3767     1078601 :         bool                    needs_perag = args->pag == NULL;
    3768     1078601 :         uint32_t                alloc_flags = 0;
    3769     1078601 :         int                     error;
    3770             : 
    3771     1078601 :         if (!needs_perag)
    3772      992656 :                 ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
    3773             : 
    3774     1078601 :         args->agno = XFS_FSB_TO_AGNO(mp, target);
    3775     1078601 :         args->agbno = XFS_FSB_TO_AGBNO(mp, target);
    3776             : 
    3777     1078601 :         trace_xfs_alloc_vextent_near_bno(args);
    3778             : 
    3779     1078576 :         error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
    3780     1078572 :         if (error) {
    3781           0 :                 if (error == -ENOSPC)
    3782             :                         return 0;
    3783           0 :                 return error;
    3784             :         }
    3785             : 
    3786     1078572 :         if (needs_perag)
    3787       85918 :                 args->pag = xfs_perag_grab(mp, args->agno);
    3788             : 
    3789     1078594 :         error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
    3790     1078613 :         if (!error && args->agbp)
    3791      651290 :                 error = xfs_alloc_ag_vextent_near(args, alloc_flags);
    3792             : 
    3793     1078594 :         return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
    3794             : }
    3795             : 
    3796             : /* Ensure that the freelist is at full capacity. */
    3797             : int
    3798   265873970 : xfs_free_extent_fix_freelist(
    3799             :         struct xfs_trans        *tp,
    3800             :         struct xfs_perag        *pag,
    3801             :         struct xfs_buf          **agbp)
    3802             : {
    3803   265873970 :         struct xfs_alloc_arg    args;
    3804   265873970 :         int                     error;
    3805             : 
    3806   265873970 :         memset(&args, 0, sizeof(struct xfs_alloc_arg));
    3807   265873970 :         args.tp = tp;
    3808   265873970 :         args.mp = tp->t_mountp;
    3809   265873970 :         args.agno = pag->pag_agno;
    3810   265873970 :         args.pag = pag;
    3811             : 
    3812             :         /*
    3813             :          * validate that the block number is legal - the enables us to detect
    3814             :          * and handle a silent filesystem corruption rather than crashing.
    3815             :          */
    3816   265873970 :         if (args.agno >= args.mp->m_sb.sb_agcount)
    3817             :                 return -EFSCORRUPTED;
    3818             : 
    3819   265873970 :         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
    3820   265877954 :         if (error)
    3821             :                 return error;
    3822             : 
    3823   265876909 :         *agbp = args.agbp;
    3824   265876909 :         return 0;
    3825             : }
    3826             : 
    3827             : /*
    3828             :  * Free an extent.
    3829             :  * Just break up the extent address and hand off to xfs_free_ag_extent
    3830             :  * after fixing up the freelist.
    3831             :  */
    3832             : int
    3833    51178224 : __xfs_free_extent(
    3834             :         struct xfs_trans                *tp,
    3835             :         struct xfs_perag                *pag,
    3836             :         xfs_agblock_t                   agbno,
    3837             :         xfs_extlen_t                    len,
    3838             :         const struct xfs_owner_info     *oinfo,
    3839             :         enum xfs_ag_resv_type           type,
    3840             :         bool                            skip_discard)
    3841             : {
    3842    51178224 :         struct xfs_mount                *mp = tp->t_mountp;
    3843    51178224 :         struct xfs_buf                  *agbp;
    3844    51178224 :         struct xfs_agf                  *agf;
    3845    51178224 :         int                             error;
    3846    51178224 :         unsigned int                    busy_flags = 0;
    3847             : 
    3848    51178224 :         ASSERT(len != 0);
    3849    51178224 :         ASSERT(type != XFS_AG_RESV_AGFL);
    3850             : 
    3851    51178224 :         if (XFS_TEST_ERROR(false, mp,
    3852             :                         XFS_ERRTAG_FREE_EXTENT))
    3853             :                 return -EIO;
    3854             : 
    3855    51177940 :         error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
    3856    51178897 :         if (error) {
    3857          14 :                 if (xfs_metadata_is_sick(error))
    3858           0 :                         xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3859          14 :                 return error;
    3860             :         }
    3861             : 
    3862    51178883 :         agf = agbp->b_addr;
    3863             : 
    3864    51178883 :         if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
    3865           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3866           0 :                 error = -EFSCORRUPTED;
    3867           0 :                 goto err_release;
    3868             :         }
    3869             : 
    3870             :         /* validate the extent size is legal now we have the agf locked */
    3871   102357766 :         if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
    3872           0 :                 xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
    3873           0 :                 error = -EFSCORRUPTED;
    3874           0 :                 goto err_release;
    3875             :         }
    3876             : 
    3877    51178883 :         error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
    3878             :                         type);
    3879    51177360 :         if (error)
    3880         198 :                 goto err_release;
    3881             : 
    3882    51177162 :         if (skip_discard)
    3883     1571145 :                 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
    3884    51177162 :         xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
    3885    51177162 :         return 0;
    3886             : 
    3887         198 : err_release:
    3888         198 :         xfs_trans_brelse(tp, agbp);
    3889         198 :         return error;
    3890             : }
    3891             : 
    3892             : struct xfs_alloc_query_range_info {
    3893             :         xfs_alloc_query_range_fn        fn;
    3894             :         void                            *priv;
    3895             : };
    3896             : 
    3897             : /* Format btree record and pass to our callback. */
    3898             : STATIC int
    3899   947148472 : xfs_alloc_query_range_helper(
    3900             :         struct xfs_btree_cur            *cur,
    3901             :         const union xfs_btree_rec       *rec,
    3902             :         void                            *priv)
    3903             : {
    3904   947148472 :         struct xfs_alloc_query_range_info       *query = priv;
    3905   947148472 :         struct xfs_alloc_rec_incore             irec;
    3906   947148472 :         xfs_failaddr_t                          fa;
    3907             : 
    3908   947148472 :         xfs_alloc_btrec_to_irec(rec, &irec);
    3909   947148672 :         fa = xfs_alloc_check_irec(cur, &irec);
    3910   947148870 :         if (fa)
    3911           0 :                 return xfs_alloc_complain_bad_rec(cur, fa, &irec);
    3912             : 
    3913   947148870 :         return query->fn(cur, &irec, query->priv);
    3914             : }
    3915             : 
    3916             : /* Find all free space within a given range of blocks. */
    3917             : int
    3918       21267 : xfs_alloc_query_range(
    3919             :         struct xfs_btree_cur                    *cur,
    3920             :         const struct xfs_alloc_rec_incore       *low_rec,
    3921             :         const struct xfs_alloc_rec_incore       *high_rec,
    3922             :         xfs_alloc_query_range_fn                fn,
    3923             :         void                                    *priv)
    3924             : {
    3925       21267 :         union xfs_btree_irec                    low_brec = { .a = *low_rec };
    3926       21267 :         union xfs_btree_irec                    high_brec = { .a = *high_rec };
    3927       21267 :         struct xfs_alloc_query_range_info       query = { .priv = priv, .fn = fn };
    3928             : 
    3929       21267 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3930       21267 :         return xfs_btree_query_range(cur, &low_brec, &high_brec,
    3931             :                         xfs_alloc_query_range_helper, &query);
    3932             : }
    3933             : 
    3934             : /* Find all free space records. */
    3935             : int
    3936      420882 : xfs_alloc_query_all(
    3937             :         struct xfs_btree_cur                    *cur,
    3938             :         xfs_alloc_query_range_fn                fn,
    3939             :         void                                    *priv)
    3940             : {
    3941      420882 :         struct xfs_alloc_query_range_info       query;
    3942             : 
    3943      420882 :         ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
    3944      420882 :         query.priv = priv;
    3945      420882 :         query.fn = fn;
    3946      420882 :         return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
    3947             : }
    3948             : 
    3949             : /*
    3950             :  * Scan part of the keyspace of the free space and tell us if the area has no
    3951             :  * records, is fully mapped by records, or is partially filled.
    3952             :  */
    3953             : int
    3954  1265272807 : xfs_alloc_has_records(
    3955             :         struct xfs_btree_cur    *cur,
    3956             :         xfs_agblock_t           bno,
    3957             :         xfs_extlen_t            len,
    3958             :         enum xbtree_recpacking  *outcome)
    3959             : {
    3960  1265272807 :         union xfs_btree_irec    low;
    3961  1265272807 :         union xfs_btree_irec    high;
    3962             : 
    3963  1265272807 :         memset(&low, 0, sizeof(low));
    3964  1265272807 :         low.a.ar_startblock = bno;
    3965  1265272807 :         memset(&high, 0xFF, sizeof(high));
    3966  1265272807 :         high.a.ar_startblock = bno + len - 1;
    3967             : 
    3968  1265272807 :         return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
    3969             : }
    3970             : 
    3971             : /*
    3972             :  * Walk all the blocks in the AGFL.  The @walk_fn can return any negative
    3973             :  * error code or XFS_ITER_*.
    3974             :  */
    3975             : int
    3976    41770455 : xfs_agfl_walk(
    3977             :         struct xfs_mount        *mp,
    3978             :         struct xfs_agf          *agf,
    3979             :         struct xfs_buf          *agflbp,
    3980             :         xfs_agfl_walk_fn        walk_fn,
    3981             :         void                    *priv)
    3982             : {
    3983    41770455 :         __be32                  *agfl_bno;
    3984    41770455 :         unsigned int            i;
    3985    41770455 :         int                     error;
    3986             : 
    3987    41770455 :         agfl_bno = xfs_buf_to_agfl_bno(agflbp);
    3988    41770455 :         i = be32_to_cpu(agf->agf_flfirst);
    3989             : 
    3990             :         /* Nothing to walk in an empty AGFL. */
    3991    41770455 :         if (agf->agf_flcount == cpu_to_be32(0))
    3992             :                 return 0;
    3993             : 
    3994             :         /* Otherwise, walk from first to last, wrapping as needed. */
    3995   404137868 :         for (;;) {
    3996   808275736 :                 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
    3997   404137870 :                 if (error)
    3998     1589883 :                         return error;
    3999   805095974 :                 if (i == be32_to_cpu(agf->agf_fllast))
    4000             :                         break;
    4001   724754106 :                 if (++i == xfs_agfl_size(mp))
    4002           4 :                         i = 0;
    4003             :         }
    4004             : 
    4005             :         return 0;
    4006             : }
    4007             : 
    4008             : int __init
    4009          12 : xfs_extfree_intent_init_cache(void)
    4010             : {
    4011          12 :         xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
    4012             :                         sizeof(struct xfs_extent_free_item),
    4013             :                         0, 0, NULL);
    4014             : 
    4015          12 :         return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
    4016             : }
    4017             : 
    4018             : void
    4019          12 : xfs_extfree_intent_destroy_cache(void)
    4020             : {
    4021          12 :         kmem_cache_destroy(xfs_extfree_item_cache);
    4022          12 :         xfs_extfree_item_cache = NULL;
    4023          12 : }

Generated by: LCOV version 1.14