LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_ialloc.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwx @ Mon Jul 31 20:08:22 PDT 2023 Lines: 1111 1289 86.2 %
Date: 2023-07-31 20:08:22 Functions: 44 46 95.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_ialloc.h"
      17             : #include "xfs_ialloc_btree.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_errortag.h"
      20             : #include "xfs_error.h"
      21             : #include "xfs_bmap.h"
      22             : #include "xfs_trans.h"
      23             : #include "xfs_buf_item.h"
      24             : #include "xfs_icreate_item.h"
      25             : #include "xfs_icache.h"
      26             : #include "xfs_trace.h"
      27             : #include "xfs_log.h"
      28             : #include "xfs_rmap.h"
      29             : #include "xfs_ag.h"
      30             : 
      31             : /*
      32             :  * Lookup a record by ino in the btree given by cur.
      33             :  */
      34             : int                                     /* error */
      35  3113729797 : xfs_inobt_lookup(
      36             :         struct xfs_btree_cur    *cur,   /* btree cursor */
      37             :         xfs_agino_t             ino,    /* starting inode of chunk */
      38             :         xfs_lookup_t            dir,    /* <=, >=, == */
      39             :         int                     *stat)  /* success/failure */
      40             : {
      41  6737692876 :         cur->bc_rec.i.ir_startino = ino;
      42  6737692876 :         cur->bc_rec.i.ir_holemask = 0;
      43  6737692876 :         cur->bc_rec.i.ir_count = 0;
      44  6737692876 :         cur->bc_rec.i.ir_freecount = 0;
      45  6737692876 :         cur->bc_rec.i.ir_free = 0;
      46  3113729797 :         return xfs_btree_lookup(cur, dir, stat);
      47             : }
      48             : 
      49             : /*
      50             :  * Update the record referred to by cur to the value given.
      51             :  * This either works (return 0) or gets an EFSCORRUPTED error.
      52             :  */
      53             : STATIC int                              /* error */
      54   260237749 : xfs_inobt_update(
      55             :         struct xfs_btree_cur    *cur,   /* btree cursor */
      56             :         xfs_inobt_rec_incore_t  *irec)  /* btree record */
      57             : {
      58   260237749 :         union xfs_btree_rec     rec;
      59             : 
      60   260237749 :         rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
      61   260237749 :         if (xfs_has_sparseinodes(cur->bc_mp)) {
      62   260236354 :                 rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
      63   260236354 :                 rec.inobt.ir_u.sp.ir_count = irec->ir_count;
      64   260236354 :                 rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
      65             :         } else {
      66             :                 /* ir_holemask/ir_count not supported on-disk */
      67        1395 :                 rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
      68             :         }
      69   260237749 :         rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
      70   260237749 :         return xfs_btree_update(cur, &rec);
      71             : }
      72             : 
      73             : /* Convert on-disk btree record to incore inobt record. */
      74             : void
      75 20012211108 : xfs_inobt_btrec_to_irec(
      76             :         struct xfs_mount                *mp,
      77             :         const union xfs_btree_rec       *rec,
      78             :         struct xfs_inobt_rec_incore     *irec)
      79             : {
      80 20012211108 :         irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
      81 20012211108 :         if (xfs_has_sparseinodes(mp)) {
      82 20012205187 :                 irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
      83 20012205187 :                 irec->ir_count = rec->inobt.ir_u.sp.ir_count;
      84 20012205187 :                 irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
      85             :         } else {
      86             :                 /*
      87             :                  * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
      88             :                  * values for full inode chunks.
      89             :                  */
      90        5921 :                 irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
      91        5921 :                 irec->ir_count = XFS_INODES_PER_CHUNK;
      92        5921 :                 irec->ir_freecount =
      93        5921 :                                 be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
      94             :         }
      95 20012211108 :         irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
      96 20012211108 : }
      97             : 
      98             : /* Simple checks for inode records. */
      99             : xfs_failaddr_t
     100 20027732580 : xfs_inobt_check_irec(
     101             :         struct xfs_btree_cur                    *cur,
     102             :         const struct xfs_inobt_rec_incore       *irec)
     103             : {
     104 20027732580 :         uint64_t                        realfree;
     105             : 
     106             :         /* Record has to be properly aligned within the AG. */
     107 20027732580 :         if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino))
     108           0 :                 return __this_address;
     109 20027732580 :         if (!xfs_verify_agino(cur->bc_ag.pag,
     110             :                                 irec->ir_startino + XFS_INODES_PER_CHUNK - 1))
     111           0 :                 return __this_address;
     112 20027732580 :         if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
     113             :             irec->ir_count > XFS_INODES_PER_CHUNK)
     114           0 :                 return __this_address;
     115 20027732580 :         if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
     116           0 :                 return __this_address;
     117             : 
     118             :         /* Free bits in holes are ignored; the rest must equal ir_freecount. */
     119 20027732580 :         if (!xfs_inobt_issparse(irec->ir_holemask))
     120 14180392388 :                 realfree = irec->ir_free;
     121             :         else
     122  5847340192 :                 realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
     123 40067766554 :         if (hweight64(realfree) != irec->ir_freecount)
     124     9549006 :                 return __this_address;
     125             : 
     126             :         return NULL;
     127             : }
     128             : 
     129             : static inline int
     130           0 : xfs_inobt_complain_bad_rec(
     131             :         struct xfs_btree_cur            *cur,
     132             :         xfs_failaddr_t                  fa,
     133             :         const struct xfs_inobt_rec_incore *irec)
     134             : {
     135           0 :         struct xfs_mount                *mp = cur->bc_mp;
     136             : 
     137           0 :         xfs_warn(mp,
     138             :                 "%s Inode BTree record corruption in AG %d detected at %pS!",
     139             :                 cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free",
     140             :                 cur->bc_ag.pag->pag_agno, fa);
     141           0 :         xfs_warn(mp,
     142             : "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
     143             :                 irec->ir_startino, irec->ir_count, irec->ir_freecount,
     144             :                 irec->ir_free, irec->ir_holemask);
     145           0 :         return -EFSCORRUPTED;
     146             : }
     147             : 
     148             : /*
     149             :  * Get the data from the pointed-to record.
     150             :  */
     151             : int
     152 19858350600 : xfs_inobt_get_rec(
     153             :         struct xfs_btree_cur            *cur,
     154             :         struct xfs_inobt_rec_incore     *irec,
     155             :         int                             *stat)
     156             : {
     157 19858350600 :         struct xfs_mount                *mp = cur->bc_mp;
     158 19858350600 :         union xfs_btree_rec             *rec;
     159 19858350600 :         xfs_failaddr_t                  fa;
     160 19858350600 :         int                             error;
     161             : 
     162 19858350600 :         error = xfs_btree_get_rec(cur, &rec, stat);
     163 19810602292 :         if (error || *stat == 0)
     164             :                 return error;
     165             : 
     166 19815442487 :         xfs_inobt_btrec_to_irec(mp, rec, irec);
     167 19740645514 :         fa = xfs_inobt_check_irec(cur, irec);
     168 19790739321 :         if (fa)
     169           0 :                 return xfs_inobt_complain_bad_rec(cur, fa, irec);
     170             : 
     171             :         return 0;
     172             : }
     173             : 
     174             : /*
     175             :  * Insert a single inobt record. Cursor must already point to desired location.
     176             :  */
     177             : int
     178           0 : xfs_inobt_insert_rec(
     179             :         struct xfs_btree_cur    *cur,
     180             :         uint16_t                holemask,
     181             :         uint8_t                 count,
     182             :         int32_t                 freecount,
     183             :         xfs_inofree_t           free,
     184             :         int                     *stat)
     185             : {
     186    13597958 :         cur->bc_rec.i.ir_holemask = holemask;
     187    13597958 :         cur->bc_rec.i.ir_count = count;
     188    13597958 :         cur->bc_rec.i.ir_freecount = freecount;
     189    13597958 :         cur->bc_rec.i.ir_free = free;
     190           0 :         return xfs_btree_insert(cur, stat);
     191             : }
     192             : 
     193             : /*
     194             :  * Insert records describing a newly allocated inode chunk into the inobt.
     195             :  */
     196             : STATIC int
     197     1003922 : xfs_inobt_insert(
     198             :         struct xfs_perag        *pag,
     199             :         struct xfs_trans        *tp,
     200             :         struct xfs_buf          *agbp,
     201             :         xfs_agino_t             newino,
     202             :         xfs_agino_t             newlen,
     203             :         xfs_btnum_t             btnum)
     204             : {
     205     1003922 :         struct xfs_btree_cur    *cur;
     206     1003922 :         xfs_agino_t             thisino;
     207     1003922 :         int                     i;
     208     1003922 :         int                     error;
     209             : 
     210     1003922 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
     211             : 
     212     1003922 :         for (thisino = newino;
     213     2007893 :              thisino < newino + newlen;
     214     1003872 :              thisino += XFS_INODES_PER_CHUNK) {
     215     1003991 :                 error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
     216     1004008 :                 if (error) {
     217           7 :                         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
     218           7 :                         return error;
     219             :                 }
     220     1004001 :                 ASSERT(i == 0);
     221             : 
     222     1004001 :                 error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
     223             :                                              XFS_INODES_PER_CHUNK,
     224             :                                              XFS_INODES_PER_CHUNK,
     225             :                                              XFS_INOBT_ALL_FREE, &i);
     226     1003872 :                 if (error) {
     227           0 :                         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
     228           0 :                         return error;
     229             :                 }
     230     1003872 :                 ASSERT(i == 1);
     231             :         }
     232             : 
     233     1003902 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
     234             : 
     235     1003902 :         return 0;
     236             : }
     237             : 
     238             : /*
     239             :  * Verify that the number of free inodes in the AGI is correct.
     240             :  */
     241             : #ifdef DEBUG
     242             : static int
     243   511168458 : xfs_check_agi_freecount(
     244             :         struct xfs_btree_cur    *cur)
     245             : {
     246   511168458 :         if (cur->bc_nlevels == 1) {
     247   404938323 :                 xfs_inobt_rec_incore_t rec;
     248   404938323 :                 int             freecount = 0;
     249   404938323 :                 int             error;
     250   404938323 :                 int             i;
     251             : 
     252   404938323 :                 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
     253   405355999 :                 if (error)
     254         175 :                         return error;
     255             : 
     256 13615159689 :                 do {
     257 13615159689 :                         error = xfs_inobt_get_rec(cur, &rec, &i);
     258 13620196262 :                         if (error)
     259           0 :                                 return error;
     260             : 
     261 13620196262 :                         if (i) {
     262 13629948468 :                                 freecount += rec.ir_freecount;
     263 13629948468 :                                 error = xfs_btree_increment(cur, 0, &i);
     264 13625368017 :                                 if (error)
     265           0 :                                         return error;
     266             :                         }
     267 13615615811 :                 } while (i == 1);
     268             : 
     269   811623892 :                 if (!xfs_is_shutdown(cur->bc_mp))
     270   405867184 :                         ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
     271             :         }
     272             :         return 0;
     273             : }
     274             : #else
     275             : #define xfs_check_agi_freecount(cur)    0
     276             : #endif
     277             : 
     278             : /*
     279             :  * Initialise a new set of inodes. When called without a transaction context
     280             :  * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
     281             :  * than logging them (which in a transaction context puts them into the AIL
     282             :  * for writeback rather than the xfsbufd queue).
     283             :  */
     284             : int
     285     1045574 : xfs_ialloc_inode_init(
     286             :         struct xfs_mount        *mp,
     287             :         struct xfs_trans        *tp,
     288             :         struct list_head        *buffer_list,
     289             :         int                     icount,
     290             :         xfs_agnumber_t          agno,
     291             :         xfs_agblock_t           agbno,
     292             :         xfs_agblock_t           length,
     293             :         unsigned int            gen)
     294             : {
     295     1045574 :         struct xfs_buf          *fbuf;
     296     1045574 :         struct xfs_dinode       *free;
     297     1045574 :         int                     nbufs;
     298     1045574 :         int                     version;
     299     1045574 :         int                     i, j;
     300     1045574 :         xfs_daddr_t             d;
     301     1045574 :         xfs_ino_t               ino = 0;
     302     1045574 :         int                     error;
     303             : 
     304             :         /*
     305             :          * Loop over the new block(s), filling in the inodes.  For small block
     306             :          * sizes, manipulate the inodes in buffers which are multiples of the
     307             :          * block size.
     308             :          */
     309     1045574 :         nbufs = length / M_IGEO(mp)->blocks_per_cluster;
     310             : 
     311             :         /*
     312             :          * Figure out what version number to use in the inodes we create.  If
     313             :          * the superblock version has caught up to the one that supports the new
     314             :          * inode format, then use the new inode version.  Otherwise use the old
     315             :          * version so that old kernels will continue to be able to use the file
     316             :          * system.
     317             :          *
     318             :          * For v3 inodes, we also need to write the inode number into the inode,
     319             :          * so calculate the first inode number of the chunk here as
     320             :          * XFS_AGB_TO_AGINO() only works within a filesystem block, not
     321             :          * across multiple filesystem blocks (such as a cluster) and so cannot
     322             :          * be used in the cluster buffer loop below.
     323             :          *
     324             :          * Further, because we are writing the inode directly into the buffer
     325             :          * and calculating a CRC on the entire inode, we have to log the entire
     326             :          * inode so that the entire range the CRC covers is present in the log.
     327             :          * That means for v3 inodes we log the entire buffer rather than just the
     328             :          * inode cores.
     329             :          */
     330     1045574 :         if (xfs_has_v3inodes(mp)) {
     331     1045017 :                 version = 3;
     332     1045017 :                 ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno));
     333             : 
     334             :                 /*
     335             :                  * Log the initialisation that is about to take place as a
     336             :                  * logical operation. This means the transaction does not
     337             :                  * need to log the physical changes to the inode buffers as log
     338             :                  * recovery will know what initialisation is actually needed.
     339             :                  * Hence we only need to log the buffers as "ordered" buffers so
     340             :                  * they track in the AIL as if they were physically logged.
     341             :                  */
     342     1045017 :                 if (tp)
     343     1016376 :                         xfs_icreate_log(tp, agno, agbno, icount,
     344     1016376 :                                         mp->m_sb.sb_inodesize, length, gen);
     345             :         } else
     346             :                 version = 2;
     347             : 
     348     2608047 :         for (j = 0; j < nbufs; j++) {
     349             :                 /*
     350             :                  * Get the block.
     351             :                  */
     352     1562309 :                 d = XFS_AGB_TO_DADDR(mp, agno, agbno +
     353             :                                 (j * M_IGEO(mp)->blocks_per_cluster));
     354     1562309 :                 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
     355     1562309 :                                 mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
     356             :                                 XBF_UNMAPPED, &fbuf);
     357     1561829 :                 if (error)
     358           0 :                         return error;
     359             : 
     360             :                 /* Initialize the inode buffers and log them appropriately. */
     361     1561829 :                 fbuf->b_ops = &xfs_inode_buf_ops;
     362     1561829 :                 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
     363    53108944 :                 for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
     364    49984978 :                         int     ioffset = i << mp->m_sb.sb_inodelog;
     365             : 
     366    49984978 :                         free = xfs_make_iptr(mp, fbuf, i);
     367    49981112 :                         free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
     368    49981112 :                         free->di_version = version;
     369    49981112 :                         free->di_gen = cpu_to_be32(gen);
     370    49981112 :                         free->di_next_unlinked = cpu_to_be32(NULLAGINO);
     371             : 
     372    49981112 :                         if (version == 3) {
     373    49980408 :                                 free->di_ino = cpu_to_be64(ino);
     374    49980408 :                                 ino++;
     375    49980408 :                                 uuid_copy(&free->di_uuid,
     376    49980408 :                                           &mp->m_sb.sb_meta_uuid);
     377    49981204 :                                 xfs_dinode_calc_crc(mp, free);
     378         704 :                         } else if (tp) {
     379             :                                 /* just log the inode core */
     380         704 :                                 xfs_trans_log_buf(tp, fbuf, ioffset,
     381        1408 :                                           ioffset + XFS_DINODE_SIZE(mp) - 1);
     382             :                         }
     383             :                 }
     384             : 
     385     1562137 :                 if (tp) {
     386             :                         /*
     387             :                          * Mark the buffer as an inode allocation buffer so it
     388             :                          * sticks in AIL at the point of this allocation
     389             :                          * transaction. This ensures the they are on disk before
     390             :                          * the tail of the log can be moved past this
     391             :                          * transaction (i.e. by preventing relogging from moving
     392             :                          * it forward in the log).
     393             :                          */
     394     1519228 :                         xfs_trans_inode_alloc_buf(tp, fbuf);
     395     1518575 :                         if (version == 3) {
     396             :                                 /*
     397             :                                  * Mark the buffer as ordered so that it is
     398             :                                  * not physically logged in the transaction but
     399             :                                  * still tracked in the AIL as part of the
     400             :                                  * transaction and pins the log appropriately.
     401             :                                  */
     402     1518557 :                                 xfs_trans_ordered_buf(tp, fbuf);
     403             :                         }
     404             :                 } else {
     405       42909 :                         fbuf->b_flags |= XBF_DONE;
     406       42909 :                         xfs_buf_delwri_queue(fbuf, buffer_list);
     407       42909 :                         xfs_buf_relse(fbuf);
     408             :                 }
     409             :         }
     410             :         return 0;
     411             : }
     412             : 
     413             : /*
     414             :  * Align startino and allocmask for a recently allocated sparse chunk such that
     415             :  * they are fit for insertion (or merge) into the on-disk inode btrees.
     416             :  *
     417             :  * Background:
     418             :  *
     419             :  * When enabled, sparse inode support increases the inode alignment from cluster
     420             :  * size to inode chunk size. This means that the minimum range between two
     421             :  * non-adjacent inode records in the inobt is large enough for a full inode
     422             :  * record. This allows for cluster sized, cluster aligned block allocation
     423             :  * without need to worry about whether the resulting inode record overlaps with
     424             :  * another record in the tree. Without this basic rule, we would have to deal
     425             :  * with the consequences of overlap by potentially undoing recent allocations in
     426             :  * the inode allocation codepath.
     427             :  *
     428             :  * Because of this alignment rule (which is enforced on mount), there are two
     429             :  * inobt possibilities for newly allocated sparse chunks. One is that the
     430             :  * aligned inode record for the chunk covers a range of inodes not already
     431             :  * covered in the inobt (i.e., it is safe to insert a new sparse record). The
     432             :  * other is that a record already exists at the aligned startino that considers
     433             :  * the newly allocated range as sparse. In the latter case, record content is
     434             :  * merged in hope that sparse inode chunks fill to full chunks over time.
     435             :  */
     436             : STATIC void
     437      515081 : xfs_align_sparse_ino(
     438             :         struct xfs_mount                *mp,
     439             :         xfs_agino_t                     *startino,
     440             :         uint16_t                        *allocmask)
     441             : {
     442      515081 :         xfs_agblock_t                   agbno;
     443      515081 :         xfs_agblock_t                   mod;
     444      515081 :         int                             offset;
     445             : 
     446      515081 :         agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
     447      515081 :         mod = agbno % mp->m_sb.sb_inoalignmt;
     448      515081 :         if (!mod)
     449             :                 return;
     450             : 
     451             :         /* calculate the inode offset and align startino */
     452      280439 :         offset = XFS_AGB_TO_AGINO(mp, mod);
     453      280439 :         *startino -= offset;
     454             : 
     455             :         /*
     456             :          * Since startino has been aligned down, left shift allocmask such that
     457             :          * it continues to represent the same physical inodes relative to the
     458             :          * new startino.
     459             :          */
     460      280439 :         *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
     461             : }
     462             : 
     463             : /*
     464             :  * Determine whether the source inode record can merge into the target. Both
     465             :  * records must be sparse, the inode ranges must match and there must be no
     466             :  * allocation overlap between the records.
     467             :  */
     468             : STATIC bool
     469      128662 : __xfs_inobt_can_merge(
     470             :         struct xfs_inobt_rec_incore     *trec,  /* tgt record */
     471             :         struct xfs_inobt_rec_incore     *srec)  /* src record */
     472             : {
     473      128662 :         uint64_t                        talloc;
     474      128662 :         uint64_t                        salloc;
     475             : 
     476             :         /* records must cover the same inode range */
     477      128662 :         if (trec->ir_startino != srec->ir_startino)
     478             :                 return false;
     479             : 
     480             :         /* both records must be sparse */
     481      128651 :         if (!xfs_inobt_issparse(trec->ir_holemask) ||
     482      128651 :             !xfs_inobt_issparse(srec->ir_holemask))
     483             :                 return false;
     484             : 
     485             :         /* both records must track some inodes */
     486      128651 :         if (!trec->ir_count || !srec->ir_count)
     487             :                 return false;
     488             : 
     489             :         /* can't exceed capacity of a full record */
     490      128651 :         if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
     491             :                 return false;
     492             : 
     493             :         /* verify there is no allocation overlap */
     494      128646 :         talloc = xfs_inobt_irec_to_allocmask(trec);
     495      128673 :         salloc = xfs_inobt_irec_to_allocmask(srec);
     496      128674 :         if (talloc & salloc)
     497           0 :                 return false;
     498             : 
     499             :         return true;
     500             : }
     501             : 
     502             : /*
     503             :  * Merge the source inode record into the target. The caller must call
     504             :  * __xfs_inobt_can_merge() to ensure the merge is valid.
     505             :  */
     506             : STATIC void
     507      128658 : __xfs_inobt_rec_merge(
     508             :         struct xfs_inobt_rec_incore     *trec,  /* target */
     509             :         struct xfs_inobt_rec_incore     *srec)  /* src */
     510             : {
     511      128658 :         ASSERT(trec->ir_startino == srec->ir_startino);
     512             : 
     513             :         /* combine the counts */
     514      128658 :         trec->ir_count += srec->ir_count;
     515      128658 :         trec->ir_freecount += srec->ir_freecount;
     516             : 
     517             :         /*
     518             :          * Merge the holemask and free mask. For both fields, 0 bits refer to
     519             :          * allocated inodes. We combine the allocated ranges with bitwise AND.
     520             :          */
     521      128658 :         trec->ir_holemask &= srec->ir_holemask;
     522      128658 :         trec->ir_free &= srec->ir_free;
     523      128658 : }
     524             : 
     525             : /*
     526             :  * Insert a new sparse inode chunk into the associated inode btree. The inode
     527             :  * record for the sparse chunk is pre-aligned to a startino that should match
     528             :  * any pre-existing sparse inode record in the tree. This allows sparse chunks
     529             :  * to fill over time.
     530             :  *
     531             :  * This function supports two modes of handling preexisting records depending on
     532             :  * the merge flag. If merge is true, the provided record is merged with the
     533             :  * existing record and updated in place. The merged record is returned in nrec.
     534             :  * If merge is false, an existing record is replaced with the provided record.
     535             :  * If no preexisting record exists, the provided record is always inserted.
     536             :  *
     537             :  * It is considered corruption if a merge is requested and not possible. Given
     538             :  * the sparse inode alignment constraints, this should never happen.
                     :  *
                     :  * btnum is handed to xfs_inobt_init_cursor() to choose the btree the cursor
                     :  * is opened on; the callers in this file pass XFS_BTNUM_INO or
                     :  * XFS_BTNUM_FINO.
                     :  *
                     :  * Returns 0 on success, after releasing the cursor with XFS_BTREE_NOERROR.
                     :  * On failure the cursor is released with XFS_BTREE_ERROR and the function
                     :  * returns either the error of the helper that failed or -EFSCORRUPTED when
                     :  * one of the XFS_IS_CORRUPT() checks trips.
     539             :  */
     540             : STATIC int
     541     1030118 : xfs_inobt_insert_sprec(
     542             :         struct xfs_perag                *pag,
     543             :         struct xfs_trans                *tp,
     544             :         struct xfs_buf                  *agbp,
     545             :         int                             btnum,
     546             :         struct xfs_inobt_rec_incore     *nrec,  /* in/out: new/merged rec. */
     547             :         bool                            merge)  /* merge or replace */
     548             : {
     549     1030118 :         struct xfs_mount                *mp = pag->pag_mount;
     550     1030118 :         struct xfs_btree_cur            *cur;
     551     1030118 :         int                             error;
     552     1030118 :         int                             i;
     553     1030118 :         struct xfs_inobt_rec_incore     rec;
     554             : 
     555     1030118 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
     556             : 
     557             :         /* the new record is pre-aligned so we know where to look */
     558     1030187 :         error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
     559     1030248 :         if (error)
     560           4 :                 goto error;
     561             :         /* if nothing there, insert a new record and return */
     562     1030244 :         if (i == 0) {
     563      901553 :                 error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
     564      901553 :                                              nrec->ir_count, nrec->ir_freecount,
     565             :                                              nrec->ir_free, &i);
     566      901503 :                 if (error)
     567           0 :                         goto error;
     568      901503 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     569           0 :                         error = -EFSCORRUPTED;
     570           0 :                         goto error;
     571             :                 }
     572             : 
     573      901503 :                 goto out;
     574             :         }
     575             : 
     576             :         /*
     577             :          * A record exists at this startino. Merge or replace the record
     578             :          * depending on what we've been asked to do.
                     :          *
                     :          * In replace mode (merge == false) the block below is skipped and
                     :          * nrec is passed unchanged to xfs_inobt_update().
     579             :          */
     580      128691 :         if (merge) {
     581      128685 :                 error = xfs_inobt_get_rec(cur, &rec, &i);
     582      128667 :                 if (error)
     583           0 :                         goto error;
     584      128667 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     585           0 :                         error = -EFSCORRUPTED;
     586           0 :                         goto error;
     587             :                 }
     588      128667 :                 if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
     589           0 :                         error = -EFSCORRUPTED;
     590           0 :                         goto error;
     591             :                 }
     592             : 
     593             :                 /*
     594             :                  * This should never fail. If we have coexisting records that
     595             :                  * cannot merge, something is seriously wrong.
     596             :                  */
     597      128667 :                 if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
     598           0 :                         error = -EFSCORRUPTED;
     599           0 :                         goto error;
     600             :                 }
     601             : 
     602      128676 :                 trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
     603      128676 :                                          rec.ir_holemask, nrec->ir_startino,
     604      128676 :                                          nrec->ir_holemask);
     605             : 
     606             :                 /* merge to nrec to output the updated record */
     607      128657 :                 __xfs_inobt_rec_merge(nrec, &rec);
     608             : 
     609      128659 :                 trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
     610      128659 :                                           nrec->ir_holemask);
     611             : 
     612      128657 :                 error = xfs_inobt_rec_check_count(mp, nrec);
     613      128688 :                 if (error)
     614           0 :                         goto error;
     615             :         }
     616             : 
     617      128694 :         error = xfs_inobt_update(cur, nrec);
     618      128679 :         if (error)
     619           0 :                 goto error;
     620             : 
     621      128679 : out:
     622     1030182 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
     623     1030182 :         return 0;
     624           4 : error:
     625           4 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
     626           4 :         return error;
     627             : }
     628             : 
     629             : /*
     630             :  * Allocate new inodes in the allocation group specified by agbp.  Returns 0 if
     631             :  * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so
     632             :  * the caller knows it can try another AG, a hard -ENOSPC when over the maximum
     633             :  * inode count threshold, or the usual negative error code for other errors.
                     :  *
                     :  * On success the AGI counts, the in-core per-AG counts and the superblock
                     :  * inode count deltas are all increased by the number of inodes allocated,
                     :  * and agi_newino is set to the first inode of the new chunk.
     634             :  */
     635             : STATIC int
     636     2588947 : xfs_ialloc_ag_alloc(
     637             :         struct xfs_perag        *pag,
     638             :         struct xfs_trans        *tp,
     639             :         struct xfs_buf          *agbp)
     640             : {
     641     2588947 :         struct xfs_agi          *agi;
     642     2588947 :         struct xfs_alloc_arg    args;
     643     2588947 :         int                     error;
     644     2588947 :         xfs_agino_t             newino;         /* new first inode's number */
     645     2588947 :         xfs_agino_t             newlen;         /* new number of inodes */
     646     2588947 :         int                     isaligned = 0;  /* inode allocation at stripe */
     647             :                                                 /* unit boundary */
     648             :         /*
                     :          * init. to full chunk (this applies to allocmask below, which starts
                     :          * with all bits set; rec itself is not initialised here)
                     :          */
     649     2588947 :         struct xfs_inobt_rec_incore rec;
     650     2588947 :         struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
     651     2588947 :         uint16_t                allocmask = (uint16_t) -1;
     652     2588947 :         int                     do_sparse = 0;
     653             : 
     654     2588947 :         memset(&args, 0, sizeof(args));
     655     2588947 :         args.tp = tp;
     656     2588947 :         args.mp = tp->t_mountp;
     657     2588947 :         args.fsbno = NULLFSBLOCK;
     658     2588947 :         args.oinfo = XFS_RMAP_OINFO_INODES;
     659     2588947 :         args.pag = pag;
     660             : 
     661             : #ifdef DEBUG
     662             :         /* DEBUG builds only: randomly do sparse inode allocations */
     663     2588947 :         if (xfs_has_sparseinodes(tp->t_mountp) &&
     664     2588581 :             igeo->ialloc_min_blks < igeo->ialloc_blks)
     665     2588536 :                 do_sparse = get_random_u32_below(2);
     666             : #endif
     667             : 
     668             :         /*
     669             :          * Locking will ensure that we don't have two callers in here
     670             :          * at one time.
     671             :          */
     672     2588787 :         newlen = igeo->ialloc_inos;
     673     2588787 :         if (igeo->maxicount &&
     674     2588565 :             percpu_counter_read_positive(&args.mp->m_icount) + newlen >
     675             :                                                         igeo->maxicount)
     676             :                 return -ENOSPC;
     677     2588666 :         args.minlen = args.maxlen = igeo->ialloc_blks;
     678             :         /*
     679             :          * First try to allocate inodes contiguous with the last-allocated
     680             :          * chunk of inodes.  If the filesystem is striped, this will fill
     681             :          * an entire stripe unit with inodes.
     682             :          */
     683     2588666 :         agi = agbp->b_addr;
     684     2588666 :         newino = be32_to_cpu(agi->agi_newino);
     685     2588666 :         args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
     686     2588666 :                      igeo->ialloc_blks;
     687     2588666 :         if (do_sparse)
     688     1294289 :                 goto sparse_alloc;
     689     1294377 :         if (likely(newino != NULLAGINO &&
     690             :                   (args.agbno < be32_to_cpu(agi->agi_length)))) {
     691     1237828 :                 args.prod = 1;
     692             : 
     693             :                 /*
     694             :                  * We need to take into account alignment here to ensure that
     695             :                  * we don't modify the free list if we fail to have an exact
     696             :                  * block. If we don't have an exact match, and every other
     697             :                  * allocation attempt fails, we'll end up cancelling
     698             :                  * a dirty transaction and shutting down.
     699             :                  *
     700             :                  * For an exact allocation, alignment must be 1,
     701             :                  * however we need to take cluster alignment into account when
     702             :                  * fixing up the freelist. Use the minalignslop field to
     703             :                  * indicate that extra blocks might be required for alignment,
     704             :                  * but not to use them in the actual exact allocation.
     705             :                  */
     706     1237828 :                 args.alignment = 1;
     707     1237828 :                 args.minalignslop = igeo->cluster_align - 1;
     708             : 
     709             :                 /* Allow space for the inode btree to split. */
     710     1237828 :                 args.minleft = igeo->inobt_maxlevels;
     711     2475656 :                 error = xfs_alloc_vextent_exact_bno(&args,
     712     1237828 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     713             :                                                 args.agbno));
     714     1237895 :                 if (error)
     715             :                         return error;
     716             : 
     717             :                 /*
     718             :                  * This request might have dirtied the transaction if the AG can
     719             :                  * satisfy the request, but the exact block was not available.
     720             :                  * If the allocation did fail, subsequent requests will relax
     721             :                  * the exact agbno requirement and increase the alignment
     722             :                  * instead. It is critical that the total size of the request
     723             :                  * (len + alignment + slop) does not increase from this point
     724             :                  * on, so reset minalignslop to ensure it is not included in
     725             :                  * subsequent requests.
     726             :                  */
     727     1237894 :                 args.minalignslop = 0;
     728             :         }
     729             : 
     730     1294443 :         if (unlikely(args.fsbno == NULLFSBLOCK)) {
     731             :                 /*
     732             :                  * Set the alignment for the allocation.
     733             :                  * If stripe alignment is turned on then align at stripe unit
     734             :                  * boundary.
     735             :                  * If the cluster size is smaller than a filesystem block
     736             :                  * then we're doing I/O for inodes in filesystem block size
     737             :                  * pieces, so don't need alignment anyway.
     738             :                  */
     739     1019800 :                 isaligned = 0;
     740     1019800 :                 if (igeo->ialloc_align) {
     741           0 :                         ASSERT(!xfs_has_noalign(args.mp));
     742           0 :                         args.alignment = args.mp->m_dalign;
     743           0 :                         isaligned = 1;
     744             :                 } else
     745     1019800 :                         args.alignment = igeo->cluster_align;
     746             :                 /*
     747             :                  * Allocate a fixed-size extent of inodes.
     748             :                  */
     749     1019800 :                 args.prod = 1;
     750             :                 /*
     751             :                  * Allow space for the inode btree to split.
     752             :                  */
     753     1019800 :                 args.minleft = igeo->inobt_maxlevels;
     754     2039600 :                 error = xfs_alloc_vextent_near_bno(&args,
     755     1019800 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     756             :                                                 be32_to_cpu(agi->agi_root)));
     757     1019803 :                 if (error)
     758             :                         return error;
     759             :         }
     760             : 
     761             :         /*
     762             :          * If stripe alignment is turned on, then try again with cluster
     763             :          * alignment.
     764             :          */
     765     1019796 :         if (isaligned && args.fsbno == NULLFSBLOCK) {
     766           0 :                 args.alignment = igeo->cluster_align;
     767           0 :                 error = xfs_alloc_vextent_near_bno(&args,
     768           0 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     769             :                                                 be32_to_cpu(agi->agi_root)));
     770           0 :                 if (error)
     771             :                         return error;
     772             :         }
     773             : 
     774             :         /*
     775             :          * Finally, try a sparse allocation if the filesystem supports it and
     776             :          * the sparse allocation length is smaller than a full chunk.
     777             :          */
     778     1294439 :         if (xfs_has_sparseinodes(args.mp) &&
     779     1294380 :             igeo->ialloc_min_blks < igeo->ialloc_blks &&
     780     1294362 :             args.fsbno == NULLFSBLOCK) {
     781      792448 : sparse_alloc:
     782     2086737 :                 args.alignment = args.mp->m_sb.sb_spino_align;
     783     2086737 :                 args.prod = 1;
     784             : 
     785     2086737 :                 args.minlen = igeo->ialloc_min_blks;
     786     2086737 :                 args.maxlen = args.minlen;
     787             : 
     788             :                 /*
     789             :                  * The inode record will be aligned to full chunk size. We must
     790             :                  * prevent sparse allocation from AG boundaries that result in
     791             :                  * invalid inode records, such as records that start at agbno 0
     792             :                  * or extend beyond the AG.
     793             :                  *
     794             :                  * Set min agbno to the first aligned, non-zero agbno and max to
     795             :                  * the last aligned agbno that is at least one full chunk from
     796             :                  * the end of the AG.
     797             :                  */
     798     2086737 :                 args.min_agbno = args.mp->m_sb.sb_inoalignmt;
     799     2086737 :                 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
     800     2086737 :                                             args.mp->m_sb.sb_inoalignmt) -
     801     2086737 :                                  igeo->ialloc_blks;
     802             : 
     803     4173474 :                 error = xfs_alloc_vextent_near_bno(&args,
     804     2086737 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     805             :                                                 be32_to_cpu(agi->agi_root)));
     806     2086797 :                 if (error)
     807             :                         return error;
     808             : 
     809     2086788 :                 newlen = XFS_AGB_TO_AGINO(args.mp, args.len);
     810     2086788 :                 ASSERT(newlen <= XFS_INODES_PER_CHUNK);
     811     2086788 :                 allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
     812             :         }
     813             : 
     814     2588779 :         if (args.fsbno == NULLFSBLOCK)
     815             :                 return -EAGAIN;
     816             : 
     817     1017060 :         ASSERT(args.len == args.minlen);
     818             : 
     819             :         /*
     820             :          * Stamp and write the inode buffers.
     821             :          *
     822             :          * Seed the new inode cluster with a random generation number. This
     823             :          * prevents short-term reuse of generation numbers if a chunk is
     824             :          * freed and then immediately reallocated. We use random numbers
     825             :          * rather than a linear progression to prevent the next generation
     826             :          * number from being easily guessable.
     827             :          */
     828     1017060 :         error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
     829             :                         args.agbno, args.len, get_random_u32());
     830             : 
     831     1017104 :         if (error)
     832             :                 return error;
     833             :         /*
     834             :          * Convert the results.
     835             :          */
     836     1017104 :         newino = XFS_AGB_TO_AGINO(args.mp, args.agbno);
     837             : 
     838     1017104 :         if (xfs_inobt_issparse(~allocmask)) {
     839             :                 /*
     840             :                  * We've allocated a sparse chunk. Align the startino and mask.
     841             :                  */
     842      515082 :                 xfs_align_sparse_ino(args.mp, &newino, &allocmask);
     843             : 
     844      514953 :                 rec.ir_startino = newino;
     845      514953 :                 rec.ir_holemask = ~allocmask;
     846      514953 :                 rec.ir_count = newlen;
     847      514953 :                 rec.ir_freecount = newlen;
     848      514953 :                 rec.ir_free = XFS_INOBT_ALL_FREE;
     849             : 
     850             :                 /*
     851             :                  * Insert the sparse record into the inobt and allow for a merge
     852             :                  * if necessary. If a merge does occur, rec is updated to the
     853             :                  * merged record.
     854             :                  */
     855      514953 :                 error = xfs_inobt_insert_sprec(pag, tp, agbp,
     856             :                                 XFS_BTNUM_INO, &rec, true);
     857      515129 :                 if (error == -EFSCORRUPTED) {
     858           0 :                         xfs_alert(args.mp,
     859             :         "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
     860             :                                   XFS_AGINO_TO_INO(args.mp, pag->pag_agno,
     861             :                                                    rec.ir_startino),
     862             :                                   rec.ir_holemask, rec.ir_count);
     863           0 :                         xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
     864             :                 }
     865      515129 :                 if (error)
     866             :                         return error;
     867             : 
     868             :                 /*
     869             :                  * Unlike for the inobt, we can't merge just the part we've
     870             :                  * allocated here, due to finobt semantics. The original record
     871             :                  * may or may not exist independent of whether physical inodes
     872             :                  * exist in this sparse chunk.
     873             :                  *
     874             :                  * We must update the finobt record based on the inobt record.
     875             :                  * rec contains the fully merged and up to date inobt record
     876             :                  * from the previous call. Set merge false to replace any
     877             :                  * existing record with this one.
     878             :                  */
     879      515129 :                 if (xfs_has_finobt(args.mp)) {
     880      515108 :                         error = xfs_inobt_insert_sprec(pag, tp, agbp,
     881             :                                        XFS_BTNUM_FINO, &rec, false);
     882      515131 :                         if (error)
     883             :                                 return error;
     884             :                 }
     885             :         } else {
     886             :                 /* full chunk - insert new records to both btrees */
     887      502022 :                 error = xfs_inobt_insert(pag, tp, agbp, newino, newlen,
     888             :                                          XFS_BTNUM_INO);
     889      502037 :                 if (error)
     890             :                         return error;
     891             : 
     892      502035 :                 if (xfs_has_finobt(args.mp)) {
     893      502020 :                         error = xfs_inobt_insert(pag, tp, agbp, newino,
     894             :                                                  newlen, XFS_BTNUM_FINO);
     895      502021 :                         if (error)
     896             :                                 return error;
     897             :                 }
     898             :         }
     899             : 
     900             :         /*
     901             :          * Update AGI counts and newino.
     902             :          */
     903     1017179 :         be32_add_cpu(&agi->agi_count, newlen);
     904     1017179 :         be32_add_cpu(&agi->agi_freecount, newlen);
     905     1017179 :         pag->pagi_freecount += newlen;
     906     1017179 :         pag->pagi_count += newlen;
     907     1017179 :         agi->agi_newino = cpu_to_be32(newino);
     908             : 
     909             :         /*
     910             :          * Log allocation group header fields
     911             :          */
     912     1017179 :         xfs_ialloc_log_agi(tp, agbp,
     913             :                 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
     914             :         /*
     915             :          * Modify/log superblock values for inode count and inode free count.
     916             :          */
     917     1017136 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
     918     1016964 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
     919     1016964 :         return 0;
     920             : }
     921             : 
     922             : /*
     923             :  * Try to retrieve the next record to the left/right from the current one.
                     :  *
                     :  * A nonzero left steps the cursor with xfs_btree_decrement(); otherwise
                     :  * xfs_btree_increment() is used. *done is set to !i, where i is the status
                     :  * reported by that step, and *rec is only filled in when i is nonzero.
                     :  *
                     :  * Returns 0 on success, the error from the step or from the record read,
                     :  * or -EFSCORRUPTED if the record read does not report i == 1.
     924             :  */
     925             : STATIC int
     926         162 : xfs_ialloc_next_rec(
     927             :         struct xfs_btree_cur    *cur,
     928             :         xfs_inobt_rec_incore_t  *rec,
     929             :         int                     *done,
     930             :         int                     left)
     931             : {
     932         162 :         int                     error;
     933         162 :         int                     i;
     934             : 
     935         162 :         if (left)
     936          81 :                 error = xfs_btree_decrement(cur, 0, &i);
     937             :         else
     938          81 :                 error = xfs_btree_increment(cur, 0, &i);
     939             : 
     940         162 :         if (error)
     941             :                 return error;
     942         162 :         *done = !i;
     943         162 :         if (i) {
     944          21 :                 error = xfs_inobt_get_rec(cur, rec, &i);
     945          21 :                 if (error)
     946             :                         return error;
     947          21 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
     948           0 :                         return -EFSCORRUPTED;
     949             :         }
     950             : 
     951             :         return 0;
     952             : }
     953             : /*
                     :  * Look up the record whose key equals agino (XFS_LOOKUP_EQ) and, if the
                     :  * lookup reports a match, read it into *rec.
                     :  *
                     :  * *done is set to !i, where i is the status reported by the lookup, and
                     :  * *rec is only filled in when i is nonzero.
                     :  *
                     :  * Returns 0 on success, the error from the lookup or from the record read,
                     :  * or -EFSCORRUPTED if the record read does not report i == 1.
                     :  */
     954             : STATIC int
     955         856 : xfs_ialloc_get_rec(
     956             :         struct xfs_btree_cur    *cur,
     957             :         xfs_agino_t             agino,
     958             :         xfs_inobt_rec_incore_t  *rec,
     959             :         int                     *done)
     960             : {
     961         856 :         int                     error;
     962         856 :         int                     i;
     963             : 
     964         856 :         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
     965         856 :         if (error)
     966             :                 return error;
     967         856 :         *done = !i;
     968         856 :         if (i) {
     969         428 :                 error = xfs_inobt_get_rec(cur, rec, &i);
     970         428 :                 if (error)
     971             :                         return error;
     972         428 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
     973           0 :                         return -EFSCORRUPTED;
     974             :         }
     975             : 
     976             :         return 0;
     977             : }
     978             : 
     979             : /*
     980             :  * Return the offset of the first free inode in the record. If the inode chunk
     981             :  * is sparsely allocated, we convert the record holemask to inode granularity
     982             :  * and mask off the unallocated regions from the inode free mask.
                     :  *
                     :  * rec is only read. The return value is whatever xfs_lowbit64() yields for
                     :  * the (possibly masked) free mask.
     983             :  */
     984             : STATIC int
     985    84435483 : xfs_inobt_first_free_inode(
     986             :         struct xfs_inobt_rec_incore     *rec)
     987             : {
     988    84435483 :         xfs_inofree_t                   realfree;
     989             : 
     990             :         /* if there are no holes, return the first available offset */
     991    84435483 :         if (!xfs_inobt_issparse(rec->ir_holemask))
     992    59992845 :                 return xfs_lowbit64(rec->ir_free);
     993             : 
     994    24442638 :         realfree = xfs_inobt_irec_to_allocmask(rec);    /* holemask at inode granularity */
     995    24442583 :         realfree &= rec->ir_free;       /* keep only free bits of allocated regions */
     996             : 
     997    24442583 :         return xfs_lowbit64(realfree);
     998             : }
     999             : 
    1000             : /*
    1001             :  * Allocate an inode using the inobt-only algorithm.
    1002             :  */
    1003             : STATIC int
    1004        2092 : xfs_dialloc_ag_inobt(
    1005             :         struct xfs_perag        *pag,
    1006             :         struct xfs_trans        *tp,
    1007             :         struct xfs_buf          *agbp,
    1008             :         xfs_ino_t               parent,
    1009             :         xfs_ino_t               *inop)
    1010             : {
    1011        2092 :         struct xfs_mount        *mp = tp->t_mountp;
    1012        2092 :         struct xfs_agi          *agi = agbp->b_addr;
    1013        2092 :         xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
    1014        2092 :         xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
    1015        2092 :         struct xfs_btree_cur    *cur, *tcur;
    1016        2092 :         struct xfs_inobt_rec_incore rec, trec;
    1017        2092 :         xfs_ino_t               ino;
    1018        2092 :         int                     error;
    1019        2092 :         int                     offset;
    1020        2092 :         int                     i, j;
    1021        2092 :         int                     searchdistance = 10;
    1022             : 
    1023        4184 :         ASSERT(xfs_perag_initialised_agi(pag));
    1024        4184 :         ASSERT(xfs_perag_allows_inodes(pag));
    1025        2092 :         ASSERT(pag->pagi_freecount > 0);
    1026             : 
    1027        2092 :  restart_pagno:
    1028        2146 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    1029             :         /*
    1030             :          * If pagino is 0 (this is the root inode allocation) use newino.
    1031             :          * This must work because we've just allocated some.
    1032             :          */
    1033        2146 :         if (!pagino)
    1034          57 :                 pagino = be32_to_cpu(agi->agi_newino);
    1035             : 
    1036        2146 :         error = xfs_check_agi_freecount(cur);
    1037        2146 :         if (error)
    1038           0 :                 goto error0;
    1039             : 
    1040             :         /*
    1041             :          * If in the same AG as the parent, try to get near the parent.
    1042             :          */
    1043        2146 :         if (pagno == pag->pag_agno) {
    1044        2146 :                 int             doneleft;       /* done, to the left */
    1045        2146 :                 int             doneright;      /* done, to the right */
    1046             : 
    1047        2146 :                 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
    1048        2146 :                 if (error)
    1049           0 :                         goto error0;
    1050        2146 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1051           0 :                         error = -EFSCORRUPTED;
    1052           0 :                         goto error0;
    1053             :                 }
    1054             : 
    1055        2146 :                 error = xfs_inobt_get_rec(cur, &rec, &j);
    1056        2146 :                 if (error)
    1057           0 :                         goto error0;
    1058        2146 :                 if (XFS_IS_CORRUPT(mp, j != 1)) {
    1059           0 :                         error = -EFSCORRUPTED;
    1060           0 :                         goto error0;
    1061             :                 }
    1062             : 
    1063        2146 :                 if (rec.ir_freecount > 0) {
    1064             :                         /*
    1065             :                          * Found a free inode in the same chunk
    1066             :                          * as the parent, done.
    1067             :                          */
    1068        2086 :                         goto alloc_inode;
    1069             :                 }
    1070             : 
    1071             : 
    1072             :                 /*
    1073             :                  * In the same AG as parent, but parent's chunk is full.
    1074             :                  */
    1075             : 
    1076             :                 /* duplicate the cursor, search left & right simultaneously */
    1077         509 :                 error = xfs_btree_dup_cursor(cur, &tcur);
    1078         509 :                 if (error)
    1079           0 :                         goto error0;
    1080             : 
    1081             :                 /*
    1082             :                  * Skip to last blocks looked up if same parent inode.
    1083             :                  */
    1084         509 :                 if (pagino != NULLAGINO &&
    1085         509 :                     pag->pagl_pagino == pagino &&
    1086         428 :                     pag->pagl_leftrec != NULLAGINO &&
    1087         428 :                     pag->pagl_rightrec != NULLAGINO) {
    1088         428 :                         error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
    1089             :                                                    &trec, &doneleft);
    1090         428 :                         if (error)
    1091           0 :                                 goto error1;
    1092             : 
    1093         428 :                         error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
    1094             :                                                    &rec, &doneright);
    1095         428 :                         if (error)
    1096           0 :                                 goto error1;
    1097             :                 } else {
    1098             :                         /* search left with tcur, back up 1 record */
    1099          81 :                         error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
    1100          81 :                         if (error)
    1101           0 :                                 goto error1;
    1102             : 
    1103             :                         /* search right with cur, go forward 1 record. */
    1104          81 :                         error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
    1105          81 :                         if (error)
    1106           0 :                                 goto error1;
    1107             :                 }
    1108             : 
    1109             :                 /*
    1110             :                  * Loop until we find an inode chunk with a free inode.
    1111             :                  */
    1112         509 :                 while (--searchdistance > 0 && (!doneleft || !doneright)) {
    1113         449 :                         int     useleft;  /* using left inode chunk this time */
    1114             : 
    1115             :                         /* figure out the closer block if both are valid. */
    1116         449 :                         if (!doneleft && !doneright) {
    1117           0 :                                 useleft = pagino -
    1118           0 :                                  (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
    1119           0 :                                   rec.ir_startino - pagino;
    1120             :                         } else {
    1121         449 :                                 useleft = !doneleft;
    1122             :                         }
    1123             : 
    1124             :                         /* free inodes to the left? */
    1125         449 :                         if (useleft && trec.ir_freecount) {
    1126           0 :                                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1127           0 :                                 cur = tcur;
    1128             : 
    1129           0 :                                 pag->pagl_leftrec = trec.ir_startino;
    1130           0 :                                 pag->pagl_rightrec = rec.ir_startino;
    1131           0 :                                 pag->pagl_pagino = pagino;
    1132           0 :                                 rec = trec;
    1133           0 :                                 goto alloc_inode;
    1134             :                         }
    1135             : 
    1136             :                         /* free inodes to the right? */
    1137         449 :                         if (!useleft && rec.ir_freecount) {
    1138         449 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    1139             : 
    1140         449 :                                 pag->pagl_leftrec = trec.ir_startino;
    1141         449 :                                 pag->pagl_rightrec = rec.ir_startino;
    1142         449 :                                 pag->pagl_pagino = pagino;
    1143         449 :                                 goto alloc_inode;
    1144             :                         }
    1145             : 
    1146             :                         /* get next record to check */
    1147           0 :                         if (useleft) {
    1148           0 :                                 error = xfs_ialloc_next_rec(tcur, &trec,
    1149             :                                                                  &doneleft, 1);
    1150             :                         } else {
    1151           0 :                                 error = xfs_ialloc_next_rec(cur, &rec,
    1152             :                                                                  &doneright, 0);
    1153             :                         }
    1154           0 :                         if (error)
    1155           0 :                                 goto error1;
    1156             :                 }
    1157             : 
    1158          60 :                 if (searchdistance <= 0) {
    1159             :                         /*
    1160             :                          * Not in range - save last search
    1161             :                          * location and allocate a new inode
    1162             :                          */
    1163           6 :                         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    1164           6 :                         pag->pagl_leftrec = trec.ir_startino;
    1165           6 :                         pag->pagl_rightrec = rec.ir_startino;
    1166           6 :                         pag->pagl_pagino = pagino;
    1167             : 
    1168             :                 } else {
    1169             :                         /*
    1170             :                          * We've reached the end of the btree. because
    1171             :                          * we are only searching a small chunk of the
    1172             :                          * btree each search, there is obviously free
    1173             :                          * inodes closer to the parent inode than we
    1174             :                          * are now. restart the search again.
    1175             :                          */
    1176          54 :                         pag->pagl_pagino = NULLAGINO;
    1177          54 :                         pag->pagl_leftrec = NULLAGINO;
    1178          54 :                         pag->pagl_rightrec = NULLAGINO;
    1179          54 :                         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    1180          54 :                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1181          54 :                         goto restart_pagno;
    1182             :                 }
    1183             :         }
    1184             : 
    1185             :         /*
    1186             :          * In a different AG from the parent.
    1187             :          * See if the most recently allocated block has any free.
    1188             :          */
    1189           6 :         if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
    1190           6 :                 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
    1191             :                                          XFS_LOOKUP_EQ, &i);
    1192           6 :                 if (error)
    1193           0 :                         goto error0;
    1194             : 
    1195           6 :                 if (i == 1) {
    1196           6 :                         error = xfs_inobt_get_rec(cur, &rec, &j);
    1197           6 :                         if (error)
    1198           0 :                                 goto error0;
    1199             : 
    1200           6 :                         if (j == 1 && rec.ir_freecount > 0) {
    1201             :                                 /*
    1202             :                                  * The last chunk allocated in the group
    1203             :                                  * still has a free inode.
    1204             :                                  */
    1205           0 :                                 goto alloc_inode;
    1206             :                         }
    1207             :                 }
    1208             :         }
    1209             : 
    1210             :         /*
    1211             :          * None left in the last group, search the whole AG
    1212             :          */
    1213           6 :         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
    1214           6 :         if (error)
    1215           0 :                 goto error0;
    1216           6 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    1217           0 :                 error = -EFSCORRUPTED;
    1218           0 :                 goto error0;
    1219             :         }
    1220             : 
    1221           6 :         for (;;) {
    1222           6 :                 error = xfs_inobt_get_rec(cur, &rec, &i);
    1223           6 :                 if (error)
    1224           0 :                         goto error0;
    1225           6 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1226           0 :                         error = -EFSCORRUPTED;
    1227           0 :                         goto error0;
    1228             :                 }
    1229           6 :                 if (rec.ir_freecount > 0)
    1230             :                         break;
    1231           6 :                 error = xfs_btree_increment(cur, 0, &i);
    1232           6 :                 if (error)
    1233           0 :                         goto error0;
    1234           6 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1235           6 :                         error = -EFSCORRUPTED;
    1236           6 :                         goto error0;
    1237             :                 }
    1238             :         }
    1239             : 
    1240           0 : alloc_inode:
    1241        2086 :         offset = xfs_inobt_first_free_inode(&rec);
    1242        2086 :         ASSERT(offset >= 0);
    1243        2086 :         ASSERT(offset < XFS_INODES_PER_CHUNK);
    1244        2086 :         ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
    1245             :                                    XFS_INODES_PER_CHUNK) == 0);
    1246        2086 :         ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
    1247        2086 :         rec.ir_free &= ~XFS_INOBT_MASK(offset);
    1248        2086 :         rec.ir_freecount--;
    1249        2086 :         error = xfs_inobt_update(cur, &rec);
    1250        2086 :         if (error)
    1251           0 :                 goto error0;
    1252        2086 :         be32_add_cpu(&agi->agi_freecount, -1);
    1253        2086 :         xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
    1254        2086 :         pag->pagi_freecount--;
    1255             : 
    1256        2086 :         error = xfs_check_agi_freecount(cur);
    1257        2086 :         if (error)
    1258           0 :                 goto error0;
    1259             : 
    1260        2086 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1261        2086 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
    1262        2086 :         *inop = ino;
    1263        2086 :         return 0;
    1264             : error1:
    1265           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    1266           6 : error0:
    1267           6 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    1268           6 :         return error;
    1269             : }
    1270             : 
    1271             : /*
    1272             :  * Use the free inode btree to pick the inode record to allocate from, based
    1273             :  * on distance from the parent. Note that the provided cursor may be deleted
                     :  * and replaced.
                     :  *
                     :  * pagino is the parent's AG inode number and is the search target. On entry
                     :  * *ocur is the cursor used for the leftward (LE) lookup. On success *rec
                     :  * holds the chosen record and *ocur is the cursor that was used to read it:
                     :  * the original cursor if the parent's own record or the left record was
                     :  * chosen, otherwise a duplicate positioned by the rightward (GE) lookup, in
                     :  * which case the original cursor has been deleted.
                     :  *
                     :  * Returns 0 on success, the error from a failed btree call, or
                     :  * -EFSCORRUPTED if a successful lookup is not followed by a readable record
                     :  * or if neither direction finds a record. On failure *ocur is left untouched
                     :  * and any duplicate cursor is deleted here, so the caller still owns the
                     :  * cursor it passed in.
    1274             :  */
    1275             : STATIC int
    1276    78196872 : xfs_dialloc_ag_finobt_near(
    1277             :         xfs_agino_t                     pagino,
    1278             :         struct xfs_btree_cur            **ocur,
    1279             :         struct xfs_inobt_rec_incore     *rec)
    1280             : {
    1281    78196872 :         struct xfs_btree_cur            *lcur = *ocur;  /* left search cursor */
    1282    78196872 :         struct xfs_btree_cur            *rcur;  /* right search cursor */
    1283    78196872 :         struct xfs_inobt_rec_incore     rrec;
    1284    78196872 :         int                             error;
    1285    78196872 :         int                             i, j;
    1286             : 
    1287    78196872 :         error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
    1288    78173196 :         if (error)
    1289             :                 return error;
    1290             : 
    1291    78173196 :         if (i == 1) {
    1292    12445598 :                 error = xfs_inobt_get_rec(lcur, rec, &i);
    1293    12446512 :                 if (error)
    1294             :                         return error;
    1295    12446512 :                 if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1))
    1296           0 :                         return -EFSCORRUPTED;
    1297             : 
    1298             :                 /*
    1299             :                  * See if we've landed in the parent inode record. The finobt
    1300             :                  * only tracks chunks with at least one free inode, so record
    1301             :                  * existence is enough. In that case *rec and the caller's
                     :                  * cursor are already in place and no rightward search is
                     :                  * needed. Otherwise i keeps recording whether a left
                     :                  * record was found, for the comparison further down.
    1302             :                  */
    1303    12446512 :                 if (pagino >= rec->ir_startino &&
    1304    12446474 :                     pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
    1305             :                         return 0;
    1306             :         }
    1307             : 
    1308    72293944 :         error = xfs_btree_dup_cursor(lcur, &rcur);
    1309    71678748 :         if (error)
    1310             :                 return error;
    1311             : 
    1312    71694862 :         error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
    1313    72207926 :         if (error)
    1314           0 :                 goto error_rcur;
    1315    72207926 :         if (j == 1) {
    1316    69336324 :                 error = xfs_inobt_get_rec(rcur, &rrec, &j);
    1317    69551083 :                 if (error)
    1318           0 :                         goto error_rcur;
    1319    69551083 :                 if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
    1320           0 :                         error = -EFSCORRUPTED;
    1321           0 :                         goto error_rcur;
    1322             :                 }
    1323             :         }
    1324             : 
    1325    72422685 :         if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
    1326           0 :                 error = -EFSCORRUPTED;
    1327           0 :                 goto error_rcur;
    1328             :         }
    1329    72422685 :         if (i == 1 && j == 1) {
    1330             :                 /*
    1331             :                  * Both the left and right records are valid. Choose the closer
    1332             :                  * inode chunk to the target.
                     :                  *
                     :                  * NOTE(review): the left-hand term adds
                     :                  * XFS_INODES_PER_CHUNK - 1 to (pagino - ir_startino) rather
                     :                  * than measuring from the last inode of the left chunk,
                     :                  * which biases the choice towards the right record -
                     :                  * confirm this is intended.
    1333             :                  */
    1334     3549836 :                 if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
    1335     3549836 :                     (rrec.ir_startino - pagino)) {
    1336     1976476 :                         *rec = rrec;
    1337     1976476 :                         xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
    1338     1976450 :                         *ocur = rcur;
    1339             :                 } else {
    1340     1573360 :                         xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
    1341             :                 }
    1342    68872849 :         } else if (j == 1) {
    1343             :                 /* only the right record is valid */
    1344    65857058 :                 *rec = rrec;
    1345    65857058 :                 xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
    1346    66053729 :                 *ocur = rcur;
    1347     3015791 :         } else if (i == 1) {
    1348             :                 /* only the left record is valid */
    1349     3015516 :                 xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
    1350             :         }
    1351             : 
    1352             :         return 0;
    1353             : 
    1354           0 : error_rcur:
    1355           0 :         xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
    1356           0 :         return error;
    1357             : }
    1358             : 
    1359             : /*
    1360             :  * Use the free inode btree to find a free inode record based on the AGI
    1361             :  * newino hint. If the hint is NULLAGINO, or the exact-match lookup for the
                     :  * hint finds nothing, fall back to the first record at or after inode 0.
                     :  *
                     :  * On success *rec holds the record that was read through cur and 0 is
                     :  * returned. Otherwise returns the error from the failing btree call, or
                     :  * -EFSCORRUPTED if the fallback lookup finds no record or a successful
                     :  * lookup is not followed by a readable record.
    1362             :  */
    1363             : STATIC int
    1364     6448343 : xfs_dialloc_ag_finobt_newino(
    1365             :         struct xfs_agi                  *agi,
    1366             :         struct xfs_btree_cur            *cur,
    1367             :         struct xfs_inobt_rec_incore     *rec)
    1368             : {
    1369     6448343 :         int error;
    1370     6448343 :         int i;
    1371             : 
    1372     6448343 :         if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
    1373     6386644 :                 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
    1374             :                                          XFS_LOOKUP_EQ, &i);
    1375     6387322 :                 if (error)
    1376             :                         return error;
    1377     6387322 :                 if (i == 1) {
    1378     6214534 :                         error = xfs_inobt_get_rec(cur, rec, &i);
    1379     6215251 :                         if (error)
    1380             :                                 return error;
    1381     6215251 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1382           0 :                                 return -EFSCORRUPTED;
    1383             :                         return 0;
    1384             :                 }
    1385             :         }
    1386             : 
    1387             :         /*
    1388             :          * No usable hint: find the first inode available in the AG by looking
                     :          * up the first record at or after inode 0. Finding nothing here is
                     :          * treated as corruption.
    1389             :          */
    1390      234487 :         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
    1391      234112 :         if (error)
    1392             :                 return error;
    1393      234112 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1394           0 :                 return -EFSCORRUPTED;
    1395             : 
    1396      234112 :         error = xfs_inobt_get_rec(cur, rec, &i);
    1397      234116 :         if (error)
    1398             :                 return error;
    1399      234116 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1400           0 :                 return -EFSCORRUPTED;
    1401             : 
    1402             :         return 0;
    1403             : }
    1404             : 
    1405             : /*
    1406             :  * Update the inobt based on a modification made to the finobt. Also ensure that
    1407             :  * the records from both trees are equivalent post-modification.
                     :  *
                     :  * cur is the inobt cursor, frec is the finobt record after the caller's
                     :  * modification, and offset is the index within the chunk of the inode being
                     :  * allocated. The inobt record starting at frec->ir_startino is looked up, the
                     :  * free bit for offset is cleared and its free count decremented, and the
                     :  * result must match frec's ir_free and ir_freecount before it is written
                     :  * back.
                     :  *
                     :  * Returns the result of xfs_inobt_update() when the records agree, the error
                     :  * from a failed lookup or record read, or -EFSCORRUPTED if the inobt record
                     :  * is missing or the two records disagree.
    1408             :  */
    1409             : STATIC int
    1410    84855594 : xfs_dialloc_ag_update_inobt(
    1411             :         struct xfs_btree_cur            *cur,   /* inobt cursor */
    1412             :         struct xfs_inobt_rec_incore     *frec,  /* finobt record */
    1413             :         int                             offset) /* inode offset */
    1414             : {
    1415    84855594 :         struct xfs_inobt_rec_incore     rec;
    1416    84855594 :         int                             error;
    1417    84855594 :         int                             i;
    1418             : 
    1419    84855594 :         error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
    1420    85039853 :         if (error)
    1421             :                 return error;
    1422    85039809 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1423           0 :                 return -EFSCORRUPTED;
    1424             : 
    1425    85039809 :         error = xfs_inobt_get_rec(cur, &rec, &i);
    1426    85041244 :         if (error)
    1427             :                 return error;
    1428    85041244 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1429           0 :                 return -EFSCORRUPTED;
    1430    85041244 :         ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
    1431             :                                    XFS_INODES_PER_CHUNK) == 0);
    1432             : 
    1433    85041244 :         rec.ir_free &= ~XFS_INOBT_MASK(offset);
    1434    85041244 :         rec.ir_freecount--;
    1435             : 
    1436    85041244 :         if (XFS_IS_CORRUPT(cur->bc_mp,
    1437             :                            rec.ir_free != frec->ir_free ||
    1438             :                            rec.ir_freecount != frec->ir_freecount))
    1439           0 :                 return -EFSCORRUPTED;
    1440             : 
    1441    85041244 :         return xfs_inobt_update(cur, &rec);
    1442             : }
    1443             : 
    1444             : /*
    1445             :  * Allocate an inode using the free inode btree, if available. Otherwise, fall
    1446             :  * back to the inobt search algorithm.
    1447             :  *
    1448             :  * The caller selected an AG for us, and made sure that free inodes are
    1449             :  * available.
                     :  *
                     :  * parent is the inode number of the parent; its AG number decides whether we
                     :  * search near the parent or use the AGI newino hint. agbp is the buffer whose
                     :  * b_addr is the AGI for pag.
                     :  *
                     :  * On success the finobt record, the inobt record, the AGI free count, the
                     :  * perag free count and the transaction's XFS_TRANS_SB_IFREE delta have each
                     :  * been reduced by one inode, *inop is set to the new inode number and 0 is
                     :  * returned. On failure in the finobt path every cursor created here is
                     :  * deleted with XFS_BTREE_ERROR, *inop is not written and the error is
                     :  * returned.
    1450             :  */
    1451             : static int
    1452    84663549 : xfs_dialloc_ag(
    1453             :         struct xfs_perag        *pag,
    1454             :         struct xfs_trans        *tp,
    1455             :         struct xfs_buf          *agbp,
    1456             :         xfs_ino_t               parent,
    1457             :         xfs_ino_t               *inop)
    1458             : {
    1459    84663549 :         struct xfs_mount                *mp = tp->t_mountp;
    1460    84663549 :         struct xfs_agi                  *agi = agbp->b_addr;
    1461    84663549 :         xfs_agnumber_t                  pagno = XFS_INO_TO_AGNO(mp, parent);
    1462    84663549 :         xfs_agino_t                     pagino = XFS_INO_TO_AGINO(mp, parent);
    1463    84663549 :         struct xfs_btree_cur            *cur;   /* finobt cursor */
    1464    84663549 :         struct xfs_btree_cur            *icur;  /* inobt cursor */
    1465    84663549 :         struct xfs_inobt_rec_incore     rec;
    1466    84663549 :         xfs_ino_t                       ino;
    1467    84663549 :         int                             error;
    1468    84663549 :         int                             offset;
    1469    84663549 :         int                             i;
    1470             : 
    1471    84663549 :         if (!xfs_has_finobt(mp))
    1472        2092 :                 return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
    1473             : 
    1474             :         /*
    1475             :          * If pagino is 0 (this is the root inode allocation) use newino.
    1476             :          * This must work because we've just allocated some.
    1477             :          */
    1478    84661457 :         if (!pagino)
    1479       19364 :                 pagino = be32_to_cpu(agi->agi_newino);
    1480             : 
    1481    84661457 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
    1482             : 
    1483    84811728 :         error = xfs_check_agi_freecount(cur);
    1484    84599663 :         if (error)
    1485         165 :                 goto error_cur;
    1486             : 
    1487             :         /*
    1488             :          * The search algorithm depends on whether we're in the same AG as the
    1489             :          * parent. If so, find the closest available inode to the parent. If
    1490             :          * not, consider the agi hint or find the first free inode in the AG.
                     :          * The near search takes &cur because it may delete the cursor and
                     :          * hand back a replacement.
    1491             :          */
    1492    84599498 :         if (pag->pag_agno == pagno)
    1493    78150536 :                 error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
    1494             :         else
    1495     6448962 :                 error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
    1496    84521147 :         if (error)
    1497           0 :                 goto error_cur;
    1498             : 
    1499    84521147 :         offset = xfs_inobt_first_free_inode(&rec);
    1500    84210890 :         ASSERT(offset >= 0);
    1501    84210890 :         ASSERT(offset < XFS_INODES_PER_CHUNK);
    1502    84210890 :         ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
    1503             :                                    XFS_INODES_PER_CHUNK) == 0);
    1504    84210890 :         ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
    1505             : 
    1506             :         /*
    1507             :          * Modify or remove the finobt record: a record whose last free inode
                     :          * was just consumed is deleted rather than updated.
                     :          *
                     :          * NOTE(review): the stat value returned through &i by
                     :          * xfs_btree_delete() is not examined here - confirm that a delete
                     :          * which removes nothing is always reported through the error code.
    1508             :          */
    1509    84210890 :         rec.ir_free &= ~XFS_INOBT_MASK(offset);
    1510    84210890 :         rec.ir_freecount--;
    1511    84210890 :         if (rec.ir_freecount)
    1512    71879067 :                 error = xfs_inobt_update(cur, &rec);
    1513             :         else
    1514    12331823 :                 error = xfs_btree_delete(cur, &i);
    1515    83827354 :         if (error)
    1516           0 :                 goto error_cur;
    1517             : 
    1518             :         /*
    1519             :          * The finobt has now been updated appropriately. We haven't updated the
    1520             :          * agi and superblock yet, so we can create an inobt cursor and validate
    1521             :          * the original freecount. If all is well, make the equivalent update to
    1522             :          * the inobt using the finobt record and offset information.
    1523             :          */
    1524    83827354 :         icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    1525             : 
    1526    84707676 :         error = xfs_check_agi_freecount(icur);
    1527    84857104 :         if (error)
    1528           7 :                 goto error_icur;
    1529             : 
    1530    84857097 :         error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
    1531    84865413 :         if (error)
    1532          44 :                 goto error_icur;
    1533             : 
    1534             :         /*
    1535             :          * Both trees have now been updated. We must update the AGI, perag and
    1536             :          * superblock before we can check the freecount for each btree.
    1537             :          */
    1538    84865369 :         be32_add_cpu(&agi->agi_freecount, -1);
    1539    84865369 :         xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
    1540    84884494 :         pag->pagi_freecount--;
    1541             : 
    1542    84884494 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
    1543             : 
    1544    84372298 :         error = xfs_check_agi_freecount(icur);
    1545    84500563 :         if (error)
    1546           0 :                 goto error_icur;
    1547    84500563 :         error = xfs_check_agi_freecount(cur);
    1548    85009983 :         if (error)
    1549           0 :                 goto error_icur;
    1550             : 
    1551    85009983 :         xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
    1552    85034121 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1553    84985410 :         *inop = ino;
    1554    84985410 :         return 0;
    1555             : 
    1556          51 : error_icur:
    1557          51 :         xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
    1558         216 : error_cur:
    1559         216 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    1560         216 :         return error;
    1561             : }
    1562             : 
    1563             : static int
    1564     1016850 : xfs_dialloc_roll(
    1565             :         struct xfs_trans        **tpp,
    1566             :         struct xfs_buf          *agibp)
    1567             : {
    1568     1016850 :         struct xfs_trans        *tp = *tpp;
    1569     1016850 :         struct xfs_dquot_acct   *dqinfo;
    1570     1016850 :         int                     error;
    1571             : 
    1572             :         /*
                     :          * Roll the transaction at *tpp while keeping agibp held for the
                     :          * caller: the buffer is held across the roll and joined to the
                     :          * transaction left in tp afterwards, *tpp is updated to that
                     :          * transaction, and the error from xfs_trans_roll() is returned.
                     :          *
    1573             :          * Hold on to the agibp across the commit so no other allocation can
    1574             :          * come in and take the free inodes we just allocated for our caller.
    1575             :          */
    1576     1016850 :         xfs_trans_bhold(tp, agibp);
    1577             : 
    1578             :         /*
    1579             :          * We want the quota changes to be associated with the next transaction,
    1580             :          * NOT this one. So, detach the dqinfo from this and attach it to the
    1581             :          * next transaction.
    1582             :          */
    1583     1016861 :         dqinfo = tp->t_dqinfo;
    1584     1016861 :         tp->t_dqinfo = NULL;
    1585             : 
    1586     1016861 :         error = xfs_trans_roll(&tp);
    1587             : 
    1588             :         /* Re-attach the quota info that we detached from prev trx. */
    1589     1016927 :         tp->t_dqinfo = dqinfo;
    1590             : 
    1591             :         /*
    1592             :          * Join the buffer even on commit error so that the buffer is released
    1593             :          * when the caller cancels the transaction and doesn't have to handle
    1594             :          * this error case specially.
    1595             :          */
    1596     1016927 :         xfs_trans_bjoin(tp, agibp);
    1597     1016799 :         *tpp = tp;
    1598     1016799 :         return error;
    1599             : }
    1600             : 
    1601             : static bool                             /* true if this AG is worth trying */
    1602    88476992 : xfs_dialloc_good_ag(
    1603             :         struct xfs_perag        *pag,
    1604             :         struct xfs_trans        *tp,
    1605             :         umode_t                 mode,   /* file type of the new inode */
    1606             :         int                     flags,  /* xfs_alloc_read_agf() flags; 0 on 2nd pass */
    1607             :         bool                    ok_alloc) /* may allocate a new inode chunk */
    1608             : {
    1609    88476992 :         struct xfs_mount        *mp = tp->t_mountp;
    1610    88476992 :         xfs_extlen_t            ineed;
    1611    88476992 :         xfs_extlen_t            longest = 0;
    1612    88476992 :         int                     needspace;
    1613    88476992 :         int                     error;
    1614             : 
    1615    88476992 :         if (!pag)
    1616             :                 return false;
    1617   176953984 :         if (!xfs_perag_allows_inodes(pag))
    1618             :                 return false;
    1619             : 
    1620   176953984 :         if (!xfs_perag_initialised_agi(pag)) {
    1621          53 :                 error = xfs_ialloc_read_agi(pag, tp, NULL);
    1622          53 :                 if (error)
    1623             :                         return false;
    1624             :         }
    1625             : 
    1626    88476992 :         if (pag->pagi_freecount)        /* free inodes already exist here */
    1627             :                 return true;
    1628     4716141 :         if (!ok_alloc)                  /* none free and we may not add more */
    1629             :                 return false;
    1630             : 
    1631     5919644 :         if (!xfs_perag_initialised_agf(pag)) {
    1632          17 :                 error = xfs_alloc_read_agf(pag, tp, flags, NULL);
    1633          17 :                 if (error)
    1634             :                         return false;
    1635             :         }
    1636             : 
    1637             :         /*
    1638             :          * Check that there is enough free space for the file plus a chunk of
    1639             :          * inodes if we need to allocate some. If this is the first pass across
    1640             :          * the AGs, take into account the potential space needed for alignment
    1641             :          * of inode chunks when checking the longest contiguous free space in
    1642             :          * the AG - this prevents us from getting ENOSPC because we have free
    1643             :          * space larger than ialloc_blks but alignment constraints prevent us
    1644             :          * from using it.
    1645             :          *
    1646             :          * If we can't find an AG with space for full alignment slack to be
    1647             :          * taken into account, we must be near ENOSPC in all AGs.  Hence we
    1648             :          * don't include alignment for the second pass and so if we fail
    1649             :          * allocation due to alignment issues then it is most likely a real
    1650             :          * ENOSPC condition.
    1651             :          *
    1652             :          * XXX(dgc): this calculation is now bogus thanks to the per-ag
    1653             :          * reservations that xfs_alloc_fix_freelist() now does via
    1654             :          * xfs_alloc_space_available(). When the AG fills up, pagf_freeblks will
    1655             :          * be more than large enough for the check below to succeed, but
    1656             :          * xfs_alloc_space_available() will fail because of the non-zero
    1657             :          * metadata reservation and hence we won't actually be able to allocate
    1658             :          * more inodes in this AG. We do soooo much unnecessary work near ENOSPC
    1659             :          * because of this.
    1660             :          */
    1661     2959806 :         ineed = M_IGEO(mp)->ialloc_min_blks;
    1662     2959806 :         if (flags && ineed > 1)         /* nonzero flags: first pass, add slack */
    1663     2900949 :                 ineed += M_IGEO(mp)->cluster_align;
    1664     2959806 :         longest = pag->pagf_longest;
    1665     2959806 :         if (!longest)                   /* fall back to 1 if pagf_flcount is nonzero */
    1666         122 :                 longest = pag->pagf_flcount > 0;
    1667     2959806 :         needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
    1668             :         /* dirs, files and symlinks usually allocate a block as well (needspace) */
    1669     2959806 :         if (pag->pagf_freeblks < needspace + ineed || longest < ineed)
    1670      346739 :                 return false;
    1671             :         return true;
    1672             : }
    1673             : 
    1674             : static int                              /* 0, -EAGAIN (try next AG) or error */
    1675    86248888 : xfs_dialloc_try_ag(
    1676             :         struct xfs_perag        *pag,
    1677             :         struct xfs_trans        **tpp,  /* in/out: may be rolled */
    1678             :         xfs_ino_t               parent,
    1679             :         xfs_ino_t               *new_ino, /* out: set only on success */
    1680             :         bool                    ok_alloc) /* may allocate a new inode chunk */
    1681             : {
    1682    86248888 :         struct xfs_buf          *agbp;
    1683    86248888 :         xfs_ino_t               ino;
    1684    86248888 :         int                     error;
    1685             : 
    1686             :         /*
    1687             :          * Read in the AGI buffer and recheck the free inode count now that
    1688             :          * the AGI buffer lock is held.
    1689             :          */
    1690    86248888 :         error = xfs_ialloc_read_agi(pag, *tpp, &agbp);
    1691    86354117 :         if (error)
    1692             :                 return error;
    1693             : 
    1694    86354086 :         if (!pag->pagi_freecount) {
    1695     2655072 :                 if (!ok_alloc) {
    1696       66275 :                         error = -EAGAIN; /* xfs_dialloc() moves on to the next AG */
    1697       66275 :                         goto out_release;
    1698             :                 }
    1699             : 
    1700     2588797 :                 error = xfs_ialloc_ag_alloc(pag, *tpp, agbp);
    1701     2588915 :                 if (error < 0)
    1702     1571868 :                         goto out_release;
    1703             : 
    1704             :                 /*
    1705             :                  * We successfully allocated space for an inode cluster in this
    1706             :                  * AG.  Roll the transaction so that we can allocate one of the
    1707             :                  * new inodes.
    1708             :                  */
    1709     1017047 :                 ASSERT(pag->pagi_freecount > 0);
    1710     1017047 :                 error = xfs_dialloc_roll(tpp, agbp);
    1711     1016789 :                 if (error)
    1712           0 :                         goto out_release;
    1713             :         }
    1714             : 
    1715             :         /* Allocate an inode in the found AG */
    1716    84715803 :         error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino);
    1717    84896627 :         if (!error)
    1718    84908780 :                 *new_ino = ino;
    1719             :         return error;
    1720             : 
    1721     1638143 : out_release:                            /* failure paths drop the AGI buffer */
    1722     1638143 :         xfs_trans_brelse(*tpp, agbp);
    1723     1638143 :         return error;
    1724             : }
    1725             : 
    1726             : /*
    1727             :  * Allocate an on-disk inode.
    1728             :  *
    1729             :  * @mode selects the starting AG (directories are rotored across AGs) and is
    1730             :  * passed on to the per-AG space check. The allocated on-disk inode number is
    1731             :  * returned in @new_ino on success; otherwise a negative errno (e.g. -ENOSPC)
    1732             :  * is returned and @new_ino is left untouched.
    1733             :  */
    1734             : int
    1735    84847091 : xfs_dialloc(
    1736             :         struct xfs_trans        **tpp,
    1737             :         xfs_ino_t               parent,
    1738             :         umode_t                 mode,
    1739             :         xfs_ino_t               *new_ino)
    1740             : {
    1741    84847091 :         struct xfs_mount        *mp = (*tpp)->t_mountp;
    1742    84847091 :         xfs_agnumber_t          agno;
    1743    84847091 :         int                     error = 0;
    1744    84847091 :         xfs_agnumber_t          start_agno;
    1745    84847091 :         struct xfs_perag        *pag;
    1746    84847091 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    1747    84847091 :         bool                    ok_alloc = true;
    1748    84847091 :         bool                    low_space = false;
    1749    84847091 :         int                     flags;
    1750    84847091 :         xfs_ino_t               ino = NULLFSINO;
    1751             : 
    1752             :         /*
    1753             :          * Pick the AG to start searching from: new directories are spread
    1754             :          * across the AGs via the m_agirotor counter, while everything else
    1755             :          * starts in the parent's AG (falling back to AG 0 if out of range).
    1756             :          */
    1757    84847091 :         if (S_ISDIR(mode))
    1758     6564927 :                 start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
    1759     6565059 :                                 mp->m_maxagi;
    1760             :         else {
    1761    78282032 :                 start_agno = XFS_INO_TO_AGNO(mp, parent);
    1762    78282032 :                 if (start_agno >= mp->m_maxagi)
    1763           0 :                         start_agno = 0;
    1764             :         }
    1765             : 
    1766             :         /*
    1767             :          * If we have already hit the ceiling of inode blocks then clear
    1768             :          * ok_alloc so we scan all available agi structures for a free
    1769             :          * inode.
    1770             :          *
    1771             :          * Use percpu_counter_read_positive() for a rough value of mp->m_icount;
    1772             :          * this sacrifices precision but improves performance.
    1773             :          */
    1774    84846959 :         if (igeo->maxicount &&
    1775    84788875 :             percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
    1776             :                                                         > igeo->maxicount) {
    1777      156982 :                 ok_alloc = false;
    1778             :         }
    1779             : 
    1780             :         /*
    1781             :          * If we are near to ENOSPC, we want to prefer allocation from AGs that
    1782             :          * have free inodes in them rather than use up free space allocating new
    1783             :          * inode chunks. Hence we turn off allocation for the first non-blocking
    1784             :          * pass through the AGs if we are near ENOSPC to consume free inodes
    1785             :          * that we can immediately allocate, but then we allow allocation on the
    1786             :          * second pass if we fail to find an AG with free inodes in it.
    1787             :          */
    1788    84846959 :         if (percpu_counter_read_positive(&mp->m_fdblocks) <
    1789    84846959 :                         mp->m_low_space[XFS_LOWSP_1_PCNT]) {
    1790      982649 :                 ok_alloc = false;
    1791      982649 :                 low_space = true;
    1792             :         }
    1793             : 
    1794             :         /*
    1795             :          * Loop until we find an allocation group that either has free inodes
    1796             :          * or in which we can allocate some inodes.  Iterate through the
    1797             :          * allocation groups upward, wrapping at the end.
    1798             :          */
    1799    84846959 :         flags = XFS_ALLOC_FLAG_TRYLOCK;
    1800    85045404 : retry:
    1801    88786035 :         for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
    1802    88597105 :                 if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
    1803    86319507 :                         error = xfs_dialloc_try_ag(pag, tpp, parent,
    1804             :                                         &ino, ok_alloc);
    1805    86460922 :                         if (error != -EAGAIN)
    1806             :                                 break;
    1807             :                         error = 0;      /* -EAGAIN: move on to the next AG */
    1808             :                 }
    1809             : 
    1810     7481262 :                 if (xfs_is_shutdown(mp)) {
    1811             :                         error = -EFSCORRUPTED;
    1812             :                         break;
    1813             :                 }
    1814             :         }
    1815    85161715 :         if (pag)                        /* set only if we broke out of the loop */
    1816    84740380 :                 xfs_perag_rele(pag);
    1817    85229268 :         if (error)
    1818         402 :                 return error;
    1819    85228866 :         if (ino == NULLFSINO) {         /* no AG yielded an inode on this pass */
    1820      355018 :                 if (flags) {            /* first pass: retry without TRYLOCK */
    1821      198445 :                         flags = 0;
    1822      198445 :                         if (low_space)  /* now allow new inode chunks */
    1823       37235 :                                 ok_alloc = true;
    1824      198445 :                         goto retry;
    1825             :                 }
    1826             :                 return -ENOSPC;
    1827             :         }
    1828    84873848 :         *new_ino = ino;
    1829    84873848 :         return 0;
    1830             : }
    1831             : 
    1832             : /*
    1833             :  * Free the blocks of an inode chunk. We must consider that the inode chunk
    1834             :  * might be sparse and only free the regions that are allocated as part of the
    1835             :  * chunk.
    1836             :  */
    1837             : static int
    1838      346331 : xfs_difree_inode_chunk(
    1839             :         struct xfs_trans                *tp,
    1840             :         xfs_agnumber_t                  agno,   /* AG that holds the chunk */
    1841             :         struct xfs_inobt_rec_incore     *rec)   /* inobt record of the chunk */
    1842             : {
    1843      346331 :         struct xfs_mount                *mp = tp->t_mountp;
    1844      346331 :         xfs_agblock_t                   sagbno = XFS_AGINO_TO_AGBNO(mp,
    1845             :                                                         rec->ir_startino);
    1846      346331 :         int                             startidx, endidx;
    1847      346331 :         int                             nextbit;
    1848      346331 :         xfs_agblock_t                   agbno;
    1849      346331 :         int                             contigblk;
    1850      346331 :         DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
    1851             : 
    1852      346331 :         if (!xfs_inobt_issparse(rec->ir_holemask)) {
    1853             :                 /* not sparse, free the whole chunk as a single extent */
    1854      601534 :                 return xfs_free_extent_later(tp,
    1855      300767 :                                 XFS_AGB_TO_FSB(mp, agno, sagbno),
    1856      300767 :                                 M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
    1857             :                                 XFS_AG_RESV_NONE);
    1858             :         }
    1859             : 
    1860             :         /* holemask is only 16-bits (fits in an unsigned long) */
    1861       45564 :         ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
    1862       45564 :         holemask[0] = rec->ir_holemask;
    1863             : 
    1864             :         /*
    1865             :          * Find contiguous ranges of zeroes (i.e., allocated regions) in the
    1866             :          * holemask and convert the start/end index of each range to an extent.
    1867             :          * We start with the start and end index both pointing at the first 0 in
    1868             :          * the mask.
    1869             :          */
    1870       45564 :         startidx = endidx = find_first_zero_bit(holemask,
    1871             :                                                 XFS_INOBT_HOLEMASK_BITS);
    1872       45563 :         nextbit = startidx + 1;
    1873      410066 :         while (startidx < XFS_INOBT_HOLEMASK_BITS) {
    1874      364502 :                 int error;
    1875             : 
    1876      364502 :                 nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
    1877             :                                              nextbit);
    1878             :                 /*
    1879             :                  * If the next zero bit is contiguous, update the end index of
    1880             :                  * the current range and continue.
    1881             :                  */
    1882      364501 :                 if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
    1883      318936 :                     nextbit == endidx + 1) {
    1884      318939 :                         endidx = nextbit;
    1885      318939 :                         goto next;
    1886             :                 }
    1887             : 
    1888             :                 /*
    1889             :                  * nextbit is not contiguous with the current end index. Convert
    1890             :                  * the current start/end to an extent and pass it to
    1891             :                  * xfs_free_extent_later().
    1892             :                  */
    1893           0 :                 agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
    1894       45562 :                                   mp->m_sb.sb_inopblock;
    1895       91124 :                 contigblk = ((endidx - startidx + 1) *
    1896           0 :                              XFS_INODES_PER_HOLEMASK_BIT) /
    1897       45562 :                             mp->m_sb.sb_inopblock;
    1898             : 
    1899       45562 :                 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
    1900       45562 :                 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
    1901       91124 :                 error = xfs_free_extent_later(tp,
    1902       45562 :                                 XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
    1903             :                                 &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
    1904       45564 :                 if (error)
    1905           0 :                         return error;
    1906             : 
    1907             :                 /* reset range to current bit and carry on... */
    1908             :                 startidx = endidx = nextbit;
    1909             : 
    1910      364503 : next:
    1911      364503 :                 nextbit++;              /* resume the search after this bit */
    1912             :         }
    1913             :         return 0;
    1914             : }
    1915             : 
    1916             : STATIC int                              /* error */
    1917    57758085 : xfs_difree_inobt(
    1918             :         struct xfs_perag                *pag,
    1919             :         struct xfs_trans                *tp,
    1920             :         struct xfs_buf                  *agbp,  /* AGI buffer */
    1921             :         xfs_agino_t                     agino,  /* AG-relative inode to free */
    1922             :         struct xfs_icluster             *xic,   /* out: deleted chunk info */
    1923             :         struct xfs_inobt_rec_incore     *orec)  /* out: updated inobt record */
    1924             : {
    1925    57758085 :         struct xfs_mount                *mp = pag->pag_mount;
    1926    57758085 :         struct xfs_agi                  *agi = agbp->b_addr;
    1927    57758085 :         struct xfs_btree_cur            *cur;
    1928    57758085 :         struct xfs_inobt_rec_incore     rec;
    1929    57758085 :         int                             ilen;
    1930    57758085 :         int                             error;
    1931    57758085 :         int                             i;
    1932    57758085 :         int                             off;
    1933             : 
    1934    57758085 :         ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
    1935    57758085 :         ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
    1936             : 
    1937             :         /*
    1938             :          * Initialize the cursor.
    1939             :          */
    1940    57758085 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    1941             : 
    1942    57775888 :         error = xfs_check_agi_freecount(cur);
    1943    57800285 :         if (error)
    1944           3 :                 goto error0;
    1945             : 
    1946             :         /*
    1947             :          * Look for the entry describing this inode.
    1948             :          */
    1949    57800282 :         if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
    1950          10 :                 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
    1951             :                         __func__, error);
    1952          10 :                 goto error0;
    1953             :         }
    1954    57826887 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    1955           0 :                 error = -EFSCORRUPTED;
    1956           0 :                 goto error0;
    1957             :         }
    1958    57826887 :         error = xfs_inobt_get_rec(cur, &rec, &i);
    1959    57788842 :         if (error) {
    1960           0 :                 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
    1961             :                         __func__, error);
    1962           0 :                 goto error0;
    1963             :         }
    1964    57788842 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    1965           0 :                 error = -EFSCORRUPTED;
    1966           0 :                 goto error0;
    1967             :         }
    1968             :         /*
    1969             :          * Get the offset in the inode chunk.
    1970             :          */
    1971    57788842 :         off = agino - rec.ir_startino;
    1972    57788842 :         ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
    1973    57788842 :         ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
    1974             :         /*
    1975             :          * Mark the inode free & increment the count.
    1976             :          */
    1977    57788842 :         rec.ir_free |= XFS_INOBT_MASK(off);
    1978    57788842 :         rec.ir_freecount++;
    1979             : 
    1980             :         /*
    1981             :          * When an inode chunk is free, it becomes eligible for removal. Don't
    1982             :          * remove the chunk if the block size is large enough for multiple inode
    1983             :          * chunks (that might not be free).
    1984             :          */
    1985    57788842 :         if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
    1986      346319 :             mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
    1987      346320 :                 xic->deleted = true;
    1988      346320 :                 xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
    1989             :                                 rec.ir_startino);
    1990      346320 :                 xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
    1991             : 
    1992             :                 /*
    1993             :                  * Remove the inode cluster from the inobt, adjust the AGI and
    1994             :                  * superblock counts (this inode was never counted free, hence
    1995             :                  * ilen - 1), and mark the space to be freed at commit time.
    1996             :                  */
    1997      346349 :                 ilen = rec.ir_freecount;
    1998      346349 :                 be32_add_cpu(&agi->agi_count, -ilen);
    1999      346349 :                 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
    2000      346349 :                 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
    2001      346343 :                 pag->pagi_freecount -= ilen - 1;
    2002      346343 :                 pag->pagi_count -= ilen;
    2003      346343 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
    2004      346324 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
    2005             : 
    2006      346325 :                 if ((error = xfs_btree_delete(cur, &i))) {
    2007           0 :                         xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
    2008             :                                 __func__, error);
    2009           0 :                         goto error0;
    2010             :                 }
    2011             : 
    2012      346336 :                 error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
    2013      346325 :                 if (error)
    2014           0 :                         goto error0;
    2015             :         } else {
    2016    57442522 :                 xic->deleted = false;
    2017             : 
    2018    57442522 :                 error = xfs_inobt_update(cur, &rec);
    2019    57416002 :                 if (error) {
    2020           0 :                         xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
    2021             :                                 __func__, error);
    2022           0 :                         goto error0;
    2023             :                 }
    2024             : 
    2025             :                 /*
    2026             :                  * Change the inode free counts and log the ag/sb changes.
    2027             :                  */
    2028    57416002 :                 be32_add_cpu(&agi->agi_freecount, 1);
    2029    57416002 :                 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
    2030    57447712 :                 pag->pagi_freecount++;
    2031    57447712 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
    2032             :         }
    2033             : 
    2034    57686011 :         error = xfs_check_agi_freecount(cur);
    2035    57714496 :         if (error)
    2036           0 :                 goto error0;
    2037             : 
    2038    57714496 :         *orec = rec;                    /* hand the updated record to the caller */
    2039    57714496 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    2040    57714496 :         return 0;
    2041             : 
    2042          13 : error0:
    2043          13 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    2044          13 :         return error;
    2045             : }
    2046             : 
    2047             : /*
    2048             :  * Free an inode in the free inode btree.
    2049             :  */
    2050             : STATIC int
    2051    57740138 : xfs_difree_finobt(
    2052             :         struct xfs_perag                *pag,
    2053             :         struct xfs_trans                *tp,
    2054             :         struct xfs_buf                  *agbp,
    2055             :         xfs_agino_t                     agino,
    2056             :         struct xfs_inobt_rec_incore     *ibtrec) /* inobt record */
    2057             : {
    2058    57740138 :         struct xfs_mount                *mp = pag->pag_mount;
    2059    57740138 :         struct xfs_btree_cur            *cur;
    2060    57740138 :         struct xfs_inobt_rec_incore     rec;
    2061    57740138 :         int                             offset = agino - ibtrec->ir_startino;
    2062    57740138 :         int                             error;
    2063    57740138 :         int                             i;
    2064             : 
    2065    57740138 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
    2066             : 
    2067    57786102 :         error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
    2068    57833849 :         if (error)
    2069          10 :                 goto error;
    2070    57833839 :         if (i == 0) {
    2071             :                 /*
    2072             :                  * If the record does not exist in the finobt, we must have just
    2073             :                  * freed an inode in a previously fully allocated chunk. If not,
    2074             :                  * something is out of sync.
    2075             :                  */
    2076    11692404 :                 if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
    2077           0 :                         error = -EFSCORRUPTED;
    2078           0 :                         goto error;
    2079             :                 }
    2080             : 
    2081    11692404 :                 error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
    2082    11692404 :                                              ibtrec->ir_count,
    2083             :                                              ibtrec->ir_freecount,
    2084             :                                              ibtrec->ir_free, &i);
    2085    11683148 :                 if (error)
    2086           0 :                         goto error;
    2087    11683148 :                 ASSERT(i == 1);
    2088             : 
    2089    11683148 :                 goto out;
    2090             :         }
    2091             : 
    2092             :         /*
    2093             :          * Read and update the existing record. We could just copy the ibtrec
    2094             :          * across here, but that would defeat the purpose of having redundant
    2095             :          * metadata. By making the modifications independently, we can catch
    2096             :          * corruptions that we wouldn't see if we just copied from one record
    2097             :          * to another.
    2098             :          */
    2099    46141435 :         error = xfs_inobt_get_rec(cur, &rec, &i);
    2100    46120045 :         if (error)
    2101           0 :                 goto error;
    2102    46120045 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    2103           0 :                 error = -EFSCORRUPTED;
    2104           0 :                 goto error;
    2105             :         }
    2106             : 
    2107    46120045 :         rec.ir_free |= XFS_INOBT_MASK(offset);
    2108    46120045 :         rec.ir_freecount++;
    2109             : 
    2110    46120045 :         if (XFS_IS_CORRUPT(mp,
    2111             :                            rec.ir_free != ibtrec->ir_free ||
    2112             :                            rec.ir_freecount != ibtrec->ir_freecount)) {
    2113           0 :                 error = -EFSCORRUPTED;
    2114           0 :                 goto error;
    2115             :         }
    2116             : 
    2117             :         /*
    2118             :          * The content of inobt records should always match between the inobt
    2119             :          * and finobt. The lifecycle of records in the finobt is different from
    2120             :          * the inobt in that the finobt only tracks records with at least one
    2121             :          * free inode. Hence, if all of the inodes are free and we aren't
    2122             :          * keeping inode chunks permanently on disk, remove the record.
    2123             :          * Otherwise, update the record with the new information.
    2124             :          *
    2125             :          * Note that we currently can't free chunks when the block size is large
    2126             :          * enough for multiple chunks. Leave the finobt record to remain in sync
    2127             :          * with the inobt.
    2128             :          */
    2129    46120045 :         if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
    2130      346343 :             mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
    2131      346344 :                 error = xfs_btree_delete(cur, &i);
    2132      346348 :                 if (error)
    2133           0 :                         goto error;
    2134      346348 :                 ASSERT(i == 1);
    2135             :         } else {
    2136    45773701 :                 error = xfs_inobt_update(cur, &rec);
    2137    45766337 :                 if (error)
    2138           0 :                         goto error;
    2139             :         }
    2140             : 
    2141    45766337 : out:
    2142    57795833 :         error = xfs_check_agi_freecount(cur);
    2143    57857875 :         if (error)
    2144           0 :                 goto error;
    2145             : 
    2146    57857875 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    2147    57857875 :         return 0;
    2148             : 
    2149          10 : error:
    2150          10 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    2151          10 :         return error;
    2152             : }
    2153             : 
    2154             : /*
    2155             :  * Free disk inode.  Carefully avoids touching the incore inode, all
    2156             :  * manipulations incore are the caller's responsibility.
    2157             :  * The on-disk inode is not changed by this operation, only the
    2158             :  * btree (free inode mask) is changed.
    2159             :  */
    2160             : int
    2161    57784085 : xfs_difree(
    2162             :         struct xfs_trans        *tp,
    2163             :         struct xfs_perag        *pag,
    2164             :         xfs_ino_t               inode,
    2165             :         struct xfs_icluster     *xic)
    2166             : {
    2167             :         /* REFERENCED */
    2168    57784085 :         xfs_agblock_t           agbno;  /* block number containing inode */
    2169    57784085 :         struct xfs_buf          *agbp;  /* buffer for allocation group header */
    2170    57784085 :         xfs_agino_t             agino;  /* allocation group inode number */
    2171    57784085 :         int                     error;  /* error return value */
    2172    57784085 :         struct xfs_mount        *mp = tp->t_mountp;
    2173    57784085 :         struct xfs_inobt_rec_incore rec;/* btree record */
    2174             : 
    2175             :         /*
    2176             :          * Break up inode number into its components.
    2177             :          */
    2178    57784085 :         if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) {
    2179           0 :                 xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).",
    2180             :                         __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno);
    2181           0 :                 ASSERT(0);
    2182           0 :                 return -EINVAL;
    2183             :         }
    2184    57784085 :         agino = XFS_INO_TO_AGINO(mp, inode);
    2185    57784085 :         if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino))  {
    2186           0 :                 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
    2187             :                         __func__, (unsigned long long)inode,
    2188             :                         (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
    2189           0 :                 ASSERT(0);
    2190           0 :                 return -EINVAL;
    2191             :         }
    2192    57784085 :         agbno = XFS_AGINO_TO_AGBNO(mp, agino);
    2193    57784085 :         if (agbno >= mp->m_sb.sb_agblocks)  {
    2194           0 :                 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
    2195             :                         __func__, agbno, mp->m_sb.sb_agblocks);
    2196           0 :                 ASSERT(0);
    2197           0 :                 return -EINVAL;
    2198             :         }
    2199             :         /*
    2200             :          * Get the allocation group header.
    2201             :          */
    2202    57784085 :         error = xfs_ialloc_read_agi(pag, tp, &agbp);
    2203    57785331 :         if (error) {
    2204         107 :                 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
    2205             :                         __func__, error);
    2206         107 :                 return error;
    2207             :         }
    2208             : 
    2209             :         /*
    2210             :          * Fix up the inode allocation btree.
    2211             :          */
    2212    57785224 :         error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec);
    2213    57828885 :         if (error)
    2214          13 :                 goto error0;
    2215             : 
    2216             :         /*
    2217             :          * Fix up the free inode btree.
    2218             :          */
    2219    57828872 :         if (xfs_has_finobt(mp)) {
    2220    57762775 :                 error = xfs_difree_finobt(pag, tp, agbp, agino, &rec);
    2221    57867235 :                 if (error)
    2222          10 :                         goto error0;
    2223             :         }
    2224             : 
    2225             :         return 0;
    2226             : 
    2227             : error0:
    2228             :         return error;
    2229             : }
    2230             : 
    2231             : STATIC int
    2232   383515841 : xfs_imap_lookup(
    2233             :         struct xfs_perag        *pag,
    2234             :         struct xfs_trans        *tp,
    2235             :         xfs_agino_t             agino,
    2236             :         xfs_agblock_t           agbno,
    2237             :         xfs_agblock_t           *chunk_agbno,
    2238             :         xfs_agblock_t           *offset_agbno,
    2239             :         int                     flags)
    2240             : {
    2241   383515841 :         struct xfs_mount        *mp = pag->pag_mount;
    2242   383515841 :         struct xfs_inobt_rec_incore rec;
    2243   383515841 :         struct xfs_btree_cur    *cur;
    2244   383515841 :         struct xfs_buf          *agbp;
    2245   383515841 :         int                     error;
    2246   383515841 :         int                     i;
    2247             : 
    2248   383515841 :         error = xfs_ialloc_read_agi(pag, tp, &agbp);
    2249   383511255 :         if (error) {
    2250         129 :                 xfs_alert(mp,
    2251             :                         "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
    2252             :                         __func__, error, pag->pag_agno);
    2253         129 :                 return error;
    2254             :         }
    2255             : 
    2256             :         /*
    2257             :          * Lookup the inode record for the given agino. If the record cannot be
    2258             :          * found, then it's an invalid inode number and we should abort. Once
    2259             :          * we have a record, we need to ensure it contains the inode number
    2260             :          * we are looking up.
    2261             :          */
    2262   383511126 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    2263   383519648 :         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
    2264   383528701 :         if (!error) {
    2265   383527581 :                 if (i)
    2266   383524579 :                         error = xfs_inobt_get_rec(cur, &rec, &i);
    2267   383491341 :                 if (!error && i == 0)
    2268         267 :                         error = -EINVAL;
    2269             :         }
    2270             : 
    2271   383492461 :         xfs_trans_brelse(tp, agbp);
    2272   383518431 :         xfs_btree_del_cursor(cur, error);
    2273   383532723 :         if (error)
    2274             :                 return error;
    2275             : 
    2276             :         /* check that the returned record contains the required inode */
    2277   383532441 :         if (rec.ir_startino > agino ||
    2278   383532441 :             rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
    2279             :                 return -EINVAL;
    2280             : 
    2281             :         /* for untrusted inodes check it is allocated first */
    2282   383485580 :         if ((flags & XFS_IGET_UNTRUSTED) &&
    2283   383486839 :             (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
    2284             :                 return -EINVAL;
    2285             : 
    2286   383483418 :         *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
    2287   383483418 :         *offset_agbno = agbno - *chunk_agbno;
    2288   383483418 :         return 0;
    2289             : }
    2290             : 
    2291             : /*
    2292             :  * Return the location of the inode in imap, for mapping it into a buffer.
    2293             :  */
    2294             : int
    2295   440076850 : xfs_imap(
    2296             :         struct xfs_perag        *pag,
    2297             :         struct xfs_trans        *tp,
    2298             :         xfs_ino_t               ino,    /* inode to locate */
    2299             :         struct xfs_imap         *imap,  /* location map structure */
    2300             :         uint                    flags)  /* flags for inode btree lookup */
    2301             : {
    2302   440076850 :         struct xfs_mount        *mp = pag->pag_mount;
    2303   440076850 :         xfs_agblock_t           agbno;  /* block number of inode in the alloc group */
    2304   440076850 :         xfs_agino_t             agino;  /* inode number within alloc group */
    2305   440076850 :         xfs_agblock_t           chunk_agbno;    /* first block in inode chunk */
    2306   440076850 :         xfs_agblock_t           cluster_agbno;  /* first block in inode cluster */
    2307   440076850 :         int                     error;  /* error code */
    2308   440076850 :         int                     offset; /* index of inode in its buffer */
    2309   440076850 :         xfs_agblock_t           offset_agbno;   /* blks from chunk start to inode */
    2310             : 
    2311   440076850 :         ASSERT(ino != NULLFSINO);
    2312             : 
    2313             :         /*
    2314             :          * Split up the inode number into its parts.
    2315             :          */
    2316   440076850 :         agino = XFS_INO_TO_AGINO(mp, ino);
    2317   440076850 :         agbno = XFS_AGINO_TO_AGBNO(mp, agino);
    2318   440076850 :         if (agbno >= mp->m_sb.sb_agblocks ||
    2319   440163982 :             ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
    2320       21197 :                 error = -EINVAL;
    2321             : #ifdef DEBUG
    2322             :                 /*
    2323             :                  * Don't output diagnostic information for untrusted inodes
    2324             :                  * as they can be invalid without implying corruption.
    2325             :                  */
    2326       21197 :                 if (flags & XFS_IGET_UNTRUSTED)
    2327             :                         return error;
    2328           0 :                 if (agbno >= mp->m_sb.sb_agblocks) {
    2329           0 :                         xfs_alert(mp,
    2330             :                 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
    2331             :                                 __func__, (unsigned long long)agbno,
    2332             :                                 (unsigned long)mp->m_sb.sb_agblocks);
    2333             :                 }
    2334           0 :                 if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
    2335           0 :                         xfs_alert(mp,
    2336             :                 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
    2337             :                                 __func__, ino,
    2338             :                                 XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
    2339             :                 }
    2340           0 :                 xfs_stack_trace();
    2341             : #endif /* DEBUG */
    2342           0 :                 return error;
    2343             :         }
    2344             : 
    2345             :         /*
    2346             :          * For bulkstat and handle lookups, we have an untrusted inode number
    2347             :          * that we have to verify is valid. We cannot do this just by reading
    2348             :          * the inode buffer as it may have been unlinked and removed leaving
    2349             :          * inodes in stale state on disk. Hence we have to do a btree lookup
    2350             :          * in all cases where an untrusted inode number is passed.
    2351             :          */
    2352   440055653 :         if (flags & XFS_IGET_UNTRUSTED) {
    2353   383485486 :                 error = xfs_imap_lookup(pag, tp, agino, agbno,
    2354             :                                         &chunk_agbno, &offset_agbno, flags);
    2355   383517491 :                 if (error)
    2356             :                         return error;
    2357   383466816 :                 goto out_map;
    2358             :         }
    2359             : 
    2360             :         /*
    2361             :          * If the inode cluster size is the same as the blocksize or
    2362             :          * smaller we get to the buffer by simple arithmetics.
    2363             :          */
    2364    56570167 :         if (M_IGEO(mp)->blocks_per_cluster == 1) {
    2365           0 :                 offset = XFS_INO_TO_OFFSET(mp, ino);
    2366           0 :                 ASSERT(offset < mp->m_sb.sb_inopblock);
    2367             : 
    2368           0 :                 imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno);
    2369           0 :                 imap->im_len = XFS_FSB_TO_BB(mp, 1);
    2370           0 :                 imap->im_boffset = (unsigned short)(offset <<
    2371           0 :                                                         mp->m_sb.sb_inodelog);
    2372           0 :                 return 0;
    2373             :         }
    2374             : 
    2375             :         /*
    2376             :          * If the inode chunks are aligned then use simple maths to
    2377             :          * find the location. Otherwise we have to do a btree
    2378             :          * lookup to find the location.
    2379             :          */
    2380    56570167 :         if (M_IGEO(mp)->inoalign_mask) {
    2381    56570167 :                 offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
    2382    56570167 :                 chunk_agbno = agbno - offset_agbno;
    2383             :         } else {
    2384           0 :                 error = xfs_imap_lookup(pag, tp, agino, agbno,
    2385             :                                         &chunk_agbno, &offset_agbno, flags);
    2386           0 :                 if (error)
    2387             :                         return error;
    2388             :         }
    2389             : 
    2390           0 : out_map:
    2391   440036983 :         ASSERT(agbno >= chunk_agbno);
    2392   880073966 :         cluster_agbno = chunk_agbno +
    2393   440036983 :                 ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
    2394   440036983 :                  M_IGEO(mp)->blocks_per_cluster);
    2395   880073966 :         offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
    2396   440036983 :                 XFS_INO_TO_OFFSET(mp, ino);
    2397             : 
    2398   440036983 :         imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno);
    2399   440036983 :         imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
    2400   440036983 :         imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
    2401             : 
    2402             :         /*
    2403             :          * If the inode number maps to a block outside the bounds
    2404             :          * of the file system then return NULL rather than calling
    2405             :          * read_buf and panicing when we get an error from the
    2406             :          * driver.
    2407             :          */
    2408   880073966 :         if ((imap->im_blkno + imap->im_len) >
    2409   440036983 :             XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
    2410           0 :                 xfs_alert(mp,
    2411             :         "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
    2412             :                         __func__, (unsigned long long) imap->im_blkno,
    2413             :                         (unsigned long long) imap->im_len,
    2414             :                         XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
    2415           0 :                 return -EINVAL;
    2416             :         }
    2417             :         return 0;
    2418             : }
    2419             : 
    2420             : /*
    2421             :  * Log specified fields for the ag hdr (inode section). The growth of the agi
    2422             :  * structure over time requires that we interpret the buffer as two logical
    2423             :  * regions delineated by the end of the unlinked list. This is due to the size
    2424             :  * of the hash table and its location in the middle of the agi.
    2425             :  *
    2426             :  * For example, a request to log a field before agi_unlinked and a field after
    2427             :  * agi_unlinked could cause us to log the entire hash table and use an excessive
    2428             :  * amount of log space. To avoid this behavior, log the region up through
    2429             :  * agi_unlinked in one call and the region after agi_unlinked through the end of
    2430             :  * the structure in another.
    2431             :  */
    2432             : void
    2433   143812316 : xfs_ialloc_log_agi(
    2434             :         struct xfs_trans        *tp,
    2435             :         struct xfs_buf          *bp,
    2436             :         uint32_t                fields)
    2437             : {
    2438   143812316 :         int                     first;          /* first byte number */
    2439   143812316 :         int                     last;           /* last byte number */
    2440   143812316 :         static const short      offsets[] = {   /* field starting offsets */
    2441             :                                         /* keep in sync with bit definitions */
    2442             :                 offsetof(xfs_agi_t, agi_magicnum),
    2443             :                 offsetof(xfs_agi_t, agi_versionnum),
    2444             :                 offsetof(xfs_agi_t, agi_seqno),
    2445             :                 offsetof(xfs_agi_t, agi_length),
    2446             :                 offsetof(xfs_agi_t, agi_count),
    2447             :                 offsetof(xfs_agi_t, agi_root),
    2448             :                 offsetof(xfs_agi_t, agi_level),
    2449             :                 offsetof(xfs_agi_t, agi_freecount),
    2450             :                 offsetof(xfs_agi_t, agi_newino),
    2451             :                 offsetof(xfs_agi_t, agi_dirino),
    2452             :                 offsetof(xfs_agi_t, agi_unlinked),
    2453             :                 offsetof(xfs_agi_t, agi_free_root),
    2454             :                 offsetof(xfs_agi_t, agi_free_level),
    2455             :                 offsetof(xfs_agi_t, agi_iblocks),
    2456             :                 sizeof(xfs_agi_t)
    2457             :         };
    2458             : #ifdef DEBUG
    2459   143812316 :         struct xfs_agi          *agi = bp->b_addr;
    2460             : 
    2461   143812316 :         ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
    2462             : #endif
    2463             : 
    2464             :         /*
    2465             :          * Compute byte offsets for the first and last fields in the first
    2466             :          * region and log the agi buffer. This only logs up through
    2467             :          * agi_unlinked.
    2468             :          */
    2469   143812316 :         if (fields & XFS_AGI_ALL_BITS_R1) {
    2470   143757883 :                 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
    2471             :                                   &first, &last);
    2472   143564621 :                 xfs_trans_log_buf(tp, bp, first, last);
    2473             :         }
    2474             : 
    2475             :         /*
    2476             :          * Mask off the bits in the first region and calculate the first and
    2477             :          * last field offsets for any bits in the second region.
    2478             :          */
    2479   144104886 :         fields &= ~XFS_AGI_ALL_BITS_R1;
    2480   144104886 :         if (fields) {
    2481        7817 :                 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
    2482             :                                   &first, &last);
    2483        7817 :                 xfs_trans_log_buf(tp, bp, first, last);
    2484             :         }
    2485   144104886 : }
    2486             : 
    2487             : static xfs_failaddr_t
    2488     2833412 : xfs_agi_verify(
    2489             :         struct xfs_buf          *bp)
    2490             : {
    2491     2833412 :         struct xfs_mount        *mp = bp->b_mount;
    2492     2833412 :         struct xfs_agi          *agi = bp->b_addr;
    2493     2833412 :         xfs_failaddr_t          fa;
    2494     2833412 :         uint32_t                agi_seqno = be32_to_cpu(agi->agi_seqno);
    2495     2833412 :         uint32_t                agi_length = be32_to_cpu(agi->agi_length);
    2496     2833412 :         int                     i;
    2497             : 
    2498     2833412 :         if (xfs_has_crc(mp)) {
    2499     2817599 :                 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
    2500           0 :                         return __this_address;
    2501     2819271 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn)))
    2502           0 :                         return __this_address;
    2503             :         }
    2504             : 
    2505             :         /*
    2506             :          * Validate the magic number of the agi block.
    2507             :          */
    2508     2835379 :         if (!xfs_verify_magic(bp, agi->agi_magicnum))
    2509           0 :                 return __this_address;
    2510     2833450 :         if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
    2511           0 :                 return __this_address;
    2512             : 
    2513     2833450 :         fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
    2514     2832918 :         if (fa)
    2515             :                 return fa;
    2516             : 
    2517     2832950 :         if (be32_to_cpu(agi->agi_level) < 1 ||
    2518     2832950 :             be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
    2519           0 :                 return __this_address;
    2520             : 
    2521     2832950 :         if (xfs_has_finobt(mp) &&
    2522     2818759 :             (be32_to_cpu(agi->agi_free_level) < 1 ||
    2523             :              be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
    2524           0 :                 return __this_address;
    2525             : 
    2526   184044015 :         for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
    2527   181208578 :                 if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
    2528   181019684 :                         continue;
    2529      191380 :                 if (!xfs_verify_ino(mp, be32_to_cpu(agi->agi_unlinked[i])))
    2530           0 :                         return __this_address;
    2531             :         }
    2532             : 
    2533             :         return NULL;
    2534             : }
    2535             : 
    2536             : static void
    2537      814535 : xfs_agi_read_verify(
    2538             :         struct xfs_buf  *bp)
    2539             : {
    2540      814535 :         struct xfs_mount *mp = bp->b_mount;
    2541      814535 :         xfs_failaddr_t  fa;
    2542             : 
    2543     1628611 :         if (xfs_has_crc(mp) &&
    2544             :             !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
    2545          10 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    2546             :         else {
    2547      814525 :                 fa = xfs_agi_verify(bp);
    2548      814525 :                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
    2549           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    2550             :         }
    2551      814535 : }
    2552             : 
    2553             : static void
    2554      700780 : xfs_agi_write_verify(
    2555             :         struct xfs_buf  *bp)
    2556             : {
    2557      700780 :         struct xfs_mount        *mp = bp->b_mount;
    2558      700780 :         struct xfs_buf_log_item *bip = bp->b_log_item;
    2559      700780 :         struct xfs_agi          *agi = bp->b_addr;
    2560      700780 :         xfs_failaddr_t          fa;
    2561             : 
    2562      700780 :         fa = xfs_agi_verify(bp);
    2563      700780 :         if (fa) {
    2564           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    2565           0 :                 return;
    2566             :         }
    2567             : 
    2568      700780 :         if (!xfs_has_crc(mp))
    2569             :                 return;
    2570             : 
    2571      687128 :         if (bip)
    2572      669120 :                 agi->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    2573      687128 :         xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
    2574             : }
    2575             : 
    2576             : const struct xfs_buf_ops xfs_agi_buf_ops = {
    2577             :         .name = "xfs_agi",
    2578             :         .magic = { cpu_to_be32(XFS_AGI_MAGIC), cpu_to_be32(XFS_AGI_MAGIC) },
    2579             :         .verify_read = xfs_agi_read_verify,
    2580             :         .verify_write = xfs_agi_write_verify,
    2581             :         .verify_struct = xfs_agi_verify,
    2582             : };
    2583             : 
    2584             : /*
    2585             :  * Read in the allocation group header (inode allocation section)
    2586             :  */
    2587             : int
    2588  1822591412 : xfs_read_agi(
    2589             :         struct xfs_perag        *pag,
    2590             :         struct xfs_trans        *tp,
    2591             :         struct xfs_buf          **agibpp)
    2592             : {
    2593  1822591412 :         struct xfs_mount        *mp = pag->pag_mount;
    2594  1822591412 :         int                     error;
    2595             : 
    2596  1822591412 :         trace_xfs_read_agi(pag->pag_mount, pag->pag_agno);
    2597             : 
    2598  7283883380 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
    2599  1820970845 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
    2600  1820970845 :                         XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops);
    2601  1822531475 :         if (error)
    2602             :                 return error;
    2603  1822525314 :         if (tp)
    2604  1818913708 :                 xfs_trans_buf_set_type(tp, *agibpp, XFS_BLFT_AGI_BUF);
    2605             : 
    2606  1822100359 :         xfs_buf_set_ref(*agibpp, XFS_AGI_REF);
    2607  1822100359 :         return 0;
    2608             : }
    2609             : 
    2610             : /*
    2611             :  * Read in the agi and initialise the per-ag data. If the caller supplies a
    2612             :  * @agibpp, return the locked AGI buffer to them, otherwise release it.
                     :  *
                     :  * @pag:    perag whose AGI is read and whose incore counters may be seeded
                     :  * @tp:     transaction handed to xfs_read_agi() and xfs_trans_brelse()
                     :  * @agibpp: if non-NULL, set to the AGI buffer on success
                     :  *
                     :  * When xfs_perag_initialised_agi() reports false, pagi_freecount and
                     :  * pagi_count are copied from the big-endian on-disk AGI fields and
                     :  * XFS_AGSTATE_AGI_INIT is set in pag_opstate (presumably so that later
                     :  * calls skip the copy -- confirm against xfs_perag_initialised_agi()).
                     :  *
                     :  * Returns 0 on success, or the error from xfs_read_agi(), in which case
                     :  * *agibpp is not written.
    2613             :  */
    2614             : int
    2615  1699911159 : xfs_ialloc_read_agi(
    2616             :         struct xfs_perag        *pag,
    2617             :         struct xfs_trans        *tp,
    2618             :         struct xfs_buf          **agibpp)
    2619             : {
    2620  1699911159 :         struct xfs_buf          *agibp;
    2621  1699911159 :         struct xfs_agi          *agi;
    2622  1699911159 :         int                     error;
    2623             : 
    2624  1699911159 :         trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
    2625             : 
    2626  1698405710 :         error = xfs_read_agi(pag, tp, &agibp);
    2627  1700128067 :         if (error)
    2628             :                 return error;
    2629             : 
    2630  1700121978 :         agi = agibp->b_addr;    /* AGI fields are read from the buffer contents */
    2631  3400243956 :         if (!xfs_perag_initialised_agi(pag)) {  /* seed the incore counters */
    2632      429829 :                 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
    2633      429829 :                 pag->pagi_count = be32_to_cpu(agi->agi_count);
    2634      429829 :                 set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
    2635             :         }
    2636             : 
    2637             :         /*
    2638             :          * It's possible for these to be out of sync if
    2639             :          * we are in the middle of a forced shutdown.
    2640             :          */
    2641  1700121978 :         ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
    2642             :                 xfs_is_shutdown(pag->pag_mount));
    2643  1700121978 :         if (agibpp)
    2644  1700062971 :                 *agibpp = agibp;        /* caller now holds the buffer */
    2645             :         else
    2646       59007 :                 xfs_trans_brelse(tp, agibp);    /* nobody wants it: release */
    2647             :         return 0;
    2648             : }
    2649             : 
    2650             : /*
                     :  * How many inodes are backed by inode clusters ondisk?
                     :  *
                     :  * @cur:       inode btree cursor; repositioned by this function
                     :  * @low:       first AG inode number of the range to examine
                     :  * @high:      last AG inode number of the range to examine (inclusive)
                     :  * @allocated: set to the number of counted inodes, on success only
                     :  *
                     :  * The cursor is positioned with an XFS_LOOKUP_LE lookup of @low and then
                     :  * advanced with xfs_btree_increment() until a record starts beyond @high
                     :  * or no further record is reported.  For each record, every one of its
                     :  * XFS_INODES_PER_CHUNK inode numbers that lies within [@low, @high] is
                     :  * counted if the corresponding ir_holemask bit is clear; one holemask bit
                     :  * covers XFS_INODES_PER_HOLEMASK_BIT consecutive inodes.
                     :  *
                     :  * Returns 0 on success or the error from the lookup, record fetch or
                     :  * cursor increment; *allocated is not written on error.
                     :  */
    2651             : STATIC int
    2652  2476512226 : xfs_ialloc_count_ondisk(
    2653             :         struct xfs_btree_cur            *cur,
    2654             :         xfs_agino_t                     low,
    2655             :         xfs_agino_t                     high,
    2656             :         unsigned int                    *allocated)
    2657             : {
    2658  2476512226 :         struct xfs_inobt_rec_incore     irec;
    2659  2476512226 :         unsigned int                    ret = 0;
    2660  2476512226 :         int                             has_record;
    2661  2476512226 :         int                             error;
    2662             : 
    2663  2476512226 :         error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
    2664  2473706717 :         if (error)
    2665             :                 return error;
    2666             : 
    2667  4547996699 :         while (has_record) {
    2668  2746748643 :                 unsigned int            i, hole_idx;
    2669             : 
    2670  2746748643 :                 error = xfs_inobt_get_rec(cur, &irec, &has_record);
    2671  2747354821 :                 if (error)
    2672           0 :                         return error;
    2673  2747354821 :                 if (irec.ir_startino > high)    /* record begins past the range */
    2674             :                         break;
    2675             : 
    2676 >13385*10^7 :                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
    2677 >13179*10^7 :                         if (irec.ir_startino + i < low) /* not yet in range */
    2678 >13136*10^7 :                                 continue;
    2679   435065208 :                         if (irec.ir_startino + i > high)        /* ran off the end */
    2680             :                                 break;
    2681             : 
    2682   424403246 :                         hole_idx = i / XFS_INODES_PER_HOLEMASK_BIT;
    2683   424403246 :                         if (!(irec.ir_holemask & (1U << hole_idx)))     /* not a hole */
    2684   259892608 :                                 ret++;
    2685             :                 }
    2686             : 
    2687  2073410028 :                 error = xfs_btree_increment(cur, 0, &has_record);
    2688  2074289982 :                 if (error)
    2689           0 :                         return error;
    2690             :         }
    2691             : 
    2692  2475192849 :         *allocated = ret;
    2693  2475192849 :         return 0;
    2694             : }
    2695             : 
    2696             : /*
                     :  * Is there an inode record covering a given extent?
                     :  *
                     :  * @cur:     inode btree cursor; cur->bc_mp supplies the mount for the
                     :  *           block-to-inode conversion
                     :  * @bno:     first AG block of the extent
                     :  * @len:     length of the extent in blocks
                     :  * @outcome: set on success to describe how much of the extent is inodes
                     :  *
                     :  * The extent [@bno, @bno + @len) is converted to an inclusive AG inode
                     :  * number range with XFS_AGB_TO_AGINO() and handed to
                     :  * xfs_ialloc_count_ondisk().  *outcome becomes XBTREE_RECPACKING_EMPTY if
                     :  * nothing was counted, XBTREE_RECPACKING_FULL if the count equals the
                     :  * number of inode numbers in the range, and XBTREE_RECPACKING_SPARSE
                     :  * otherwise.
                     :  *
                     :  * Returns 0 on success or the error from xfs_ialloc_count_ondisk(), in
                     :  * which case *outcome is not written.
                     :  */
    2697             : int
    2698  2474912095 : xfs_ialloc_has_inodes_at_extent(
    2699             :         struct xfs_btree_cur    *cur,
    2700             :         xfs_agblock_t           bno,
    2701             :         xfs_extlen_t            len,
    2702             :         enum xbtree_recpacking  *outcome)
    2703             : {
    2704  2474912095 :         xfs_agino_t             agino;
    2705  2474912095 :         xfs_agino_t             last_agino;
    2706  2474912095 :         unsigned int            allocated;
    2707  2474912095 :         int                     error;
    2708             : 
    2709  2474912095 :         agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
    2710  2474912095 :         last_agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;       /* inclusive end */
    2711             : 
    2712  2474912095 :         error = xfs_ialloc_count_ondisk(cur, agino, last_agino, &allocated);
    2713  2475449438 :         if (error)
    2714             :                 return error;
    2715             : 
    2716  2475449438 :         if (allocated == 0)
    2717  2471753458 :                 *outcome = XBTREE_RECPACKING_EMPTY;
    2718     3695980 :         else if (allocated == last_agino - agino + 1)   /* every inode in range */
    2719     3695980 :                 *outcome = XBTREE_RECPACKING_FULL;
    2720             :         else
    2721           0 :                 *outcome = XBTREE_RECPACKING_SPARSE;
    2722             :         return 0;
    2723             : }
    2724             : 
    2725             : struct xfs_ialloc_count_inodes {        /* totals gathered by xfs_ialloc_count_inodes_rec() */
    2726             :         xfs_agino_t                     count;          /* sum of ir_count over visited records */
    2727             :         xfs_agino_t                     freecount;      /* sum of ir_freecount over visited records */
    2728             : };
    2729             : 
    2730             : /*
                     :  * Record inode counts across all inobt records.
                     :  *
                     :  * Per-record callback that xfs_ialloc_count_inodes() passes to
                     :  * xfs_btree_query_all().
                     :  *
                     :  * @cur:  btree cursor the record was read through
                     :  * @rec:  on-disk btree record to account for
                     :  * @priv: the struct xfs_ialloc_count_inodes accumulator to update
                     :  *
                     :  * The record is converted to incore form and checked with
                     :  * xfs_inobt_check_irec().  If that check reports a failure address, the
                     :  * result of xfs_inobt_complain_bad_rec() is returned and the accumulator
                     :  * is left unchanged; otherwise ir_count and ir_freecount are added to the
                     :  * accumulator and 0 is returned.
                     :  */
    2731             : STATIC int
    2732   154983542 : xfs_ialloc_count_inodes_rec(
    2733             :         struct xfs_btree_cur            *cur,
    2734             :         const union xfs_btree_rec       *rec,
    2735             :         void                            *priv)
    2736             : {
    2737   154983542 :         struct xfs_inobt_rec_incore     irec;
    2738   154983542 :         struct xfs_ialloc_count_inodes  *ci = priv;
    2739   154983542 :         xfs_failaddr_t                  fa;
    2740             : 
    2741   154983542 :         xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
    2742   154981227 :         fa = xfs_inobt_check_irec(cur, &irec);
    2743   154987731 :         if (fa)
    2744           0 :                 return xfs_inobt_complain_bad_rec(cur, fa, &irec);
    2745             : 
    2746   154987731 :         ci->count += irec.ir_count;
    2747   154987731 :         ci->freecount += irec.ir_freecount;
    2748             : 
    2749   154987731 :         return 0;
    2750             : }
    2751             : 
    2752             : /*
                     :  * Count allocated and free inodes under an inobt.
                     :  *
                     :  * @cur:       btree cursor; asserted to have bc_btnum == XFS_BTNUM_INO
                     :  * @count:     set to the sum of ir_count over all records
                     :  * @freecount: set to the sum of ir_freecount over all records
                     :  *
                     :  * Runs xfs_ialloc_count_inodes_rec() over every record via
                     :  * xfs_btree_query_all(), starting from zeroed totals.
                     :  *
                     :  * Returns 0 on success or the error from xfs_btree_query_all(), in which
                     :  * case neither output is written.
                     :  */
    2753             : int
    2754     1529131 : xfs_ialloc_count_inodes(
    2755             :         struct xfs_btree_cur            *cur,
    2756             :         xfs_agino_t                     *count,
    2757             :         xfs_agino_t                     *freecount)
    2758             : {
    2759     1529131 :         struct xfs_ialloc_count_inodes  ci = {0};
    2760     1529131 :         int                             error;
    2761             : 
    2762     1529131 :         ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
    2763     1529131 :         error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
    2764     1529969 :         if (error)
    2765             :                 return error;
    2766             : 
    2767     1529969 :         *count = ci.count;
    2768     1529969 :         *freecount = ci.freecount;
    2769     1529969 :         return 0;
    2770             : }
    2771             : 
    2772             : /*
    2773             :  * Initialize inode-related geometry information.
    2774             :  *
    2775             :  * Compute the inode btree min and max levels and set maxicount.
    2776             :  *
    2777             :  * Set the inode cluster size.  This may still be overridden by the file
    2778             :  * system block size if it is larger than the chosen cluster size.
    2779             :  *
    2780             :  * For v5 filesystems, scale the cluster size with the inode size to keep a
    2781             :  * constant ratio of inode per cluster buffer, but only if mkfs has set the
    2782             :  * inode alignment value appropriately for larger cluster sizes.
    2783             :  *
    2784             :  * Then compute the inode cluster alignment information.
                     :  *
                     :  * @mp: mount whose superblock (mp->m_sb) and m_dalign are read and whose
                     :  *      inode geometry structure, M_IGEO(mp), is filled in.
                     :  *
                     :  * Nothing is returned; every result is stored in the geometry structure.
                     :  * maxicount is set to 0 when either sb_imax_pct or the computed
                     :  * ialloc_blks is zero.
    2785             :  */
    2786             : void
    2787       59405 : xfs_ialloc_setup_geometry(
    2788             :         struct xfs_mount        *mp)
    2789             : {
    2790       59405 :         struct xfs_sb           *sbp = &mp->m_sb;
    2791       59405 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    2792       59405 :         uint64_t                icount;
    2793       59405 :         uint                    inodes;
    2794             : 
                     :         /* Flags recorded in new_diflags2, depending on enabled features. */
    2795       59405 :         igeo->new_diflags2 = 0;
    2796       59405 :         if (xfs_has_bigtime(mp))
    2797       59123 :                 igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME;
    2798       59405 :         if (xfs_has_large_extent_counts(mp))
    2799       59145 :                 igeo->new_diflags2 |= XFS_DIFLAG2_NREXT64;
    2800             : 
    2801             :         /*
                     :          * Compute inode btree geometry.
                     :          *
                     :          * NOTE(review): the last argument of xfs_inobt_maxrecs() presumably
                     :          * selects leaf (1) versus node (0) blocks -- confirm.  The minimum
                     :          * record counts are half of the corresponding maximums.
                     :          */
    2802       59405 :         igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
    2803       59405 :         igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
    2804       59405 :         igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
    2805       59405 :         igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
    2806       59405 :         igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
    2807             : 
                     :         /* Allocation unit: the larger of one chunk and one block of inodes. */
    2808       59405 :         igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
    2809             :                         sbp->sb_inopblock);
    2810       59405 :         igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
    2811             : 
                     :         /* Without sb_spino_align the minimum allocation is the full unit. */
    2812       59405 :         if (sbp->sb_spino_align)
    2813       59199 :                 igeo->ialloc_min_blks = sbp->sb_spino_align;
    2814             :         else
    2815         206 :                 igeo->ialloc_min_blks = igeo->ialloc_blks;
    2816             : 
    2817             :         /*
                     :          * Compute and fill in value of m_ino_geo.inobt_maxlevels.  The record
                     :          * count handed to xfs_btree_compute_maxlevels() is the size of the AG
                     :          * inode number space divided by the chunk size.
                     :          */
    2818       59405 :         inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
    2819       59405 :         igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
    2820             :                         inodes);
    2821       59405 :         ASSERT(igeo->inobt_maxlevels <= xfs_iallocbt_maxlevels_ondisk());
    2822             : 
    2823             :         /*
    2824             :          * Set the maximum inode count for this filesystem, being careful not
    2825             :          * to use obviously garbage sb_inopblog/sb_inopblock values.  Regular
    2826             :          * users should never get here due to failing sb verification, but
    2827             :          * certain users (xfs_db) need to be usable even with corrupt metadata.
    2828             :          */
    2829       59405 :         if (sbp->sb_imax_pct && igeo->ialloc_blks) {
    2830             :                 /*
    2831             :                  * Make sure the maximum inode count is a multiple
    2832             :                  * of the units we allocate inodes in.
                     :                  *
                     :                  * icount becomes sb_imax_pct percent of sb_dblocks, expressed in
                     :                  * whole allocation units of ialloc_blks blocks; it is multiplied
                     :                  * back up to blocks before being converted to inodes.
    2833             :                  */
    2834       59405 :                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
    2835       59405 :                 do_div(icount, 100);
    2836       59405 :                 do_div(icount, igeo->ialloc_blks);
    2837       59405 :                 igeo->maxicount = XFS_FSB_TO_INO(mp,
    2838             :                                 icount * igeo->ialloc_blks);
    2839             :         } else {
    2840           0 :                 igeo->maxicount = 0;
    2841             :         }
    2842             : 
    2843             :         /*
    2844             :          * Compute the desired size of an inode cluster buffer size, which
    2845             :          * starts at 8K and (on v5 filesystems) scales up with larger inode
    2846             :          * sizes.
    2847             :          *
    2848             :          * Preserve the desired inode cluster size because the sparse inodes
    2849             :          * feature uses that desired size (not the actual size) to compute the
    2850             :          * sparse inode alignment.  The mount code validates this value, so we
    2851             :          * cannot change the behavior.
    2852             :          */
    2853       59405 :         igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
    2854       59405 :         if (xfs_has_v3inodes(mp)) {
    2855       59199 :                 int     new_size = igeo->inode_cluster_size_raw;
    2856             : 
    2857       59199 :                 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
    2858       59199 :                 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))      /* alignment allows it */
    2859       59199 :                         igeo->inode_cluster_size_raw = new_size;
    2860             :         }
    2861             : 
    2862             :         /*
                     :          * Calculate inode cluster ratios.  A cluster never spans less than
                     :          * one filesystem block.
                     :          */
    2863       59405 :         if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
    2864       59405 :                 igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
    2865             :                                 igeo->inode_cluster_size_raw);
    2866             :         else
    2867           0 :                 igeo->blocks_per_cluster = 1;
    2868       59405 :         igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
    2869       59405 :         igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
    2870             : 
    2871             :         /*
                     :          * Calculate inode cluster alignment.  sb_inoalignmt is only used
                     :          * when the align feature is set and it is at least one cluster.
                     :          *
                     :          * NOTE(review): inoalign_mask = cluster_align - 1 looks like it
                     :          * assumes cluster_align is a power of two -- confirm.
                     :          */
    2872       59405 :         if (xfs_has_align(mp) &&
    2873       59405 :             mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
    2874       59405 :                 igeo->cluster_align = mp->m_sb.sb_inoalignmt;
    2875             :         else
    2876           0 :                 igeo->cluster_align = 1;
    2877       59405 :         igeo->inoalign_mask = igeo->cluster_align - 1;
    2878       59405 :         igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
    2879             : 
    2880             :         /*
    2881             :          * If we are using stripe alignment, check whether
    2882             :          * the stripe unit is a multiple of the inode alignment
                     :          * (tested with inoalign_mask); if so use it as ialloc_align,
                     :          * otherwise ialloc_align is 0.
    2883             :          */
    2884       59405 :         if (mp->m_dalign && igeo->inoalign_mask &&
    2885         106 :             !(mp->m_dalign & igeo->inoalign_mask))
    2886          75 :                 igeo->ialloc_align = mp->m_dalign;
    2887             :         else
    2888       59330 :                 igeo->ialloc_align = 0;
    2889       59405 : }
    2890             : 
    2891             : /*
                     :  * Compute the location of the root directory inode that is laid out by mkfs.
                     :  *
                     :  * @mp:    mount supplying the superblock geometry and feature bits
                     :  * @sunit: alignment that first_bno is rounded up to when the dalign
                     :  *         feature is set and igeo->ialloc_align is nonzero
                     :  *
                     :  * Adds up the blocks that precede the first possible inode block in AG 0,
                     :  * rounds the result up to the applicable alignment, and returns the inode
                     :  * number of the first inode in that block of AG 0.
                     :  *
                     :  * NOTE(review): @sunit is used as the roundup() multiple in the dalign
                     :  * case; presumably callers never pass 0 there -- confirm.
                     :  */
    2892             : xfs_ino_t
    2893          95 : xfs_ialloc_calc_rootino(
    2894             :         struct xfs_mount        *mp,
    2895             :         int                     sunit)
    2896             : {
    2897          95 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    2898          95 :         xfs_agblock_t           first_bno;
    2899             : 
    2900             :         /*
    2901             :          * Pre-calculate the geometry of AG 0.  We know what it looks like
    2902             :          * because libxfs knows how to create allocation groups now.
    2903             :          *
    2904             :          * first_bno is the first block in which mkfs could possibly have
    2905             :          * allocated the root directory inode, once we factor in the metadata
    2906             :          * that mkfs formats before it.  Namely, the four AG headers...
                     :          * (four sectors, rounded up to whole blocks)
    2907             :          */
    2908          95 :         first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
    2909             : 
    2910             :         /* ...the two free space btree roots... */
    2911          95 :         first_bno += 2;
    2912             : 
    2913             :         /* ...the inode btree root... */
    2914          95 :         first_bno += 1;
    2915             : 
    2916             :         /* ...the initial AGFL... */
    2917          95 :         first_bno += xfs_alloc_min_freelist(mp, NULL);
    2918             : 
    2919             :         /* ...the free inode btree root... */
    2920          95 :         if (xfs_has_finobt(mp))
    2921          95 :                 first_bno++;
    2922             : 
    2923             :         /* ...the reverse mapping btree root... */
    2924          95 :         if (xfs_has_rmapbt(mp))
    2925          70 :                 first_bno++;
    2926             : 
    2927             :         /* ...the reference count btree... */
    2928          95 :         if (xfs_has_reflink(mp))
    2929          70 :                 first_bno++;
    2930             : 
    2931             :         /*
    2932             :          * ...and the log, if it is allocated in the first allocation group.
    2933             :          *
    2934             :          * This can happen with filesystems that only have a single
    2935             :          * allocation group, or very odd geometries created by old mkfs
    2936             :          * versions on very small filesystems.
    2937             :          */
    2938          95 :         if (xfs_ag_contains_log(mp, 0))
    2939           0 :                  first_bno += mp->m_sb.sb_logblocks;
    2940             : 
    2941             :         /*
    2942             :          * Now round first_bno up to whatever allocation alignment is given
    2943             :          * by the filesystem or was passed in.
                     :          *
                     :          * The passed-in @sunit takes precedence; sb_inoalignmt is only
                     :          * used otherwise, and only when it is greater than one block.
    2944             :          */
    2945          95 :         if (xfs_has_dalign(mp) && igeo->ialloc_align > 0)
    2946          64 :                 first_bno = roundup(first_bno, sunit);
    2947          31 :         else if (xfs_has_align(mp) &&
    2948          31 :                         mp->m_sb.sb_inoalignmt > 1)
    2949          31 :                 first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
    2950             : 
    2951          95 :         return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
    2952             : }
    2953             : 
    2954             : /*
    2955             :  * Ensure there are not sparse inode clusters that cross the new EOAG.
    2956             :  *
    2957             :  * This is a no-op for non-spinode filesystems since clusters are always fully
    2958             :  * allocated and checking the bnobt suffices.  However, a spinode filesystem
    2959             :  * could have a record where the upper inodes are free blocks.  If those blocks
    2960             :  * were removed from the filesystem, the inode record would extend beyond EOAG,
    2961             :  * which will be flagged as corruption.
                     :  *
                     :  * @pag:        perag of the AG being shrunk
                     :  * @tp:         transaction used for the inobt cursor
                     :  * @agibp:      AGI buffer used for the inobt cursor
                     :  * @new_length: proposed new length of the AG, in blocks
                     :  *
                     :  * Returns 0 if the filesystem has no sparse inodes, if the lookup finds
                     :  * no record, or if the record found ends at or before the first inode
                     :  * number of block @new_length.  Returns -ENOSPC if that record extends
                     :  * past it, -EFSCORRUPTED if the lookup succeeded but no record could be
                     :  * fetched, or the error from the btree lookup/fetch.  The cursor is
                     :  * always torn down with xfs_btree_del_cursor() before returning.
    2962             :  */
    2963             : int
    2964         847 : xfs_ialloc_check_shrink(
    2965             :         struct xfs_perag        *pag,
    2966             :         struct xfs_trans        *tp,
    2967             :         struct xfs_buf          *agibp,
    2968             :         xfs_agblock_t           new_length)
    2969             : {
    2970         847 :         struct xfs_inobt_rec_incore rec;
    2971         847 :         struct xfs_btree_cur    *cur;
    2972         847 :         xfs_agino_t             agino;
    2973         847 :         int                     has;
    2974         847 :         int                     error;
    2975             : 
    2976         847 :         if (!xfs_has_sparseinodes(pag->pag_mount))
    2977             :                 return 0;
    2978             : 
    2979         847 :         cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO);
    2980             : 
    2981             :         /* Look up the inobt record that would correspond to the new EOAG. */
    2982         847 :         agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
    2983         847 :         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
    2984         847 :         if (error || !has)      /* no record found: error is 0, nothing to check */
    2985          82 :                 goto out;
    2986             : 
    2987         765 :         error = xfs_inobt_get_rec(cur, &rec, &has);
    2988         765 :         if (error)
    2989           0 :                 goto out;
    2990             : 
    2991         765 :         if (!has) {     /* lookup said yes but the fetch found nothing */
    2992           0 :                 error = -EFSCORRUPTED;
    2993           0 :                 goto out;
    2994             :         }
    2995             : 
    2996             :         /* If the record covers inodes that would be beyond the new EOAG, bail out. */
    2997         765 :         if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) {
    2998         344 :                 error = -ENOSPC;
    2999         344 :                 goto out;
    3000             :         }
    3001         421 : out:
    3002         847 :         xfs_btree_del_cursor(cur, error);
    3003         847 :         return error;
    3004             : }

Generated by: LCOV version 1.14