LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_ialloc.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwa @ Mon Jul 31 20:08:17 PDT 2023 Lines: 1079 1286 83.9 %
Date: 2023-07-31 20:08:17 Functions: 44 46 95.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_ialloc.h"
      17             : #include "xfs_ialloc_btree.h"
      18             : #include "xfs_alloc.h"
      19             : #include "xfs_errortag.h"
      20             : #include "xfs_error.h"
      21             : #include "xfs_bmap.h"
      22             : #include "xfs_trans.h"
      23             : #include "xfs_buf_item.h"
      24             : #include "xfs_icreate_item.h"
      25             : #include "xfs_icache.h"
      26             : #include "xfs_trace.h"
      27             : #include "xfs_log.h"
      28             : #include "xfs_rmap.h"
      29             : #include "xfs_ag.h"
      30             : 
      31             : /*
      32             :  * Lookup a record by ino in the btree given by cur.
      33             :  */
      34             : int                                     /* error */
      35  3032015626 : xfs_inobt_lookup(
      36             :         struct xfs_btree_cur    *cur,   /* btree cursor */
      37             :         xfs_agino_t             ino,    /* starting inode of chunk */
      38             :         xfs_lookup_t            dir,    /* <=, >=, == */
      39             :         int                     *stat)  /* success/failure */
      40             : {
      41  5961880987 :         cur->bc_rec.i.ir_startino = ino;
      42  5961880987 :         cur->bc_rec.i.ir_holemask = 0;
      43  5961880987 :         cur->bc_rec.i.ir_count = 0;
      44  5961880987 :         cur->bc_rec.i.ir_freecount = 0;
      45  5961880987 :         cur->bc_rec.i.ir_free = 0;
      46  3034702078 :         return xfs_btree_lookup(cur, dir, stat);
      47             : }
      48             : 
      49             : /*
      50             :  * Update the record referred to by cur to the value given.
      51             :  * This either works (return 0) or gets an EFSCORRUPTED error.
      52             :  */
      53             : STATIC int                              /* error */
      54   144056981 : xfs_inobt_update(
      55             :         struct xfs_btree_cur    *cur,   /* btree cursor */
      56             :         xfs_inobt_rec_incore_t  *irec)  /* btree record */
      57             : {
      58   144056981 :         union xfs_btree_rec     rec;
      59             : 
      60   144056981 :         rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
      61   144056981 :         if (xfs_has_sparseinodes(cur->bc_mp)) {
      62   144056719 :                 rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
      63   144056719 :                 rec.inobt.ir_u.sp.ir_count = irec->ir_count;
      64   144056719 :                 rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
      65             :         } else {
      66             :                 /* ir_holemask/ir_count not supported on-disk */
      67         262 :                 rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
      68             :         }
      69   144056981 :         rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
      70   144056981 :         return xfs_btree_update(cur, &rec);
      71             : }
      72             : 
      73             : /* Convert on-disk btree record to incore inobt record. */
      74             : void
      75 14304103128 : xfs_inobt_btrec_to_irec(
      76             :         struct xfs_mount                *mp,
      77             :         const union xfs_btree_rec       *rec,
      78             :         struct xfs_inobt_rec_incore     *irec)
      79             : {
      80 14304103128 :         irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
      81 14304103128 :         if (xfs_has_sparseinodes(mp)) {
      82 14304102012 :                 irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
      83 14304102012 :                 irec->ir_count = rec->inobt.ir_u.sp.ir_count;
      84 14304102012 :                 irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
      85             :         } else {
      86             :                 /*
      87             :                  * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
      88             :                  * values for full inode chunks.
      89             :                  */
      90        1116 :                 irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
      91        1116 :                 irec->ir_count = XFS_INODES_PER_CHUNK;
      92        1116 :                 irec->ir_freecount =
      93        1116 :                                 be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
      94             :         }
      95 14304103128 :         irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
      96 14304103128 : }
      97             : 
      98             : /* Simple checks for inode records. */
      99             : xfs_failaddr_t
     100 14345784434 : xfs_inobt_check_irec(
     101             :         struct xfs_btree_cur                    *cur,
     102             :         const struct xfs_inobt_rec_incore       *irec)
     103             : {
     104 14345784434 :         uint64_t                        realfree;
     105             : 
     106             :         /* Record has to be properly aligned within the AG. */
     107 14345784434 :         if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino))
     108           0 :                 return __this_address;
     109 14345784434 :         if (!xfs_verify_agino(cur->bc_ag.pag,
     110             :                                 irec->ir_startino + XFS_INODES_PER_CHUNK - 1))
     111           0 :                 return __this_address;
     112 14345784434 :         if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
     113             :             irec->ir_count > XFS_INODES_PER_CHUNK)
     114           0 :                 return __this_address;
     115 14345784434 :         if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
     116           0 :                 return __this_address;
     117             : 
     118             :         /* Mask holes out of ir_free before counting the free inodes. */
     119 14345784434 :         if (!xfs_inobt_issparse(irec->ir_holemask))
     120  8602980653 :                 realfree = irec->ir_free;
     121             :         else
     122  5742803781 :                 realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
     123 28670403673 :         if (hweight64(realfree) != irec->ir_freecount)
     124     8569666 :                 return __this_address;
     125             : 
     126             :         return NULL;
     127             : }
     128             : 
     129             : static inline int
     130           0 : xfs_inobt_complain_bad_rec(
     131             :         struct xfs_btree_cur            *cur,
     132             :         xfs_failaddr_t                  fa,
     133             :         const struct xfs_inobt_rec_incore *irec)
     134             : {
     135           0 :         struct xfs_mount                *mp = cur->bc_mp;
     136             : 
     137           0 :         xfs_warn(mp,
     138             :                 "%s Inode BTree record corruption in AG %d detected at %pS!",
     139             :                 cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free",
     140             :                 cur->bc_ag.pag->pag_agno, fa);
     141           0 :         xfs_warn(mp,
     142             : "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
     143             :                 irec->ir_startino, irec->ir_count, irec->ir_freecount,
     144             :                 irec->ir_free, irec->ir_holemask);
     145           0 :         return -EFSCORRUPTED;
     146             : }
     147             : 
     148             : /*
     149             :  * Get the data from the pointed-to record.
     150             :  */
     151             : int
     152 14143203124 : xfs_inobt_get_rec(
     153             :         struct xfs_btree_cur            *cur,
     154             :         struct xfs_inobt_rec_incore     *irec,
     155             :         int                             *stat)
     156             : {
     157 14143203124 :         struct xfs_mount                *mp = cur->bc_mp;
     158 14143203124 :         union xfs_btree_rec             *rec;
     159 14143203124 :         xfs_failaddr_t                  fa;
     160 14143203124 :         int                             error;
     161             : 
     162 14143203124 :         error = xfs_btree_get_rec(cur, &rec, stat);
     163 14131482440 :         if (error || *stat == 0)
     164             :                 return error;
     165             : 
     166 14140060648 :         xfs_inobt_btrec_to_irec(mp, rec, irec);
     167 14136249747 :         fa = xfs_inobt_check_irec(cur, irec);
     168 14170769310 :         if (fa)
     169           0 :                 return xfs_inobt_complain_bad_rec(cur, fa, irec);
     170             : 
     171             :         return 0;
     172             : }
     173             : 
     174             : /*
     175             :  * Insert a single inobt record. Cursor must already point to desired location.
     176             :  */
     177             : int
     178           0 : xfs_inobt_insert_rec(
     179             :         struct xfs_btree_cur    *cur,
     180             :         uint16_t                holemask,
     181             :         uint8_t                 count,
     182             :         int32_t                 freecount,
     183             :         xfs_inofree_t           free,
     184             :         int                     *stat)
     185             : {
     186    10103851 :         cur->bc_rec.i.ir_holemask = holemask;
     187    10103851 :         cur->bc_rec.i.ir_count = count;
     188    10103851 :         cur->bc_rec.i.ir_freecount = freecount;
     189    10103851 :         cur->bc_rec.i.ir_free = free;
     190           0 :         return xfs_btree_insert(cur, stat);
     191             : }
     192             : 
     193             : /*
     194             :  * Insert records describing a newly allocated inode chunk into the inobt.
     195             :  */
     196             : STATIC int
     197      350118 : xfs_inobt_insert(
     198             :         struct xfs_perag        *pag,
     199             :         struct xfs_trans        *tp,
     200             :         struct xfs_buf          *agbp,
     201             :         xfs_agino_t             newino,
     202             :         xfs_agino_t             newlen,
     203             :         xfs_btnum_t             btnum)
     204             : {
     205      350118 :         struct xfs_btree_cur    *cur;
     206      350118 :         xfs_agino_t             thisino;
     207      350118 :         int                     i;
     208      350118 :         int                     error;
     209             : 
     210      350118 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
     211             : 
     212      350118 :         for (thisino = newino;
     213      700266 :              thisino < newino + newlen;
     214      350149 :              thisino += XFS_INODES_PER_CHUNK) {
     215      350149 :                 error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
     216      350150 :                 if (error) {
     217           1 :                         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
     218           1 :                         return error;
     219             :                 }
     220      350149 :                 ASSERT(i == 0);
     221             : 
     222      350149 :                 error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
     223             :                                              XFS_INODES_PER_CHUNK,
     224             :                                              XFS_INODES_PER_CHUNK,
     225             :                                              XFS_INOBT_ALL_FREE, &i);
     226      350149 :                 if (error) {
     227           0 :                         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
     228           0 :                         return error;
     229             :                 }
     230      350149 :                 ASSERT(i == 1);
     231             :         }
     232             : 
     233      350117 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
     234             : 
     235      350117 :         return 0;
     236             : }
     237             : 
     238             : /*
     239             :  * Verify that the number of free inodes in the AGI is correct.
     240             :  */
     241             : #ifdef DEBUG
     242             : static int
     243   292184585 : xfs_check_agi_freecount(
     244             :         struct xfs_btree_cur    *cur)
     245             : {
     246   292184585 :         if (cur->bc_nlevels == 1) {
     247   259456053 :                 xfs_inobt_rec_incore_t rec;
     248   259456053 :                 int             freecount = 0;
     249   259456053 :                 int             error;
     250   259456053 :                 int             i;
     251             : 
     252   259456053 :                 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
     253   259470026 :                 if (error)
     254         335 :                         return error;
     255             : 
     256  8535944193 :                 do {
     257  8535944193 :                         error = xfs_inobt_get_rec(cur, &rec, &i);
     258  8527666232 :                         if (error)
     259           0 :                                 return error;
     260             : 
     261  8527666232 :                         if (i) {
     262  8545833558 :                                 freecount += rec.ir_freecount;
     263  8545833558 :                                 error = xfs_btree_increment(cur, 0, &i);
     264  8554163494 :                                 if (error)
     265           0 :                                         return error;
     266             :                         }
     267  8535996168 :                 } while (i == 1);
     268             : 
     269   519043332 :                 if (!xfs_is_shutdown(cur->bc_mp))
     270   259527521 :                         ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
     271             :         }
     272             :         return 0;
     273             : }
     274             : #else
     275             : #define xfs_check_agi_freecount(cur)    0
     276             : #endif
     277             : 
     278             : /*
     279             :  * Initialise a new set of inodes. When called without a transaction context
     280             :  * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
     281             :  * than logging them (which in a transaction context puts them into the AIL
     282             :  * for writeback rather than the xfsbufd queue).
     283             :  */
     284             : int
     285      360195 : xfs_ialloc_inode_init(
     286             :         struct xfs_mount        *mp,
     287             :         struct xfs_trans        *tp,
     288             :         struct list_head        *buffer_list,
     289             :         int                     icount,
     290             :         xfs_agnumber_t          agno,
     291             :         xfs_agblock_t           agbno,
     292             :         xfs_agblock_t           length,
     293             :         unsigned int            gen)
     294             : {
     295      360195 :         struct xfs_buf          *fbuf;
     296      360195 :         struct xfs_dinode       *free;
     297      360195 :         int                     nbufs;
     298      360195 :         int                     version;
     299      360195 :         int                     i, j;
     300      360195 :         xfs_daddr_t             d;
     301      360195 :         xfs_ino_t               ino = 0;
     302      360195 :         int                     error;
     303             : 
     304             :         /*
     305             :          * Loop over the new block(s), filling in the inodes.  For small block
     306             :          * sizes, manipulate the inodes in buffers which are multiples of the
     307             :          * block size.
     308             :          */
     309      360195 :         nbufs = length / M_IGEO(mp)->blocks_per_cluster;
     310             : 
     311             :         /*
     312             :          * Figure out what version number to use in the inodes we create.  If
     313             :          * the superblock version has caught up to the one that supports the new
     314             :          * inode format, then use the new inode version.  Otherwise use the old
     315             :          * version so that old kernels will continue to be able to use the file
     316             :          * system.
     317             :          *
     318             :          * For v3 inodes, we also need to write the inode number into the inode,
     319             :          * so calculate the first inode number of the chunk here as
     320             :          * XFS_AGB_TO_AGINO() only works within a filesystem block, not
     321             :          * across multiple filesystem blocks (such as a cluster) and so cannot
     322             :          * be used in the cluster buffer loop below.
     323             :          *
     324             :          * Further, because we are writing the inode directly into the buffer
     325             :          * and calculating a CRC on the entire inode, we have to log the entire
     326             :          * inode so that the entire range the CRC covers is present in the log.
     327             :          * That means for v3 inode we log the entire buffer rather than just the
     328             :          * inode cores.
     329             :          */
     330      360195 :         if (xfs_has_v3inodes(mp)) {
     331      360193 :                 version = 3;
     332      360193 :                 ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno));
     333             : 
     334             :                 /*
     335             :                  * log the initialisation that is about to take place as a
     336             :                  * logical operation. This means the transaction does not
     337             :                  * need to log the physical changes to the inode buffers as log
     338             :                  * recovery will know what initialisation is actually needed.
     339             :                  * Hence we only need to log the buffers as "ordered" buffers so
     340             :                  * they track in the AIL as if they were physically logged.
     341             :                  */
     342      360193 :                 if (tp)
     343      352459 :                         xfs_icreate_log(tp, agno, agbno, icount,
     344      352459 :                                         mp->m_sb.sb_inodesize, length, gen);
     345             :         } else
     346             :                 version = 2;
     347             : 
     348      899283 :         for (j = 0; j < nbufs; j++) {
     349             :                 /*
     350             :                  * Get the block.
     351             :                  */
     352      539086 :                 d = XFS_AGB_TO_DADDR(mp, agno, agbno +
     353             :                                 (j * M_IGEO(mp)->blocks_per_cluster));
     354      539086 :                 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
     355      539086 :                                 mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
     356             :                                 XBF_UNMAPPED, &fbuf);
     357      539090 :                 if (error)
     358           0 :                         return error;
     359             : 
     360             :                 /* Initialize the inode buffers and log them appropriately. */
     361      539090 :                 fbuf->b_ops = &xfs_inode_buf_ops;
     362      539090 :                 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
     363    18331185 :                 for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
     364    17253005 :                         int     ioffset = i << mp->m_sb.sb_inodelog;
     365             : 
     366    17253005 :                         free = xfs_make_iptr(mp, fbuf, i);
     367    17252976 :                         free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
     368    17252976 :                         free->di_version = version;
     369    17252976 :                         free->di_gen = cpu_to_be32(gen);
     370    17252976 :                         free->di_next_unlinked = cpu_to_be32(NULLAGINO);
     371             : 
     372    17252976 :                         if (version == 3) {
     373    17252848 :                                 free->di_ino = cpu_to_be64(ino);
     374    17252848 :                                 ino++;
     375    17252848 :                                 uuid_copy(&free->di_uuid,
     376    17252848 :                                           &mp->m_sb.sb_meta_uuid);
     377    17252848 :                                 xfs_dinode_calc_crc(mp, free);
     378         128 :                         } else if (tp) {
     379             :                                 /* just log the inode core */
     380         128 :                                 xfs_trans_log_buf(tp, fbuf, ioffset,
     381         256 :                                           ioffset + XFS_DINODE_SIZE(mp) - 1);
     382             :                         }
     383             :                 }
     384             : 
     385      539090 :                 if (tp) {
     386             :                         /*
     387             :                          * Mark the buffer as an inode allocation buffer so it
     388             :                          * sticks in AIL at the point of this allocation
     389             :                          * transaction. This ensures that they are on disk before
     390             :                          * the tail of the log can be moved past this
     391             :                          * transaction (i.e. by preventing relogging from moving
     392             :                          * it forward in the log).
     393             :                          */
     394      527481 :                         xfs_trans_inode_alloc_buf(tp, fbuf);
     395      527481 :                         if (version == 3) {
     396             :                                 /*
     397             :                                  * Mark the buffer as ordered so that they are
     398             :                                  * not physically logged in the transaction but
     399             :                                  * still tracked in the AIL as part of the
     400             :                                  * transaction and pin the log appropriately.
     401             :                                  */
     402      527477 :                                 xfs_trans_ordered_buf(tp, fbuf);
     403             :                         }
     404             :                 } else {
     405       11609 :                         fbuf->b_flags |= XBF_DONE;
     406       11609 :                         xfs_buf_delwri_queue(fbuf, buffer_list);
     407       11609 :                         xfs_buf_relse(fbuf);
     408             :                 }
     409             :         }
     410             :         return 0;
     411             : }
     412             : 
     413             : /*
     414             :  * Align startino and allocmask for a recently allocated sparse chunk such that
     415             :  * they are fit for insertion (or merge) into the on-disk inode btrees.
     416             :  *
     417             :  * Background:
     418             :  *
     419             :  * When enabled, sparse inode support increases the inode alignment from cluster
     420             :  * size to inode chunk size. This means that the minimum range between two
     421             :  * non-adjacent inode records in the inobt is large enough for a full inode
     422             :  * record. This allows for cluster sized, cluster aligned block allocation
     423             :  * without need to worry about whether the resulting inode record overlaps with
     424             :  * another record in the tree. Without this basic rule, we would have to deal
     425             :  * with the consequences of overlap by potentially undoing recent allocations in
     426             :  * the inode allocation codepath.
     427             :  *
     428             :  * Because of this alignment rule (which is enforced on mount), there are two
     429             :  * inobt possibilities for newly allocated sparse chunks. One is that the
     430             :  * aligned inode record for the chunk covers a range of inodes not already
     431             :  * covered in the inobt (i.e., it is safe to insert a new sparse record). The
     432             :  * other is that a record already exists at the aligned startino that considers
     433             :  * the newly allocated range as sparse. In the latter case, record content is
     434             :  * merged in hope that sparse inode chunks fill to full chunks over time.
     435             :  */
     436             : STATIC void
     437      177402 : xfs_align_sparse_ino(
     438             :         struct xfs_mount                *mp,
     439             :         xfs_agino_t                     *startino,
     440             :         uint16_t                        *allocmask)
     441             : {
     442      177402 :         xfs_agblock_t                   agbno;
     443      177402 :         xfs_agblock_t                   mod;
     444      177402 :         int                             offset;
     445             : 
     446      177402 :         agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
     447      177402 :         mod = agbno % mp->m_sb.sb_inoalignmt;
     448      177402 :         if (!mod)
     449             :                 return;
     450             : 
     451             :         /* calculate the inode offset and align startino */
     452       99923 :         offset = XFS_AGB_TO_AGINO(mp, mod);
     453       99923 :         *startino -= offset;
     454             : 
     455             :         /*
     456             :          * Since startino has been aligned down, left shift allocmask such that
     457             :          * it continues to represent the same physical inodes relative to the
     458             :          * new startino.
     459             :          */
     460       99923 :         *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
     461             : }
     462             : 
     463             : /*
     464             :  * Determine whether the source inode record can merge into the target. Both
     465             :  * records must be sparse, the inode ranges must match and there must be no
     466             :  * allocation overlap between the records.
     467             :  */
     468             : STATIC bool
     469       25229 : __xfs_inobt_can_merge(
     470             :         struct xfs_inobt_rec_incore     *trec,  /* tgt record */
     471             :         struct xfs_inobt_rec_incore     *srec)  /* src record */
     472             : {
     473       25229 :         uint64_t                        talloc;
     474       25229 :         uint64_t                        salloc;
     475             : 
     476             :         /* records must cover the same inode range */
     477       25229 :         if (trec->ir_startino != srec->ir_startino)
     478             :                 return false;
     479             : 
     480             :         /* both records must be sparse */
     481       25229 :         if (!xfs_inobt_issparse(trec->ir_holemask) ||
     482       25229 :             !xfs_inobt_issparse(srec->ir_holemask))
     483             :                 return false;
     484             : 
     485             :         /* both records must track some inodes */
     486       25229 :         if (!trec->ir_count || !srec->ir_count)
     487             :                 return false;
     488             : 
     489             :         /* can't exceed capacity of a full record */
     490       25229 :         if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
     491             :                 return false;
     492             : 
     493             :         /* verify there is no allocation overlap */
     494       25229 :         talloc = xfs_inobt_irec_to_allocmask(trec);
     495       25229 :         salloc = xfs_inobt_irec_to_allocmask(srec);
     496       25229 :         if (talloc & salloc)
     497           0 :                 return false;
     498             : 
     499             :         return true;
     500             : }
     501             : 
     502             : /*
     503             :  * Merge the source inode record into the target. The caller must call
     504             :  * __xfs_inobt_can_merge() to ensure the merge is valid.
     505             :  */
     506             : STATIC void
     507       25229 : __xfs_inobt_rec_merge(
     508             :         struct xfs_inobt_rec_incore     *trec,  /* target */
     509             :         struct xfs_inobt_rec_incore     *srec)  /* src */
     510             : {
     511       25229 :         ASSERT(trec->ir_startino == srec->ir_startino);
     512             : 
     513             :         /* combine the counts */
     514       25229 :         trec->ir_count += srec->ir_count;
     515       25229 :         trec->ir_freecount += srec->ir_freecount;
     516             : 
     517             :         /*
     518             :          * Merge the holemask and free mask. For both fields, 0 bits refer to
     519             :          * allocated inodes. We combine the allocated ranges with bitwise AND.
     520             :          */
     521       25229 :         trec->ir_holemask &= srec->ir_holemask;
     522       25229 :         trec->ir_free &= srec->ir_free;
     523       25229 : }
     524             : 
     525             : /*
     526             :  * Insert a new sparse inode chunk into the associated inode btree. The inode
     527             :  * record for the sparse chunk is pre-aligned to a startino that should match
     528             :  * any pre-existing sparse inode record in the tree. This allows sparse chunks
     529             :  * to fill over time.
     530             :  *
     531             :  * This function supports two modes of handling preexisting records depending on
     532             :  * the merge flag. If merge is true, the provided record is merged with the
     533             :  * existing record and updated in place. The merged record is returned in nrec.
     534             :  * If merge is false, an existing record is replaced with the provided record.
     535             :  * If no preexisting record exists, the provided record is always inserted.
     536             :  *
     537             :  * It is considered corruption (-EFSCORRUPTED) if a merge is requested and not
     538             :  * possible; given the sparse inode alignment constraints it should not happen.
     539             :  */
     540             : STATIC int
     541      354804 : xfs_inobt_insert_sprec(
     542             :         struct xfs_perag                *pag,   /* perag of target AG */
     543             :         struct xfs_trans                *tp,    /* transaction */
     544             :         struct xfs_buf                  *agbp,  /* AGI buffer */
     545             :         int                             btnum,  /* XFS_BTNUM_INO or _FINO */
     546             :         struct xfs_inobt_rec_incore     *nrec,  /* in/out: new/merged rec. */
     547             :         bool                            merge)  /* merge or replace */
     548             : {
     549      354804 :         struct xfs_mount                *mp = pag->pag_mount;
     550      354804 :         struct xfs_btree_cur            *cur;
     551      354804 :         int                             error;
     552      354804 :         int                             i;
     553      354804 :         struct xfs_inobt_rec_incore     rec;
     554             : 
     555      354804 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
     556             : 
     557             :         /* the new record is pre-aligned so we know where to look */
     558      354804 :         error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
     559      354804 :         if (error)
     560           0 :                 goto error;
     561             :         /* if nothing there, insert nrec as a new record and return */
     562      354804 :         if (i == 0) {
     563      329575 :                 error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
     564      329575 :                                              nrec->ir_count, nrec->ir_freecount,
     565             :                                              nrec->ir_free, &i);
     566      329575 :                 if (error)
     567           0 :                         goto error;
     568      329575 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     569           0 :                         error = -EFSCORRUPTED;
     570           0 :                         goto error;
     571             :                 }
     572             : 
     573      329575 :                 goto out;
     574             :         }
     575             : 
     576             :         /*
     577             :          * A record exists at this startino. Merge or replace the record
     578             :          * depending on what we've been asked to do.
     579             :          */
     580       25229 :         if (merge) {
     581       25229 :                 error = xfs_inobt_get_rec(cur, &rec, &i);
     582       25229 :                 if (error)
     583           0 :                         goto error;
     584       25229 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
     585           0 :                         error = -EFSCORRUPTED;
     586           0 :                         goto error;
     587             :                 }
     588       25229 :                 if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
     589           0 :                         error = -EFSCORRUPTED;
     590           0 :                         goto error;
     591             :                 }
     592             : 
     593             :                 /*
     594             :                  * This should never fail. If we have coexisting records that
     595             :                  * cannot merge, something is seriously wrong.
     596             :                  */
     597       25229 :                 if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
     598           0 :                         error = -EFSCORRUPTED;
     599           0 :                         goto error;
     600             :                 }
     601             : 
     602       25229 :                 trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
     603             :                                          rec.ir_holemask, nrec->ir_startino,
     604             :                                          nrec->ir_holemask);
     605             : 
     606             :                 /* merge rec into nrec so nrec carries the updated record out */
     607       25229 :                 __xfs_inobt_rec_merge(nrec, &rec);
     608             : 
     609       25229 :                 trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
     610             :                                           nrec->ir_holemask);
     611             : 
     612       25229 :                 error = xfs_inobt_rec_check_count(mp, nrec);
     613       25229 :                 if (error)
     614           0 :                         goto error;
     615             :         }
     616             : 
     617       25229 :         error = xfs_inobt_update(cur, nrec);
     618       25229 :         if (error)
     619           0 :                 goto error;
     620             : 
     621       25229 : out:
     622      354804 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
     623      354804 :         return 0;
     624           0 : error:
     625           0 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
     626           0 :         return error;
     627             : }
     628             : 
     629             : /*
     630             :  * Allocate new inodes in the allocation group specified by agbp.  Returns 0 if
     631             :  * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so
     632             :  * the caller knows it can try another AG, a hard -ENOSPC when over the maximum
     633             :  * inode count threshold, or the usual negative error code for other errors.
     634             :  */
     635             : STATIC int
     636      638517 : xfs_ialloc_ag_alloc(
     637             :         struct xfs_perag        *pag,           /* AG to allocate in */
     638             :         struct xfs_trans        *tp,            /* transaction */
     639             :         struct xfs_buf          *agbp)          /* AGI buffer of that AG */
     640             : {
     641      638517 :         struct xfs_agi          *agi;
     642      638517 :         struct xfs_alloc_arg    args;
     643      638517 :         int                     error;
     644      638517 :         xfs_agino_t             newino;         /* new first inode's number */
     645      638517 :         xfs_agino_t             newlen;         /* new number of inodes */
     646      638517 :         int                     isaligned = 0;  /* inode allocation at stripe */
     647             :                                                 /* unit boundary */
     648             :         /* allocmask (below) is init. to full chunk */
     649      638517 :         struct xfs_inobt_rec_incore rec;
     650      638517 :         struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
     651      638517 :         uint16_t                allocmask = (uint16_t) -1;
     652      638517 :         int                     do_sparse = 0;
     653             : 
     654      638517 :         memset(&args, 0, sizeof(args));
     655      638517 :         args.tp = tp;
     656      638517 :         args.mp = tp->t_mountp;
     657      638517 :         args.fsbno = NULLFSBLOCK;
     658      638517 :         args.oinfo = XFS_RMAP_OINFO_INODES;
     659      638517 :         args.pag = pag;
     660             : 
     661             : #ifdef DEBUG
     662             :         /* randomly do sparse inode allocations */
     663      638517 :         if (xfs_has_sparseinodes(tp->t_mountp) &&
     664      638514 :             igeo->ialloc_min_blks < igeo->ialloc_blks)
     665      638471 :                 do_sparse = get_random_u32_below(2);
     666             : #endif
     667             : 
     668             :         /*
     669             :          * Locking will ensure that we don't have two callers in here
     670             :          * at one time.
     671             :          */
     672      638517 :         newlen = igeo->ialloc_inos;
     673      638517 :         if (igeo->maxicount &&
     674      638516 :             percpu_counter_read_positive(&args.mp->m_icount) + newlen >
     675             :                                                         igeo->maxicount)
     676             :                 return -ENOSPC;
     677      638492 :         args.minlen = args.maxlen = igeo->ialloc_blks;
     678             :         /*
     679             :          * First try to allocate inodes contiguous with the last-allocated
     680             :          * chunk of inodes.  If the filesystem is striped, this will fill
     681             :          * an entire stripe unit with inodes.
     682             :          */
     683      638492 :         agi = agbp->b_addr;
     684      638492 :         newino = be32_to_cpu(agi->agi_newino);
     685      638492 :         args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
     686      638492 :                      igeo->ialloc_blks;
     687      638492 :         if (do_sparse)
     688      319108 :                 goto sparse_alloc;
     689      621379 :         if (likely(newino != NULLAGINO &&
     690             :                   (args.agbno < be32_to_cpu(agi->agi_length)))) {
     691      301988 :                 args.prod = 1;
     692             : 
     693             :                 /*
     694             :                  * We need to take into account alignment here to ensure that
     695             :                  * we don't modify the free list if we fail to have an exact
     696             :                  * block. If we don't have an exact match, and every other
     697             :                  * allocation attempt fails, we'll end up cancelling
     698             :                  * a dirty transaction and shutting down.
     699             :                  *
     700             :                  * For an exact allocation, alignment must be 1,
     701             :                  * however we need to take cluster alignment into account when
     702             :                  * fixing up the freelist. Use the minalignslop field to
     703             :                  * indicate that extra blocks might be required for alignment,
     704             :                  * but not to use them in the actual exact allocation.
     705             :                  */
     706      301988 :                 args.alignment = 1;
     707      301988 :                 args.minalignslop = igeo->cluster_align - 1;
     708             : 
     709             :                 /* Allow space for the inode btree to split. */
     710      301988 :                 args.minleft = igeo->inobt_maxlevels;
     711      603976 :                 error = xfs_alloc_vextent_exact_bno(&args,
     712      301988 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     713             :                                                 args.agbno));
     714      301988 :                 if (error)
     715             :                         return error;
     716             : 
     717             :                 /*
     718             :                  * This request might have dirtied the transaction if the AG can
     719             :                  * satisfy the request, but the exact block was not available.
     720             :                  * If the allocation did fail, subsequent requests will relax
     721             :                  * the exact agbno requirement and increase the alignment
     722             :                  * instead. It is critical that the total size of the request
     723             :                  * (len + alignment + slop) does not increase from this point
     724             :                  * on, so reset minalignslop to ensure it is not included in
     725             :                  * subsequent requests.
     726             :                  */
     727      301984 :                 args.minalignslop = 0;
     728             :         }
     729             : 
     730      319380 :         if (unlikely(args.fsbno == NULLFSBLOCK)) {
     731             :                 /*
     732             :                  * Set the alignment for the allocation.
     733             :                  * If stripe alignment is turned on then align at stripe unit
     734             :                  * boundary.
     735             :                  * If the cluster size is smaller than a filesystem block
     736             :                  * then we're doing I/O for inodes in filesystem block size
     737             :                  * pieces, so don't need alignment anyway.
     738             :                  */
     739      259418 :                 isaligned = 0;
     740      259418 :                 if (igeo->ialloc_align) {
     741           0 :                         ASSERT(!xfs_has_noalign(args.mp));
     742           0 :                         args.alignment = args.mp->m_dalign;
     743           0 :                         isaligned = 1;
     744             :                 } else
     745      259418 :                         args.alignment = igeo->cluster_align;
     746             :                 /*
     747             :                  * Allocate a fixed-size extent of inodes.
     748             :                  */
     749      259418 :                 args.prod = 1;
     750             :                 /*
     751             :                  * Allow space for the inode btree to split.
     752             :                  */
     753      259418 :                 args.minleft = igeo->inobt_maxlevels;
     754      518836 :                 error = xfs_alloc_vextent_near_bno(&args,
     755      259418 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     756             :                                                 be32_to_cpu(agi->agi_root)));
     757      259418 :                 if (error)
     758             :                         return error;
     759             :         }
     760             : 
     761             :         /*
     762             :          * If stripe alignment is turned on, then try again with cluster
     763             :          * alignment.
     764             :          */
     765      259416 :         if (isaligned && args.fsbno == NULLFSBLOCK) {
     766           0 :                 args.alignment = igeo->cluster_align;
     767           0 :                 error = xfs_alloc_vextent_near_bno(&args,
     768           0 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     769             :                                                 be32_to_cpu(agi->agi_root)));
     770           0 :                 if (error)
     771             :                         return error;
     772             :         }
     773             : 
     774             :         /*
     775             :          * Finally, try a sparse allocation if the filesystem supports it and
     776             :          * the sparse allocation length is smaller than a full chunk.
     777             :          */
     778      319378 :         if (xfs_has_sparseinodes(args.mp) &&
     779      319376 :             igeo->ialloc_min_blks < igeo->ialloc_blks &&
     780      319333 :             args.fsbno == NULLFSBLOCK) {
     781      144317 : sparse_alloc:
     782      463425 :                 args.alignment = args.mp->m_sb.sb_spino_align;
     783      463425 :                 args.prod = 1;
     784             : 
     785      463425 :                 args.minlen = igeo->ialloc_min_blks;
     786      463425 :                 args.maxlen = args.minlen;
     787             : 
     788             :                 /*
     789             :                  * The inode record will be aligned to full chunk size. We must
     790             :                  * prevent sparse allocation from AG boundaries that result in
     791             :                  * invalid inode records, such as records that start at agbno 0
     792             :                  * or extend beyond the AG.
     793             :                  *
     794             :                  * Set min agbno to the first aligned, non-zero agbno and max to
     795             :                  * the last aligned agbno that is at least one full chunk from
     796             :                  * the end of this AG, which may be a short (runt) last AG.
     797             :                  */
     798      463425 :                 args.min_agbno = args.mp->m_sb.sb_inoalignmt;
     799      463425 :                 args.max_agbno = round_down(
     800      463425 :                                 xfs_ag_block_count(args.mp, pag->pag_agno),
     801      463425 :                                 args.mp->m_sb.sb_inoalignmt) - igeo->ialloc_blks;
     802             : 
     803      926850 :                 error = xfs_alloc_vextent_near_bno(&args,
     804      463425 :                                 XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
     805             :                                                 be32_to_cpu(agi->agi_root)));
     806      463425 :                 if (error)
     807             :                         return error;
     808             : 
     809      463408 :                 newlen = XFS_AGB_TO_AGINO(args.mp, args.len);
     810      463408 :                 ASSERT(newlen <= XFS_INODES_PER_CHUNK);
     811      463408 :                 allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
     812             :         }
     813             : 
     814      638469 :         if (args.fsbno == NULLFSBLOCK)
     815             :                 return -EAGAIN;
     816             : 
     817      352463 :         ASSERT(args.len == args.minlen);
     818             : 
     819             :         /*
     820             :          * Stamp and write the inode buffers.
     821             :          *
     822             :          * Seed the new inode cluster with a random generation number. This
     823             :          * prevents short-term reuse of generation numbers if a chunk is
     824             :          * freed and then immediately reallocated. We use random numbers
     825             :          * rather than a linear progression to prevent the next generation
     826             :          * number from being easily guessable.
     827             :          */
     828      352463 :         error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
     829             :                         args.agbno, args.len, get_random_u32());
     830             : 
     831      352463 :         if (error)
     832             :                 return error;
     833             :         /*
     834             :          * Convert the results.
     835             :          */
     836      352463 :         newino = XFS_AGB_TO_AGINO(args.mp, args.agbno);
     837             : 
     838      352463 :         if (xfs_inobt_issparse(~allocmask)) {
     839             :                 /*
     840             :                  * We've allocated a sparse chunk. Align the startino and mask.
     841             :                  */
     842      177402 :                 xfs_align_sparse_ino(args.mp, &newino, &allocmask);
     843             : 
     844      177402 :                 rec.ir_startino = newino;
     845      177402 :                 rec.ir_holemask = ~allocmask;
     846      177402 :                 rec.ir_count = newlen;
     847      177402 :                 rec.ir_freecount = newlen;
     848      177402 :                 rec.ir_free = XFS_INOBT_ALL_FREE;
     849             : 
     850             :                 /*
     851             :                  * Insert the sparse record into the inobt and allow for a merge
     852             :                  * if necessary. If a merge does occur, rec is updated to the
     853             :                  * merged record.
     854             :                  */
     855      177402 :                 error = xfs_inobt_insert_sprec(pag, tp, agbp,
     856             :                                 XFS_BTNUM_INO, &rec, true);
     857      177402 :                 if (error == -EFSCORRUPTED) {
     858           0 :                         xfs_alert(args.mp,
     859             :         "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
     860             :                                   XFS_AGINO_TO_INO(args.mp, pag->pag_agno,
     861             :                                                    rec.ir_startino),
     862             :                                   rec.ir_holemask, rec.ir_count);
     863           0 :                         xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
     864             :                 }
     865      177402 :                 if (error)
     866             :                         return error;
     867             : 
     868             :                 /*
     869             :                  * We can't merge the part we've just allocated as for the inobt
     870             :                  * due to finobt semantics. The original record may or may not
     871             :                  * exist independent of whether physical inodes exist in this
     872             :                  * sparse chunk.
     873             :                  *
     874             :                  * We must update the finobt record based on the inobt record.
     875             :                  * rec contains the fully merged and up to date inobt record
     876             :                  * from the previous call. Set merge false to replace any
     877             :                  * existing record with this one.
     878             :                  */
     879      177402 :                 if (xfs_has_finobt(args.mp)) {
     880      177402 :                         error = xfs_inobt_insert_sprec(pag, tp, agbp,
     881             :                                        XFS_BTNUM_FINO, &rec, false);
     882      177402 :                         if (error)
     883             :                                 return error;
     884             :                 }
     885             :         } else {
     886             :                 /* full chunk - insert new records to both btrees */
     887      175061 :                 error = xfs_inobt_insert(pag, tp, agbp, newino, newlen,
     888             :                                          XFS_BTNUM_INO);
     889      175061 :                 if (error)
     890             :                         return error;
     891             : 
     892      175061 :                 if (xfs_has_finobt(args.mp)) {
     893      175057 :                         error = xfs_inobt_insert(pag, tp, agbp, newino,
     894             :                                                  newlen, XFS_BTNUM_FINO);
     895      175057 :                         if (error)
     896             :                                 return error;
     897             :                 }
     898             :         }
     899             : 
     900             :         /*
     901             :          * Update AGI counts and newino.
     902             :          */
     903      352462 :         be32_add_cpu(&agi->agi_count, newlen);
     904      352462 :         be32_add_cpu(&agi->agi_freecount, newlen);
     905      352462 :         pag->pagi_freecount += newlen;
     906      352462 :         pag->pagi_count += newlen;
     907      352462 :         agi->agi_newino = cpu_to_be32(newino);
     908             : 
     909             :         /*
     910             :          * Log allocation group header fields
     911             :          */
     912      352462 :         xfs_ialloc_log_agi(tp, agbp,
     913             :                 XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
     914             :         /*
     915             :          * Modify/log superblock values for inode count and inode free count.
     916             :          */
     917      352462 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
     918      352462 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
     919      352462 :         return 0;
     920             : }
     921             : 
     922             : /*
     923             :  * Step the cursor one record left/right and, if one is there, read it to rec.
     924             :  */
     925             : STATIC int
     926          12 : xfs_ialloc_next_rec(
     927             :         struct xfs_btree_cur    *cur,   /* btree cursor, moved by one */
     928             :         xfs_inobt_rec_incore_t  *rec,   /* out: record, only if !*done */
     929             :         int                     *done,  /* out: set if the move got stat 0 */
     930             :         int                     left)   /* nonzero: decrement, else increment */
     931             : {
     932          12 :         int                     error;
     933          12 :         int                     i;
     934             : 
     935          12 :         if (left)
     936           6 :                 error = xfs_btree_decrement(cur, 0, &i);
     937             :         else
     938           6 :                 error = xfs_btree_increment(cur, 0, &i);
     939             : 
     940          12 :         if (error)
     941             :                 return error;
     942          12 :         *done = !i;
     943          12 :         if (i) {
     944           6 :                 error = xfs_inobt_get_rec(cur, rec, &i);
     945           6 :                 if (error)
     946             :                         return error;
     947           6 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
     948           0 :                         return -EFSCORRUPTED;
     949             :         }
     950             : 
     951             :         return 0;
     952             : }
     953             : 
     954             : STATIC int                              /* error */
     955         160 : xfs_ialloc_get_rec(                     /* XFS_LOOKUP_EQ lookup of agino */
     956             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     957             :         xfs_agino_t             agino,  /* inode number to look up */
     958             :         xfs_inobt_rec_incore_t  *rec,   /* out: record, only if !*done */
     959             :         int                     *done)  /* out: set if lookup got stat 0 */
     960             : {
     961         160 :         int                     error;
     962         160 :         int                     i;
     963             : 
     964         160 :         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
     965         160 :         if (error)
     966             :                 return error;
     967         160 :         *done = !i;
     968         160 :         if (i) {
     969          80 :                 error = xfs_inobt_get_rec(cur, rec, &i);
     970          80 :                 if (error)
     971             :                         return error;
     972          80 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
     973           0 :                         return -EFSCORRUPTED;
     974             :         }
     975             : 
     976             :         return 0;
     977             : }
     978             : 
     979             : /*
     980             :  * Return the offset of the first free inode in the record. If the inode chunk
     981             :  * is sparsely allocated, we convert the record holemask to inode granularity
     982             :  * and mask off the unallocated regions from the inode free mask.
     983             :  */
     984             : STATIC int
     985    47345582 : xfs_inobt_first_free_inode(
     986             :         struct xfs_inobt_rec_incore     *rec)   /* record to search, only read */
     987             : {
     988    47345582 :         xfs_inofree_t                   realfree;
     989             : 
     990             :         /* if there are no holes, ir_free can be used directly */
     991    47345582 :         if (!xfs_inobt_issparse(rec->ir_holemask))
     992    30063932 :                 return xfs_lowbit64(rec->ir_free);
     993             : 
     994    17281650 :         realfree = xfs_inobt_irec_to_allocmask(rec);
     995    17281879 :         realfree &= rec->ir_free;       /* keep free bits of allocated regions */
     996             : 
     997    34563758 :         return xfs_lowbit64(realfree);
     998             : }
     999             : 
    1000             : /*
    1001             :  * Allocate an inode using the inobt-only algorithm.
    1002             :  */
    1003             : STATIC int
    1004         392 : xfs_dialloc_ag_inobt(
    1005             :         struct xfs_perag        *pag,
    1006             :         struct xfs_trans        *tp,
    1007             :         struct xfs_buf          *agbp,
    1008             :         xfs_ino_t               parent,
    1009             :         xfs_ino_t               *inop)
    1010             : {
    1011         392 :         struct xfs_mount        *mp = tp->t_mountp;
    1012         392 :         struct xfs_agi          *agi = agbp->b_addr;
    1013         392 :         xfs_agnumber_t          pagno = XFS_INO_TO_AGNO(mp, parent);
    1014         392 :         xfs_agino_t             pagino = XFS_INO_TO_AGINO(mp, parent);
    1015         392 :         struct xfs_btree_cur    *cur, *tcur;
    1016         392 :         struct xfs_inobt_rec_incore rec, trec;
    1017         392 :         xfs_ino_t               ino;
    1018         392 :         int                     error;
    1019         392 :         int                     offset;
    1020         392 :         int                     i, j;
    1021         392 :         int                     searchdistance = 10;
    1022             : 
    1023         784 :         ASSERT(xfs_perag_initialised_agi(pag));
    1024         784 :         ASSERT(xfs_perag_allows_inodes(pag));
    1025         392 :         ASSERT(pag->pagi_freecount > 0);
    1026             : 
    1027         392 :  restart_pagno:
    1028         392 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    1029             :         /*
    1030             :          * If pagino is 0 (this is the root inode allocation) use newino.
    1031             :          * This must work because we've just allocated some.
    1032             :          */
    1033         392 :         if (!pagino)
    1034          22 :                 pagino = be32_to_cpu(agi->agi_newino);
    1035             : 
    1036         392 :         error = xfs_check_agi_freecount(cur);
    1037         392 :         if (error)
    1038           0 :                 goto error0;
    1039             : 
    1040             :         /*
    1041             :          * If in the same AG as the parent, try to get near the parent.
    1042             :          */
    1043         392 :         if (pagno == pag->pag_agno) {
    1044         392 :                 int             doneleft;       /* done, to the left */
    1045         392 :                 int             doneright;      /* done, to the right */
    1046             : 
    1047         392 :                 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
    1048         392 :                 if (error)
    1049           0 :                         goto error0;
    1050         392 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1051           0 :                         error = -EFSCORRUPTED;
    1052           0 :                         goto error0;
    1053             :                 }
    1054             : 
    1055         392 :                 error = xfs_inobt_get_rec(cur, &rec, &j);
    1056         392 :                 if (error)
    1057           0 :                         goto error0;
    1058         392 :                 if (XFS_IS_CORRUPT(mp, j != 1)) {
    1059           0 :                         error = -EFSCORRUPTED;
    1060           0 :                         goto error0;
    1061             :                 }
    1062             : 
    1063         392 :                 if (rec.ir_freecount > 0) {
    1064             :                         /*
    1065             :                          * Found a free inode in the same chunk
    1066             :                          * as the parent, done.
    1067             :                          */
    1068         392 :                         goto alloc_inode;
    1069             :                 }
    1070             : 
    1071             : 
    1072             :                 /*
    1073             :                  * In the same AG as parent, but parent's chunk is full.
    1074             :                  */
    1075             : 
    1076             :                 /* duplicate the cursor, search left & right simultaneously */
    1077          86 :                 error = xfs_btree_dup_cursor(cur, &tcur);
    1078          86 :                 if (error)
    1079           0 :                         goto error0;
    1080             : 
    1081             :                 /*
    1082             :                  * Skip to last blocks looked up if same parent inode.
    1083             :                  */
    1084          86 :                 if (pagino != NULLAGINO &&
    1085          86 :                     pag->pagl_pagino == pagino &&
    1086          80 :                     pag->pagl_leftrec != NULLAGINO &&
    1087          80 :                     pag->pagl_rightrec != NULLAGINO) {
    1088          80 :                         error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
    1089             :                                                    &trec, &doneleft);
    1090          80 :                         if (error)
    1091           0 :                                 goto error1;
    1092             : 
    1093          80 :                         error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
    1094             :                                                    &rec, &doneright);
    1095          80 :                         if (error)
    1096           0 :                                 goto error1;
    1097             :                 } else {
    1098             :                         /* search left with tcur, back up 1 record */
    1099           6 :                         error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
    1100           6 :                         if (error)
    1101           0 :                                 goto error1;
    1102             : 
    1103             :                         /* search right with cur, go forward 1 record. */
    1104           6 :                         error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
    1105           6 :                         if (error)
    1106           0 :                                 goto error1;
    1107             :                 }
    1108             : 
    1109             :                 /*
    1110             :                  * Loop until we find an inode chunk with a free inode.
    1111             :                  */
    1112          86 :                 while (--searchdistance > 0 && (!doneleft || !doneright)) {
    1113          86 :                         int     useleft;  /* using left inode chunk this time */
    1114             : 
    1115             :                         /* figure out the closer block if both are valid. */
    1116          86 :                         if (!doneleft && !doneright) {
    1117           0 :                                 useleft = pagino -
    1118           0 :                                  (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
    1119           0 :                                   rec.ir_startino - pagino;
    1120             :                         } else {
    1121          86 :                                 useleft = !doneleft;
    1122             :                         }
    1123             : 
    1124             :                         /* free inodes to the left? */
    1125          86 :                         if (useleft && trec.ir_freecount) {
    1126           0 :                                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1127           0 :                                 cur = tcur;
    1128             : 
    1129           0 :                                 pag->pagl_leftrec = trec.ir_startino;
    1130           0 :                                 pag->pagl_rightrec = rec.ir_startino;
    1131           0 :                                 pag->pagl_pagino = pagino;
    1132           0 :                                 rec = trec;
    1133           0 :                                 goto alloc_inode;
    1134             :                         }
    1135             : 
    1136             :                         /* free inodes to the right? */
    1137          86 :                         if (!useleft && rec.ir_freecount) {
    1138          86 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    1139             : 
    1140          86 :                                 pag->pagl_leftrec = trec.ir_startino;
    1141          86 :                                 pag->pagl_rightrec = rec.ir_startino;
    1142          86 :                                 pag->pagl_pagino = pagino;
    1143          86 :                                 goto alloc_inode;
    1144             :                         }
    1145             : 
    1146             :                         /* get next record to check */
    1147           0 :                         if (useleft) {
    1148           0 :                                 error = xfs_ialloc_next_rec(tcur, &trec,
    1149             :                                                                  &doneleft, 1);
    1150             :                         } else {
    1151           0 :                                 error = xfs_ialloc_next_rec(cur, &rec,
    1152             :                                                                  &doneright, 0);
    1153             :                         }
    1154           0 :                         if (error)
    1155           0 :                                 goto error1;
    1156             :                 }
    1157             : 
    1158           0 :                 if (searchdistance <= 0) {
    1159             :                         /*
    1160             :                          * Not in range - save last search
    1161             :                          * location and allocate a new inode
    1162             :                          */
    1163           0 :                         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    1164           0 :                         pag->pagl_leftrec = trec.ir_startino;
    1165           0 :                         pag->pagl_rightrec = rec.ir_startino;
    1166           0 :                         pag->pagl_pagino = pagino;
    1167             : 
    1168             :                 } else {
    1169             :                         /*
    1170             :                          * We've reached the end of the btree. Because
    1171             :                          * we are only searching a small chunk of the
    1172             :                          * btree each search, there are obviously free
    1173             :                          * inodes closer to the parent inode than we
    1174             :                          * are now. Restart the search again.
    1175             :                          */
    1176           0 :                         pag->pagl_pagino = NULLAGINO;
    1177           0 :                         pag->pagl_leftrec = NULLAGINO;
    1178           0 :                         pag->pagl_rightrec = NULLAGINO;
    1179           0 :                         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    1180           0 :                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1181           0 :                         goto restart_pagno;
    1182             :                 }
    1183             :         }
    1184             : 
    1185             :         /*
    1186             :          * In a different AG from the parent.
    1187             :          * See if the most recently allocated block has any free.
    1188             :          */
    1189           0 :         if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
    1190           0 :                 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
    1191             :                                          XFS_LOOKUP_EQ, &i);
    1192           0 :                 if (error)
    1193           0 :                         goto error0;
    1194             : 
    1195           0 :                 if (i == 1) {
    1196           0 :                         error = xfs_inobt_get_rec(cur, &rec, &j);
    1197           0 :                         if (error)
    1198           0 :                                 goto error0;
    1199             : 
    1200           0 :                         if (j == 1 && rec.ir_freecount > 0) {
    1201             :                                 /*
    1202             :                                  * The last chunk allocated in the group
    1203             :                                  * still has a free inode.
    1204             :                                  */
    1205           0 :                                 goto alloc_inode;
    1206             :                         }
    1207             :                 }
    1208             :         }
    1209             : 
    1210             :         /*
    1211             :          * None left in the last group, search the whole AG
    1212             :          */
    1213           0 :         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
    1214           0 :         if (error)
    1215           0 :                 goto error0;
    1216           0 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    1217           0 :                 error = -EFSCORRUPTED;
    1218           0 :                 goto error0;
    1219             :         }
    1220             : 
    1221           0 :         for (;;) {
    1222           0 :                 error = xfs_inobt_get_rec(cur, &rec, &i);
    1223           0 :                 if (error)
    1224           0 :                         goto error0;
    1225           0 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1226           0 :                         error = -EFSCORRUPTED;
    1227           0 :                         goto error0;
    1228             :                 }
    1229           0 :                 if (rec.ir_freecount > 0)
    1230             :                         break;
    1231           0 :                 error = xfs_btree_increment(cur, 0, &i);
    1232           0 :                 if (error)
    1233           0 :                         goto error0;
    1234           0 :                 if (XFS_IS_CORRUPT(mp, i != 1)) {
    1235           0 :                         error = -EFSCORRUPTED;
    1236           0 :                         goto error0;
    1237             :                 }
    1238             :         }
    1239             : 
    1240           0 : alloc_inode:
    1241         392 :         offset = xfs_inobt_first_free_inode(&rec);
    1242         392 :         ASSERT(offset >= 0);
    1243         392 :         ASSERT(offset < XFS_INODES_PER_CHUNK);
    1244         392 :         ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
    1245             :                                    XFS_INODES_PER_CHUNK) == 0);
    1246         392 :         ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
    1247         392 :         rec.ir_free &= ~XFS_INOBT_MASK(offset);
    1248         392 :         rec.ir_freecount--;
    1249         392 :         error = xfs_inobt_update(cur, &rec);
    1250         392 :         if (error)
    1251           0 :                 goto error0;
    1252         392 :         be32_add_cpu(&agi->agi_freecount, -1);
    1253         392 :         xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
    1254         392 :         pag->pagi_freecount--;
    1255             : 
    1256         392 :         error = xfs_check_agi_freecount(cur);
    1257         392 :         if (error)
    1258           0 :                 goto error0;
    1259             : 
    1260         392 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1261         392 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
    1262         392 :         *inop = ino;
    1263         392 :         return 0;
    1264             : error1:
    1265           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    1266           0 : error0:
    1267           0 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    1268           0 :         return error;
    1269             : }
    1270             : 
    1271             : /*
    1272             :  * Use the free inode btree to allocate an inode based on distance from the
    1273             :  * parent. Note that the provided cursor may be deleted and replaced.
                     :  *
                     :  * pagino is the AG-relative inode number to search around.  *ocur is the
                     :  * finobt cursor to search with; when the record to the right of pagino is
                     :  * chosen, that cursor is deleted and *ocur is switched to the duplicate
                     :  * cursor that did the right-hand lookup.  The chosen record is copied to
                     :  * *rec.
                     :  *
                     :  * A less-than-or-equal lookup is tried first and its record is returned
                     :  * straight away if it covers pagino.  Otherwise a greater-than-or-equal
                     :  * lookup is done on a duplicated cursor and one of the two neighbouring
                     :  * records is picked.  Finding no record on either side is reported as
                     :  * -EFSCORRUPTED.
                     :  *
                     :  * Returns 0 on success, otherwise the error.  On failure only the duplicate
                     :  * cursor created here is deleted; *ocur is left for the caller to clean up.
    1274             :  */
    1275             : STATIC int
    1276    44609919 : xfs_dialloc_ag_finobt_near(
    1277             :         xfs_agino_t                     pagino,
    1278             :         struct xfs_btree_cur            **ocur,
    1279             :         struct xfs_inobt_rec_incore     *rec)
    1280             : {
    1281    44609919 :         struct xfs_btree_cur            *lcur = *ocur;  /* left search cursor */
    1282    44609919 :         struct xfs_btree_cur            *rcur;  /* right search cursor */
    1283    44609919 :         struct xfs_inobt_rec_incore     rrec;
    1284    44609919 :         int                             error;
    1285    44609919 :         int                             i, j;
    1286             : 
    1287    44609919 :         error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
    1288    44612828 :         if (error)
    1289             :                 return error;
    1290             : 
    1291    44612828 :         if (i == 1) {
    1292     4463856 :                 error = xfs_inobt_get_rec(lcur, rec, &i);
    1293     4463860 :                 if (error)
    1294             :                         return error;
    1295     4463860 :                 if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1))
    1296           0 :                         return -EFSCORRUPTED;
    1297             : 
    1298             :                 /*
    1299             :                  * See if we've landed in the parent inode record. The finobt
    1300             :                  * only tracks chunks with at least one free inode, so record
    1301             :                  * existence is enough.
    1302             :                  */
    1303     4463860 :                 if (pagino >= rec->ir_startino &&
    1304     4463859 :                     pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
    1305             :                         return 0;
    1306             :         }
    1307             : 
    1308    42643840 :         error = xfs_btree_dup_cursor(lcur, &rcur);
    1309    42643773 :         if (error)
    1310             :                 return error;
    1311             : 
    1312    42644543 :         error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
    1313    42642796 :         if (error)
    1314           0 :                 goto error_rcur;
    1315    42642796 :         if (j == 1) {
    1316    41355041 :                 error = xfs_inobt_get_rec(rcur, &rrec, &j);
    1317    41355118 :                 if (error)
    1318           0 :                         goto error_rcur;
    1319    41355118 :                 if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
    1320           0 :                         error = -EFSCORRUPTED;
    1321           0 :                         goto error_rcur;
    1322             :                 }
    1323             :         }
    1324             : 
    1325    42642873 :         if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
    1326           0 :                 error = -EFSCORRUPTED;
    1327           0 :                 goto error_rcur;
    1328             :         }
    1329    42642873 :         if (i == 1 && j == 1) {
    1330             :                 /*
    1331             :                  * Both the left and right records are valid. Choose the closer
    1332             :                  * inode chunk to the target.
                     :                  *
                     :                  * NOTE(review): the left-hand term below evaluates to
                     :                  * (pagino - ir_startino) + XFS_INODES_PER_CHUNK - 1, whereas
                     :                  * the inobt-based search earlier in this file measures from
                     :                  * the last inode of the left chunk.  Confirm which distance
                     :                  * is intended before relying on "closer" here.
    1333             :                  */
    1334     1205974 :                 if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
    1335     1205974 :                     (rrec.ir_startino - pagino)) {
    1336      511216 :                         *rec = rrec;
    1337      511216 :                         xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
    1338      511216 :                         *ocur = rcur;
    1339             :                 } else {
    1340      694758 :                         xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
    1341             :                 }
    1342    41436899 :         } else if (j == 1) {
    1343             :                 /* only the right record is valid */
    1344    40148002 :                 *rec = rrec;
    1345    40148002 :                 xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
    1346    40148867 :                 *ocur = rcur;
    1347     1288897 :         } else if (i == 1) {
    1348             :                 /* only the left record is valid */
    1349     1288896 :                 xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
    1350             :         }
    1351             : 
    1352             :         return 0;
    1353             : 
    1354           0 : error_rcur:
    1355           0 :         xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
    1356           0 :         return error;
    1357             : }
    1358             : 
    1359             : /*
    1360             :  * Use the free inode btree to find a free inode based on a newino hint. If
    1361             :  * the hint is NULL, find the first free inode in the AG.
                     :  *
                     :  * The hint is agi->agi_newino and is searched for with an exact-match
                     :  * lookup.  The first-record fallback is also taken when the hint is set but
                     :  * no finobt record matches it.  Failing to find any record in the fallback
                     :  * lookup is reported as -EFSCORRUPTED.
                     :  *
                     :  * On success the chosen record is copied to *rec and 0 is returned; an
                     :  * error from a lookup or record read is passed back unchanged.
    1362             :  */
    1363             : STATIC int
    1364     2731902 : xfs_dialloc_ag_finobt_newino(
    1365             :         struct xfs_agi                  *agi,
    1366             :         struct xfs_btree_cur            *cur,
    1367             :         struct xfs_inobt_rec_incore     *rec)
    1368             : {
    1369     2731902 :         int error;
    1370     2731902 :         int i;
    1371             : 
    1372     2731902 :         if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
    1373     2686452 :                 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
    1374             :                                          XFS_LOOKUP_EQ, &i);
    1375     2686466 :                 if (error)
    1376             :                         return error;
    1377     2686466 :                 if (i == 1) {
    1378     2633105 :                         error = xfs_inobt_get_rec(cur, rec, &i);
    1379     2633107 :                         if (error)
    1380             :                                 return error;
    1381     2633107 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1382           0 :                                 return -EFSCORRUPTED;
    1383             :                         return 0;
    1384             :                 }
    1385             :         }
    1386             : 
    1387             :         /*
    1388             :          * Find the first inode available in the AG.
    1389             :          */
    1390       98811 :         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
    1391       98809 :         if (error)
    1392             :                 return error;
    1393       98809 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1394           0 :                 return -EFSCORRUPTED;
    1395             : 
    1396       98809 :         error = xfs_inobt_get_rec(cur, rec, &i);
    1397       98809 :         if (error)
    1398             :                 return error;
    1399       98809 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1400           0 :                 return -EFSCORRUPTED;
    1401             : 
    1402             :         return 0;
    1403             : }
    1404             : 
    1405             : /*
    1406             :  * Update the inobt based on a modification made to the finobt. Also ensure that
    1407             :  * the records from both trees are equivalent post-modification.
                     :  *
                     :  * The inobt record is found by an exact-match lookup of frec->ir_startino
                     :  * and must exist.  The XFS_INOBT_MASK(offset) bits are cleared from its
                     :  * ir_free and its ir_freecount is decremented; the result is then compared
                     :  * against frec, so frec has to carry the same modification already (the
                     :  * caller in this file applies it before calling).  A mismatch returns
                     :  * -EFSCORRUPTED without writing the inobt record.
                     :  *
                     :  * Returns the result of xfs_inobt_update() when the records agree, or the
                     :  * error that stopped the update.
    1408             :  */
    1409             : STATIC int
    1410    47339327 : xfs_dialloc_ag_update_inobt(
    1411             :         struct xfs_btree_cur            *cur,   /* inobt cursor */
    1412             :         struct xfs_inobt_rec_incore     *frec,  /* finobt record */
    1413             :         int                             offset) /* inode offset */
    1414             : {
    1415    47339327 :         struct xfs_inobt_rec_incore     rec;
    1416    47339327 :         int                             error;
    1417    47339327 :         int                             i;
    1418             : 
    1419    47339327 :         error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
    1420    47344831 :         if (error)
    1421             :                 return error;
    1422    47344706 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1423           0 :                 return -EFSCORRUPTED;
    1424             : 
    1425    47344706 :         error = xfs_inobt_get_rec(cur, &rec, &i);
    1426    47343944 :         if (error)
    1427             :                 return error;
    1428    47343944 :         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
    1429           0 :                 return -EFSCORRUPTED;
    1430    47343944 :         ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
    1431             :                                    XFS_INODES_PER_CHUNK) == 0);
    1432             : 
    1433    47343944 :         rec.ir_free &= ~XFS_INOBT_MASK(offset);
    1434    47343944 :         rec.ir_freecount--;
    1435             : 
    1436    47343944 :         if (XFS_IS_CORRUPT(cur->bc_mp,
    1437             :                            rec.ir_free != frec->ir_free ||
    1438             :                            rec.ir_freecount != frec->ir_freecount))
    1439           0 :                 return -EFSCORRUPTED;
    1440             : 
    1441    47343944 :         return xfs_inobt_update(cur, &rec);
    1442             : }
    1443             : 
    1444             : /*
    1445             :  * Allocate an inode using the free inode btree, if available. Otherwise, fall
    1446             :  * back to the inobt search algorithm.
    1447             :  *
    1448             :  * The caller selected an AG for us, and made sure that free inodes are
    1449             :  * available.
                     :  *
                     :  * pag is the chosen AG, tp the running transaction and agbp the buffer
                     :  * whose b_addr is read as that AG's struct xfs_agi.  parent is the inode
                     :  * number used as the locality target: its AG number selects between the
                     :  * near-parent search and the newino/first-free search, and an AG-relative
                     :  * part of zero is replaced by agi_newino.  *inop receives the allocated
                     :  * inode number and is written only on success.
                     :  *
                     :  * On the finobt path the finobt record is updated (or deleted) first, the
                     :  * matching inobt record second, and only then are agi_freecount,
                     :  * pag->pagi_freecount and the XFS_TRANS_SB_IFREE delta decremented.
                     :  *
                     :  * Returns 0 on success.  On error every cursor created here is deleted
                     :  * with XFS_BTREE_ERROR and the error is returned.  Without a finobt the
                     :  * result of xfs_dialloc_ag_inobt() is returned directly.
    1450             :  */
    1451             : static int
    1452    47344553 : xfs_dialloc_ag(
    1453             :         struct xfs_perag        *pag,
    1454             :         struct xfs_trans        *tp,
    1455             :         struct xfs_buf          *agbp,
    1456             :         xfs_ino_t               parent,
    1457             :         xfs_ino_t               *inop)
    1458             : {
    1459    47344553 :         struct xfs_mount                *mp = tp->t_mountp;
    1460    47344553 :         struct xfs_agi                  *agi = agbp->b_addr;
    1461    47344553 :         xfs_agnumber_t                  pagno = XFS_INO_TO_AGNO(mp, parent);
    1462    47344553 :         xfs_agino_t                     pagino = XFS_INO_TO_AGINO(mp, parent);
    1463    47344553 :         struct xfs_btree_cur            *cur;   /* finobt cursor */
    1464    47344553 :         struct xfs_btree_cur            *icur;  /* inobt cursor */
    1465    47344553 :         struct xfs_inobt_rec_incore     rec;
    1466    47344553 :         xfs_ino_t                       ino;
    1467    47344553 :         int                             error;
    1468    47344553 :         int                             offset;
    1469    47344553 :         int                             i;
    1470             : 
    1471    47344553 :         if (!xfs_has_finobt(mp))
    1472         392 :                 return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
    1473             : 
    1474             :         /*
    1475             :          * If pagino is 0 (this is the root inode allocation) use newino.
    1476             :          * This must work because we've just allocated some.
    1477             :          */
    1478    47344161 :         if (!pagino)
    1479        8067 :                 pagino = be32_to_cpu(agi->agi_newino);
    1480             : 
    1481    47344161 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
    1482             : 
    1483    47345228 :         error = xfs_check_agi_freecount(cur);
    1484    47344307 :         if (error)
    1485         304 :                 goto error_cur;
    1486             : 
    1487             :         /*
    1488             :          * The search algorithm depends on whether we're in the same AG as the
    1489             :          * parent. If so, find the closest available inode to the parent. If
    1490             :          * not, consider the agi hint or find the first free inode in the AG.
                     :          *
                     :          * The near-parent search may replace cur with a different
                     :          * cursor, which is why it is passed by address.
    1491             :          */
    1492    47344003 :         if (pag->pag_agno == pagno)
    1493    44612094 :                 error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
    1494             :         else
    1495     2731909 :                 error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
    1496    47344478 :         if (error)
    1497           0 :                 goto error_cur;
    1498             : 
    1499    47344478 :         offset = xfs_inobt_first_free_inode(&rec);
    1500    47343265 :         ASSERT(offset >= 0);
    1501    47343265 :         ASSERT(offset < XFS_INODES_PER_CHUNK);
    1502    47343265 :         ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
    1503             :                                    XFS_INODES_PER_CHUNK) == 0);
    1504    47343265 :         ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
    1505             : 
    1506             :         /*
    1507             :          * Modify or remove the finobt record.
                     :          *
                     :          * The record is deleted once its free count drops to zero.
                     :          * NOTE(review): the status that xfs_btree_delete() stores in
                     :          * i is not examined here; confirm that is intentional.
    1508             :          */
    1509    47343265 :         rec.ir_free &= ~XFS_INOBT_MASK(offset);
    1510    47343265 :         rec.ir_freecount--;
    1511    47343265 :         if (rec.ir_freecount)
    1512    37638506 :                 error = xfs_inobt_update(cur, &rec);
    1513             :         else
    1514     9704759 :                 error = xfs_btree_delete(cur, &i);
    1515    47344554 :         if (error)
    1516           0 :                 goto error_cur;
    1517             : 
    1518             :         /*
    1519             :          * The finobt has now been updated appropriately. We haven't updated the
    1520             :          * agi and superblock yet, so we can create an inobt cursor and validate
    1521             :          * the original freecount. If all is well, make the equivalent update to
    1522             :          * the inobt using the finobt record and offset information.
    1523             :          */
    1524    47344554 :         icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    1525             : 
    1526    47343717 :         error = xfs_check_agi_freecount(icur);
    1527    47344712 :         if (error)
    1528          26 :                 goto error_icur;
    1529             : 
    1530    47344686 :         error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
    1531    47341854 :         if (error)
    1532         125 :                 goto error_icur;
    1533             : 
    1534             :         /*
    1535             :          * Both trees have now been updated. We must update the perag and
    1536             :          * superblock before we can check the freecount for each btree.
    1537             :          */
    1538    47341729 :         be32_add_cpu(&agi->agi_freecount, -1);
    1539    47341878 :         xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
    1540    47344639 :         pag->pagi_freecount--;
    1541             : 
    1542    47344639 :         xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
    1543             : 
    1544    47345080 :         error = xfs_check_agi_freecount(icur);
    1545    47344010 :         if (error)
    1546           0 :                 goto error_icur;
    1547    47344010 :         error = xfs_check_agi_freecount(cur);
    1548    47344807 :         if (error)
    1549           0 :                 goto error_icur;
    1550             : 
    1551    47344807 :         xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
    1552    47342785 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    1553    47340600 :         *inop = ino;
    1554    47340600 :         return 0;
    1555             : 
    1556         151 : error_icur:
    1557         151 :         xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
    1558         455 : error_cur:
    1559         455 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    1560         455 :         return error;
    1561             : }
    1562             : 
    1563             : static int                              /* error from xfs_trans_roll() */
    1564      352462 : xfs_dialloc_roll(
    1565             :         struct xfs_trans        **tpp,  /* in/out: transaction to roll */
    1566             :         struct xfs_buf          *agibp) /* buffer held across the roll */
    1567             : {
    1568      352462 :         struct xfs_trans        *tp = *tpp;
    1569      352462 :         struct xfs_dquot_acct   *dqinfo;
    1570      352462 :         int                     error;
    1571             : 
    1572             :         /*
                     :          * Roll the caller's transaction, carrying agibp and the quota
                     :          * accounting (t_dqinfo) over to the transaction that follows.
                     :          *
    1573             :          * Hold on to the agibp across the commit so no other allocation can
    1574             :          * come in and take the free inodes we just allocated for our caller.
    1575             :          */
    1576      352462 :         xfs_trans_bhold(tp, agibp);
    1577             : 
    1578             :         /*
    1579             :          * We want the quota changes to be associated with the next transaction,
    1580             :          * NOT this one. So, detach the dqinfo from this and attach it to the
    1581             :          * next transaction.
    1582             :          */
    1583      352462 :         dqinfo = tp->t_dqinfo;
    1584      352462 :         tp->t_dqinfo = NULL;
    1585             : 
    1586      352462 :         error = xfs_trans_roll(&tp);
    1587             : 
    1588             :         /* Re-attach the quota info that we detached from prev trx. */
    1589      352462 :         tp->t_dqinfo = dqinfo;
    1590             : 
    1591             :         /*
    1592             :          * Join the buffer even on commit error so that the buffer is released
    1593             :          * when the caller cancels the transaction and doesn't have to handle
    1594             :          * this error case specially.
                     :          *
                     :          * *tpp is likewise updated whether or not the roll succeeded.
    1595             :          */
    1596      352462 :         xfs_trans_bjoin(tp, agibp);
    1597      352462 :         *tpp = tp;
    1598      352462 :         return error;
    1599             : }
    1600             : /*
                     :  * Decide whether @pag is worth trying for an inode allocation.
                     :  *
                     :  * Returns false for a NULL @pag, for an AG that xfs_perag_allows_inodes()
                     :  * rejects, and when reading the AGI or AGF to initialise the perag fails.
                     :  * An AG with a non-zero pagi_freecount is accepted straight away.  An AG
                     :  * without free inodes is accepted only if @ok_alloc is set and the free
                     :  * space checks at the end of the function pass.  @flags is passed on to
                     :  * xfs_alloc_read_agf(); a non-zero @flags also adds cluster alignment
                     :  * slack to the space requirement when more than one block is needed.
                     :  */
    1601             : static bool
    1602    48973439 : xfs_dialloc_good_ag(
    1603             :         struct xfs_perag        *pag,
    1604             :         struct xfs_trans        *tp,
    1605             :         umode_t                 mode,
    1606             :         int                     flags,
    1607             :         bool                    ok_alloc)
    1608             : {
    1609    48973439 :         struct xfs_mount        *mp = tp->t_mountp;
    1610    48973439 :         xfs_extlen_t            ineed;
    1611    48973439 :         xfs_extlen_t            longest = 0;
    1612    48973439 :         int                     needspace;
    1613    48973439 :         int                     error;
    1614             : 
    1615    48973439 :         if (!pag)
    1616             :                 return false;
    1617    97946878 :         if (!xfs_perag_allows_inodes(pag))
    1618             :                 return false;
    1619             : 
    1620    97946878 :         if (!xfs_perag_initialised_agi(pag)) {
    1621          76 :                 error = xfs_ialloc_read_agi(pag, tp, NULL);
    1622          76 :                 if (error)
    1623             :                         return false;
    1624             :         }
    1625             : 
    1626    48973439 :         if (pag->pagi_freecount)
    1627             :                 return true;
    1628     1981410 :         if (!ok_alloc)
    1629             :                 return false;
    1630             : 
    1631     1389812 :         if (!xfs_perag_initialised_agf(pag)) {
    1632          13 :                 error = xfs_alloc_read_agf(pag, tp, flags, NULL);
    1633          13 :                 if (error)
    1634             :                         return false;
    1635             :         }
    1636             : 
    1637             :         /*
    1638             :          * Check that there is enough free space for the file plus a chunk of
    1639             :          * inodes if we need to allocate some. If this is the first pass across
    1640             :          * the AGs, take into account the potential space needed for alignment
    1641             :          * of inode chunks when checking the longest contiguous free space in
    1642             :          * the AG - this prevents us from getting ENOSPC because we have free
    1643             :          * space larger than ialloc_blks but alignment constraints prevent us
    1644             :          * from using it.
    1645             :          *
    1646             :          * If we can't find an AG with space for full alignment slack to be
    1647             :          * taken into account, we must be near ENOSPC in all AGs.  Hence we
    1648             :          * don't include alignment for the second pass and so if we fail
    1649             :          * allocation due to alignment issues then it is most likely a real
    1650             :          * ENOSPC condition.
    1651             :          *
    1652             :          * XXX(dgc): this calculation is now bogus thanks to the per-ag
    1653             :          * reservations that xfs_alloc_fix_freelist() now does via
    1654             :          * xfs_alloc_space_available(). When the AG fills up, pagf_freeblks will
    1655             :          * be more than large enough for the check below to succeed, but
    1656             :          * xfs_alloc_space_available() will fail because of the non-zero
    1657             :          * metadata reservation and hence we won't actually be able to allocate
    1658             :          * more inodes in this AG. We do soooo much unnecessary work near ENOSPC
    1659             :          * because of this.
                     :          *
                     :          * If pagf_longest is zero, longest falls back to 1 when pagf_flcount
                     :          * is greater than zero and to 0 otherwise.  needspace is 1 for
                     :          * directories, regular files and symlinks, and 0 for every other mode.
    1660             :          */
    1661      694893 :         ineed = M_IGEO(mp)->ialloc_min_blks;
    1662      694893 :         if (flags && ineed > 1)
    1663      685346 :                 ineed += M_IGEO(mp)->cluster_align;
    1664      694893 :         longest = pag->pagf_longest;
    1665      694893 :         if (!longest)
    1666          40 :                 longest = pag->pagf_flcount > 0;
    1667      694893 :         needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
    1668             : 
    1669      694893 :         if (pag->pagf_freeblks < needspace + ineed || longest < ineed)
    1670       52049 :                 return false;
    1671             :         return true;
    1672             : }
    1673             : /*
                     :  * Try to allocate an inode from @pag.
                     :  *
                     :  * Reads the AGI buffer and rechecks pagi_freecount.  If the AG has no free
                     :  * inodes and @ok_alloc is false, -EAGAIN is returned.  If @ok_alloc is
                     :  * true, a new inode chunk is allocated with xfs_ialloc_ag_alloc() and the
                     :  * transaction in *tpp is rolled before the inode itself is allocated with
                     :  * xfs_dialloc_ag().  On success the inode number is stored in *new_ino.
                     :  * On the out_release paths the AGI buffer is released from the
                     :  * transaction before the error is returned.
                     :  */
    1674             : static int
    1675    47634801 : xfs_dialloc_try_ag(
    1676             :         struct xfs_perag        *pag,
    1677             :         struct xfs_trans        **tpp,
    1678             :         xfs_ino_t               parent,
    1679             :         xfs_ino_t               *new_ino,
    1680             :         bool                    ok_alloc)
    1681             : {
    1682    47634801 :         struct xfs_buf          *agbp;
    1683    47634801 :         xfs_ino_t               ino;
    1684    47634801 :         int                     error;
    1685             : 
    1686             :         /*
    1687             :          * Read in the AGI buffer and recheck the free inode count with the
    1688             :          * AGI buffer lock held.
    1689             :          */
    1690    47634801 :         error = xfs_ialloc_read_agi(pag, *tpp, &agbp);
    1691    47634639 :         if (error)
    1692             :                 return error;
    1693             : 
    1694    47634595 :         if (!pag->pagi_freecount) {
    1695      641444 :                 if (!ok_alloc) {
    1696        2928 :                         error = -EAGAIN;
    1697        2928 :                         goto out_release;
    1698             :                 }
    1699             : 
    1700      638516 :                 error = xfs_ialloc_ag_alloc(pag, *tpp, agbp);
    1701      638511 :                 if (error < 0)
    1702      286055 :                         goto out_release;
    1703             : 
    1704             :                 /*
    1705             :                  * We successfully allocated space for an inode cluster in this
    1706             :                  * AG.  Roll the transaction so that we can allocate one of the
    1707             :                  * new inodes.
    1708             :                  */
    1709      352456 :                 ASSERT(pag->pagi_freecount > 0);
    1710      352456 :                 error = xfs_dialloc_roll(tpp, agbp);
    1711      352462 :                 if (error)
    1712           0 :                         goto out_release;
    1713             :         }
    1714             : 
    1715             :         /* Allocate an inode in the found AG */
    1716    47345613 :         error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino);
    1717    47344078 :         if (!error)
    1718    47345353 :                 *new_ino = ino;
    1719             :         return error;
    1720             : 
    1721      288983 : out_release:
    1722      288983 :         xfs_trans_brelse(*tpp, agbp);
    1723      288983 :         return error;
    1724             : }
    1725             : 
    1726             : /*
    1727             :  * Allocate an on-disk inode.
    1728             :  *
    1729             :  * Mode is used to tell whether the new inode is a directory and hence where to
    1730             :  * locate it. The on-disk inode that is allocated will be returned in @new_ino
    1731             :  * on success, otherwise an error will be set to indicate the failure (e.g.
    1732             :  * -ENOSPC).
                     :  *
                     :  * @tpp is passed by reference to xfs_dialloc_try_ag(), which can roll the
                     :  * transaction.  @parent selects the starting AG for non-directories.
                     :  *
                     :  * The AGs are scanned from start_agno, wrapping around.  The first scan
                     :  * uses XFS_ALLOC_FLAG_TRYLOCK.  If it ends with neither an inode nor an
                     :  * error, a second scan is made with flags set to 0 (and with ok_alloc
                     :  * turned back on if it was cleared for low space) before -ENOSPC is
                     :  * returned.  @new_ino is only written on success.
    1733             :  */
    1734             : int
    1735    47488825 : xfs_dialloc(
    1736             :         struct xfs_trans        **tpp,
    1737             :         xfs_ino_t               parent,
    1738             :         umode_t                 mode,
    1739             :         xfs_ino_t               *new_ino)
    1740             : {
    1741    47488825 :         struct xfs_mount        *mp = (*tpp)->t_mountp;
    1742    47488825 :         xfs_agnumber_t          agno;
    1743    47488825 :         int                     error = 0;
    1744    47488825 :         xfs_agnumber_t          start_agno;
    1745    47488825 :         struct xfs_perag        *pag;
    1746    47488825 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    1747    47488825 :         bool                    ok_alloc = true;
    1748    47488825 :         bool                    low_space = false;
    1749    47488825 :         int                     flags;
    1750    47488825 :         xfs_ino_t               ino = NULLFSINO;
    1751             : 
    1752             :         /*
    1753             :          * Directories, symlinks, and regular files frequently allocate at least
    1754             :          * one block, so factor that potential expansion when we examine whether
    1755             :          * an AG has enough space for file creation.  (@mode is handed to
                     :          * xfs_dialloc_good_ag() in the loop below for that purpose.)
                     :          *
                     :          * Pick the AG to start searching from: directories take the next value
                     :          * of m_agirotor modulo m_maxagi, everything else starts in the AG of
                     :          * @parent, or in AG 0 if that AG number is not below m_maxagi.
    1756             :          */
    1757    47488825 :         if (S_ISDIR(mode))
    1758     3252498 :                 start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
    1759     3252493 :                                 mp->m_maxagi;
    1760             :         else {
    1761    44236332 :                 start_agno = XFS_INO_TO_AGNO(mp, parent);
    1762    44236332 :                 if (start_agno >= mp->m_maxagi)
    1763           0 :                         start_agno = 0;
    1764             :         }
    1765             : 
    1766             :         /*
    1767             :          * If we have already hit the ceiling of inode blocks then clear
    1768             :          * ok_alloc so we scan all available agi structures for a free
    1769             :          * inode.
    1770             :          *
    1771             :          * Read rough value of mp->m_icount by percpu_counter_read_positive,
    1772             :          * which will sacrifice the preciseness but improve the performance.
    1773             :          */
    1774    47488830 :         if (igeo->maxicount &&
    1775    47491760 :             percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
    1776             :                                                         > igeo->maxicount) {
    1777      146525 :                 ok_alloc = false;
    1778             :         }
    1779             : 
    1780             :         /*
    1781             :          * If we are near to ENOSPC, we want to prefer allocation from AGs that
    1782             :          * have free inodes in them rather than use up free space allocating new
    1783             :          * inode chunks. Hence we turn off allocation for the first non-blocking
    1784             :          * pass through the AGs if we are near ENOSPC to consume free inodes
    1785             :          * that we can immediately allocate, but then we allow allocation on the
    1786             :          * second pass if we fail to find an AG with free inodes in it.
    1787             :          */
    1788    47488830 :         if (percpu_counter_read_positive(&mp->m_fdblocks) <
    1789    47488830 :                         mp->m_low_space[XFS_LOWSP_1_PCNT]) {
    1790      233662 :                 ok_alloc = false;
    1791      233662 :                 low_space = true;
    1792             :         }
    1793             : 
    1794             :         /*
    1795             :          * Loop until we find an allocation group that either has free inodes
    1796             :          * or in which we can allocate some inodes.  Iterate through the
    1797             :          * allocation groups upward, wrapping at the end.
                     :          *
                     :          * An -EAGAIN from xfs_dialloc_try_ag() moves on to the next AG; any
                     :          * other result, including success, ends the scan.  The scan is also
                     :          * abandoned with -EFSCORRUPTED once the filesystem is shut down.
    1798             :          */
    1799    47488830 :         flags = XFS_ALLOC_FLAG_TRYLOCK;
    1800    47641201 : retry:
    1801    49262117 :         for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
    1802    48973877 :                 if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
    1803    47634776 :                         error = xfs_dialloc_try_ag(pag, tpp, parent,
    1804             :                                         &ino, ok_alloc);
    1805    47634293 :                         if (error != -EAGAIN)
    1806             :                                 break;
    1807             :                         error = 0;
    1808             :                 }
    1809             : 
    1810     3241832 :                 if (xfs_is_shutdown(mp)) {
    1811             :                         error = -EFSCORRUPTED;
    1812             :                         break;
    1813             :                 }
    1814             :         }
    1815    47643781 :         if (pag)
    1816    47345551 :                 xfs_perag_rele(pag);
    1817    47641490 :         if (error)
    1818         548 :                 return error;
    1819    47640942 :         if (ino == NULLFSINO) {
    1820      295562 :                 if (flags) {
    1821      152371 :                         flags = 0;
    1822      152371 :                         if (low_space)
    1823        5444 :                                 ok_alloc = true;
    1824      152371 :                         goto retry;
    1825             :                 }
    1826             :                 return -ENOSPC;
    1827             :         }
    1828    47345380 :         *new_ino = ino;
    1829    47345380 :         return 0;
    1830             : }
    1831             : 
    1832             : /*
    1833             :  * Free the blocks of an inode chunk. We must consider that the inode chunk
    1834             :  * might be sparse and only free the regions that are allocated as part of the
    1835             :  * chunk.
                     :  *
                     :  * Each extent is queued with xfs_free_extent_later().  Returns 0 on
                     :  * success or the first error reported by that call.
    1836             :  */
    1837             : static int
    1838       65666 : xfs_difree_inode_chunk(
    1839             :         struct xfs_trans                *tp,
    1840             :         xfs_agnumber_t                  agno,
    1841             :         struct xfs_inobt_rec_incore     *rec)
    1842             : {
    1843       65666 :         struct xfs_mount                *mp = tp->t_mountp;
    1844       65666 :         xfs_agblock_t                   sagbno = XFS_AGINO_TO_AGBNO(mp,
    1845             :                                                         rec->ir_startino);
    1846       65666 :         int                             startidx, endidx;
    1847       65666 :         int                             nextbit;
    1848       65666 :         xfs_agblock_t                   agbno;
    1849       65666 :         int                             contigblk;
    1850       65666 :         DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
    1851             : 
    1852       65666 :         if (!xfs_inobt_issparse(rec->ir_holemask)) {
    1853             :                 /* not sparse, calculate extent info directly */
    1854      104466 :                 return xfs_free_extent_later(tp,
    1855       52233 :                                 XFS_AGB_TO_FSB(mp, agno, sagbno),
    1856       52233 :                                 M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
    1857             :                                 XFS_AG_RESV_NONE);
    1858             :         }
    1859             : 
    1860             :         /* holemask is only 16-bits (fits in an unsigned long) */
    1861       13433 :         ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
    1862       13433 :         holemask[0] = rec->ir_holemask;
    1863             : 
    1864             :         /*
    1865             :          * Find contiguous ranges of zeroes (i.e., allocated regions) in the
    1866             :          * holemask and convert the start/end index of each range to an extent.
    1867             :          * We start with the start and end index both pointing at the first 0 in
    1868             :          * the mask.
                     :          *
                     :          * The loop ends once startidx has been reset to
                     :          * XFS_INOBT_HOLEMASK_BITS, i.e. after the last range has been queued
                     :          * and no further zero bit was found.
    1869             :          */
    1870       13433 :         startidx = endidx = find_first_zero_bit(holemask,
    1871             :                                                 XFS_INOBT_HOLEMASK_BITS);
    1872       13433 :         nextbit = startidx + 1;
    1873      120897 :         while (startidx < XFS_INOBT_HOLEMASK_BITS) {
    1874      107464 :                 int error;
    1875             : 
    1876      107464 :                 nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
    1877             :                                              nextbit);
    1878             :                 /*
    1879             :                  * If the next zero bit is contiguous, update the end index of
    1880             :                  * the current range and continue.
    1881             :                  */
    1882      107464 :                 if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
    1883       94031 :                     nextbit == endidx + 1) {
    1884       94031 :                         endidx = nextbit;
    1885       94031 :                         goto next;
    1886             :                 }
    1887             : 
    1888             :                 /*
    1889             :                  * nextbit is not contiguous with the current end index. Convert
    1890             :                  * the current start/end to an extent and add it to the free
    1891             :                  * list.
                     :                  *
                     :                  * Holemask indexes are converted to blocks by multiplying by
                     :                  * XFS_INODES_PER_HOLEMASK_BIT and dividing by sb_inopblock.
    1892             :                  */
    1893           0 :                 agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
    1894       13433 :                                   mp->m_sb.sb_inopblock;
    1895       26866 :                 contigblk = ((endidx - startidx + 1) *
    1896           0 :                              XFS_INODES_PER_HOLEMASK_BIT) /
    1897       13433 :                             mp->m_sb.sb_inopblock;
    1898             : 
    1899       13433 :                 ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
    1900       13433 :                 ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
    1901       26866 :                 error = xfs_free_extent_later(tp,
    1902       13433 :                                 XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
    1903             :                                 &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
    1904       13433 :                 if (error)
    1905           0 :                         return error;
    1906             : 
    1907             :                 /* reset range to current bit and carry on... */
    1908             :                 startidx = endidx = nextbit;
    1909             : 
    1910      107464 : next:
    1911      107464 :                 nextbit++;
    1912             :         }
    1913             :         return 0;
    1914             : }
    1915             : /*
                     :  * Mark inode @agino free in the inode btree of @pag.
                     :  *
                     :  * Looks up the inobt record covering @agino, sets the inode's bit in
                     :  * ir_free and increments ir_freecount.  If that leaves every inode in the
                     :  * chunk free, xfs_has_ikeep() is false and sb_inopblock is no larger than
                     :  * XFS_INODES_PER_CHUNK, the record is deleted, the chunk's blocks are
                     :  * queued for freeing and @xic is filled in to describe the removed
                     :  * cluster.  Otherwise the record is updated in place and xic->deleted is
                     :  * set to false.  The AGI, perag and superblock counters are adjusted to
                     :  * match.  On success the modified record is copied to *orec and 0 is
                     :  * returned; on failure the cursor is torn down with XFS_BTREE_ERROR and
                     :  * the error is returned.
                     :  */
    1916             : STATIC int
    1917    34311133 : xfs_difree_inobt(
    1918             :         struct xfs_perag                *pag,
    1919             :         struct xfs_trans                *tp,
    1920             :         struct xfs_buf                  *agbp,
    1921             :         xfs_agino_t                     agino,
    1922             :         struct xfs_icluster             *xic,
    1923             :         struct xfs_inobt_rec_incore     *orec)
    1924             : {
    1925    34311133 :         struct xfs_mount                *mp = pag->pag_mount;
    1926    34311133 :         struct xfs_agi                  *agi = agbp->b_addr;
    1927    34311133 :         struct xfs_btree_cur            *cur;
    1928    34311133 :         struct xfs_inobt_rec_incore     rec;
    1929    34311133 :         int                             ilen;
    1930    34311133 :         int                             error;
    1931    34311133 :         int                             i;
    1932    34311133 :         int                             off;
    1933             : 
    1934    34311133 :         ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
    1935    68622266 :         ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));
    1936             : 
    1937             :         /*
    1938             :          * Initialize the cursor.
    1939             :          */
    1940    34311133 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    1941             : 
    1942    34311129 :         error = xfs_check_agi_freecount(cur);
    1943    34310635 :         if (error)
    1944           5 :                 goto error0;
    1945             : 
    1946             :         /*
    1947             :          * Look for the entry describing this inode.
    1948             :          */
    1949    34310630 :         if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
    1950           1 :                 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
    1951             :                         __func__, error);
    1952           1 :                 goto error0;
    1953             :         }
    1954    34311615 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    1955           0 :                 error = -EFSCORRUPTED;
    1956           0 :                 goto error0;
    1957             :         }
    1958    34311615 :         error = xfs_inobt_get_rec(cur, &rec, &i);
    1959    34311700 :         if (error) {
    1960           0 :                 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
    1961             :                         __func__, error);
    1962           0 :                 goto error0;
    1963             :         }
    1964    34311700 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    1965           0 :                 error = -EFSCORRUPTED;
    1966           0 :                 goto error0;
    1967             :         }
    1968             :         /*
    1969             :          * Get the offset in the inode chunk.
    1970             :          */
    1971    34311700 :         off = agino - rec.ir_startino;
    1972    34311700 :         ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
    1973    34311700 :         ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
    1974             :         /*
    1975             :          * Mark the inode free & increment the count.
    1976             :          */
    1977    34311700 :         rec.ir_free |= XFS_INOBT_MASK(off);
    1978    34311700 :         rec.ir_freecount++;
    1979             : 
    1980             :         /*
    1981             :          * When an inode chunk is free, it becomes eligible for removal. Don't
    1982             :          * remove the chunk if the block size is large enough for multiple inode
    1983             :          * chunks (that might not be free).
    1984             :          */
    1985    34311700 :         if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
    1986       65666 :             mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
    1987       65666 :                 xic->deleted = true;
    1988       65666 :                 xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
    1989             :                                 rec.ir_startino);
    1990       65666 :                 xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
    1991             : 
    1992             :                 /*
    1993             :                  * Remove the inode cluster from the AGI B+Tree, adjust the
    1994             :                  * AGI and Superblock inode counts, and mark the disk space
    1995             :                  * to be freed when the transaction is committed.
                     :                  *
                     :                  * ilen includes the inode being freed by this call.  That
                     :                  * inode was only counted in the local copy of the record and
                     :                  * not in the AGI, perag or superblock free counts, so those
                     :                  * drop by ilen - 1 while the inode counts drop by ilen.
    1996             :                  */
    1997       65666 :                 ilen = rec.ir_freecount;
    1998       65666 :                 be32_add_cpu(&agi->agi_count, -ilen);
    1999       65666 :                 be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
    2000       65666 :                 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
    2001       65666 :                 pag->pagi_freecount -= ilen - 1;
    2002       65666 :                 pag->pagi_count -= ilen;
    2003       65666 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
    2004       65666 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
    2005             : 
    2006       65666 :                 if ((error = xfs_btree_delete(cur, &i))) {
    2007           0 :                         xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
    2008             :                                 __func__, error);
    2009           0 :                         goto error0;
    2010             :                 }
    2011             : 
    2012       65666 :                 error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
    2013       65666 :                 if (error)
    2014           0 :                         goto error0;
    2015             :         } else {
    2016    34246034 :                 xic->deleted = false;
    2017             : 
    2018    34246034 :                 error = xfs_inobt_update(cur, &rec);
    2019    34245916 :                 if (error) {
    2020           0 :                         xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
    2021             :                                 __func__, error);
    2022           0 :                         goto error0;
    2023             :                 }
    2024             : 
    2025             :                 /*
    2026             :                  * Change the inode free counts and log the ag/sb changes.
    2027             :                  */
    2028    34245916 :                 be32_add_cpu(&agi->agi_freecount, 1);
    2029    34245869 :                 xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
    2030    34244556 :                 pag->pagi_freecount++;
    2031    34244556 :                 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
    2032             :         }
    2033             : 
    2034    34310788 :         error = xfs_check_agi_freecount(cur);
    2035    34310251 :         if (error)
    2036           0 :                 goto error0;
    2037             : 
    2038    34310251 :         *orec = rec;
    2039    34310251 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    2040    34310251 :         return 0;
    2041             : 
    2042           6 : error0:
    2043           6 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    2044           6 :         return error;
    2045             : }
    2046             : 
    2047             : /*
    2048             :  * Free an inode in the free inode btree.
    2049             :  */
    2050             : STATIC int
    2051    34311056 : xfs_difree_finobt(
    2052             :         struct xfs_perag                *pag,
    2053             :         struct xfs_trans                *tp,
    2054             :         struct xfs_buf                  *agbp,
    2055             :         xfs_agino_t                     agino,
    2056             :         struct xfs_inobt_rec_incore     *ibtrec) /* inobt record */
    2057             : {
    2058    34311056 :         struct xfs_mount                *mp = pag->pag_mount;
    2059    34311056 :         struct xfs_btree_cur            *cur;
    2060    34311056 :         struct xfs_inobt_rec_incore     rec;
    2061    34311056 :         int                             offset = agino - ibtrec->ir_startino;
    2062    34311056 :         int                             error;
    2063    34311056 :         int                             i;
    2064             : 
    2065    34311056 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
    2066             : 
    2067    34310980 :         error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
    2068    34312125 :         if (error)
    2069           8 :                 goto error;
    2070    34312117 :         if (i == 0) {
    2071             :                 /*
    2072             :                  * If the record does not exist in the finobt, we must have just
    2073             :                  * freed an inode in a previously fully allocated chunk. If not,
    2074             :                  * something is out of sync.
    2075             :                  */
    2076     9424127 :                 if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
    2077           0 :                         error = -EFSCORRUPTED;
    2078           0 :                         goto error;
    2079             :                 }
    2080             : 
    2081     9424127 :                 error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
    2082             :                                              ibtrec->ir_count,
    2083             :                                              ibtrec->ir_freecount,
    2084             :                                              ibtrec->ir_free, &i);
    2085     9423379 :                 if (error)
    2086           0 :                         goto error;
    2087     9423379 :                 ASSERT(i == 1);
    2088             : 
    2089     9423379 :                 goto out;
    2090             :         }
    2091             : 
    2092             :         /*
    2093             :          * Read and update the existing record. We could just copy the ibtrec
    2094             :          * across here, but that would defeat the purpose of having redundant
    2095             :          * metadata. By making the modifications independently, we can catch
    2096             :          * corruptions that we wouldn't see if we just copied from one record
    2097             :          * to another.
    2098             :          */
    2099    24887990 :         error = xfs_inobt_get_rec(cur, &rec, &i);
    2100    24887924 :         if (error)
    2101           0 :                 goto error;
    2102    24887924 :         if (XFS_IS_CORRUPT(mp, i != 1)) {
    2103           0 :                 error = -EFSCORRUPTED;
    2104           0 :                 goto error;
    2105             :         }
    2106             : 
    2107    24887924 :         rec.ir_free |= XFS_INOBT_MASK(offset);
    2108    24887924 :         rec.ir_freecount++;
    2109             : 
    2110    24887924 :         if (XFS_IS_CORRUPT(mp,
    2111             :                            rec.ir_free != ibtrec->ir_free ||
    2112             :                            rec.ir_freecount != ibtrec->ir_freecount)) {
    2113           0 :                 error = -EFSCORRUPTED;
    2114           0 :                 goto error;
    2115             :         }
    2116             : 
    2117             :         /*
    2118             :          * The content of inobt records should always match between the inobt
    2119             :          * and finobt. The lifecycle of records in the finobt is different from
    2120             :          * the inobt in that the finobt only tracks records with at least one
    2121             :          * free inode. Hence, if all of the inodes are free and we aren't
    2122             :          * keeping inode chunks permanently on disk, remove the record.
    2123             :          * Otherwise, update the record with the new information.
    2124             :          *
    2125             :          * Note that we currently can't free chunks when the block size is large
    2126             :          * enough for multiple chunks. Leave the finobt record to remain in sync
    2127             :          * with the inobt.
    2128             :          */
    2129    24887924 :         if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
    2130       65666 :             mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
    2131       65666 :                 error = xfs_btree_delete(cur, &i);
    2132       65666 :                 if (error)
    2133           0 :                         goto error;
    2134       65666 :                 ASSERT(i == 1);
    2135             :         } else {
    2136    24822258 :                 error = xfs_inobt_update(cur, &rec);
    2137    24822303 :                 if (error)
    2138           0 :                         goto error;
    2139             :         }
    2140             : 
    2141    24822303 : out:
    2142    34311348 :         error = xfs_check_agi_freecount(cur);
    2143    34311101 :         if (error)
    2144           0 :                 goto error;
    2145             : 
    2146    34311101 :         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
    2147    34311101 :         return 0;
    2148             : 
    2149           8 : error:
    2150           8 :         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
    2151           8 :         return error;
    2152             : }
    2153             : 
    2154             : /*
    2155             :  * Free disk inode.  Carefully avoids touching the incore inode, all
    2156             :  * manipulations incore are the caller's responsibility.
    2157             :  * The on-disk inode is not changed by this operation, only the
    2158             :  * btree (free inode mask) is changed.
                        *
                        * Before any metadata is read, @inode is checked against @pag: the AG
                        * number encoded in @inode must equal pag->pag_agno, rebuilding the inode
                        * number from (pag_agno, agino) must give @inode back, and the AG block
                        * holding the inode must be below sb_agblocks.  Each failed check logs a
                        * warning, trips ASSERT(0) and returns -EINVAL.
                        *
                        * The AGI buffer is then read through @tp, the inode btree is updated by
                        * xfs_difree_inobt(), and, when the filesystem has a free inode btree,
                        * xfs_difree_finobt() is given the record filled in by the inobt step.
                        * @xic is handed to xfs_difree_inobt() unchanged.
                        *
                        * Returns 0 on success or the negative errno of the first failing step.
    2159             :  */
    2160             : int
    2161    34311312 : xfs_difree(
    2162             :         struct xfs_trans        *tp,
    2163             :         struct xfs_perag        *pag,
    2164             :         xfs_ino_t               inode,
    2165             :         struct xfs_icluster     *xic)
    2166             : {
    2167             :         /* REFERENCED */
    2168    34311312 :         xfs_agblock_t           agbno;  /* block number containing inode */
    2169    34311312 :         struct xfs_buf          *agbp;  /* buffer for allocation group header */
    2170    34311312 :         xfs_agino_t             agino;  /* allocation group inode number */
    2171    34311312 :         int                     error;  /* error return value */
    2172    34311312 :         struct xfs_mount        *mp = tp->t_mountp;
    2173    34311312 :         struct xfs_inobt_rec_incore rec;/* btree record */
    2174             : 
    2175             :         /*
    2176             :          * Break up inode number into its components.
                                * Every failed sanity check below returns -EINVAL without
                                * reading the AGI.
    2177             :          */
    2178    34311312 :         if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) {
    2179           0 :                 xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).",
    2180             :                         __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno);
    2181           0 :                 ASSERT(0);
    2182           0 :                 return -EINVAL;
    2183             :         }
    2184    34311312 :         agino = XFS_INO_TO_AGINO(mp, inode);
    2185    34311312 :         if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino))  {
    2186           0 :                 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
    2187             :                         __func__, (unsigned long long)inode,
    2188             :                         (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
    2189           0 :                 ASSERT(0);
    2190           0 :                 return -EINVAL;
    2191             :         }
    2192    34311312 :         agbno = XFS_AGINO_TO_AGBNO(mp, agino);
    2193    34311312 :         if (agbno >= mp->m_sb.sb_agblocks)  {
    2194           0 :                 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
    2195             :                         __func__, agbno, mp->m_sb.sb_agblocks);
    2196           0 :                 ASSERT(0);
    2197           0 :                 return -EINVAL;
    2198             :         }
    2199             :         /*
    2200             :          * Get the allocation group header.
    2201             :          */
    2202    34311312 :         error = xfs_ialloc_read_agi(pag, tp, &agbp);
    2203    34311527 :         if (error) {
    2204         144 :                 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
    2205             :                         __func__, error);
    2206         144 :                 return error;
    2207             :         }
    2208             : 
    2209             :         /*
    2210             :          * Fix up the inode allocation btree.
                                * On success @rec holds the record that the finobt step
                                * below is given.
    2211             :          */
    2212    34311383 :         error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec);
    2213    34311338 :         if (error)
    2214           6 :                 goto error0;
    2215             : 
    2216             :         /*
    2217             :          * Fix up the free inode btree.
    2218             :          */
    2219    34311332 :         if (xfs_has_finobt(mp)) {
    2220    34311273 :                 error = xfs_difree_finobt(pag, tp, agbp, agino, &rec);
    2221    34311524 :                 if (error)
    2222           8 :                         goto error0;
    2223             :         }
    2224             : 
    2225             :         return 0;
    2226             : 
                               /* Both btree steps funnel here; this function undoes nothing itself. */
    2227             : error0:
    2228             :         return error;
    2229             : }
    2230             : 
                       /*
                        * Find the inode btree record covering @agino and report where the
                        * inode's chunk starts.
                        *
                        * The AGI is read through @tp and the inobt is searched with
                        * XFS_LOOKUP_LE.  The AGI buffer and the cursor are both released before
                        * the record is examined.
                        *
                        * On success *chunk_agbno is the AG block of the record's first inode and
                        * *offset_agbno is @agbno minus that block.
                        *
                        * Returns 0 on success, an error from the AGI read or the btree helpers,
                        * or -EINVAL when:
                        *  - no record is found,
                        *  - the record does not span @agino, or
                        *  - XFS_IGET_UNTRUSTED is set in @flags and the inode's bit is set in
                        *    the record's free mask.
                        */
    2231             : STATIC int
    2232   354492569 : xfs_imap_lookup(
    2233             :         struct xfs_perag        *pag,
    2234             :         struct xfs_trans        *tp,
    2235             :         xfs_agino_t             agino,
    2236             :         xfs_agblock_t           agbno,
    2237             :         xfs_agblock_t           *chunk_agbno,
    2238             :         xfs_agblock_t           *offset_agbno,
    2239             :         int                     flags)
    2240             : {
    2241   354492569 :         struct xfs_mount        *mp = pag->pag_mount;
    2242   354492569 :         struct xfs_inobt_rec_incore rec;
    2243   354492569 :         struct xfs_btree_cur    *cur;
    2244   354492569 :         struct xfs_buf          *agbp;
    2245   354492569 :         int                     error;
    2246   354492569 :         int                     i;
    2247             : 
    2248   354492569 :         error = xfs_ialloc_read_agi(pag, tp, &agbp);
    2249   354540587 :         if (error) {
    2250         162 :                 xfs_alert(mp,
    2251             :                         "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
    2252             :                         __func__, error, pag->pag_agno);
    2253         162 :                 return error;
    2254             :         }
    2255             : 
    2256             :         /*
    2257             :          * Lookup the inode record for the given agino. If the record cannot be
    2258             :          * found, then it's an invalid inode number and we should abort. Once
    2259             :          * we have a record, we need to ensure it contains the inode number
    2260             :          * we are looking up.
                                *
                                * The record is only fetched when the lookup reports a match
                                * (i != 0); a zero i after an otherwise clean lookup or fetch is
                                * turned into -EINVAL.
    2261             :          */
    2262   354540425 :         cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
    2263   354539105 :         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
    2264   354538344 :         if (!error) {
    2265   354540593 :                 if (i)
    2266   354541089 :                         error = xfs_inobt_get_rec(cur, &rec, &i);
    2267   354539279 :                 if (!error && i == 0)
    2268          25 :                         error = -EINVAL;
    2269             :         }
    2270             : 
                               /* Released on success and failure alike; only rec is used below. */
    2271   354537030 :         xfs_trans_brelse(tp, agbp);
    2272   354541326 :         xfs_btree_del_cursor(cur, error);
    2273   354537618 :         if (error)
    2274             :                 return error;
    2275             : 
    2276             :         /* check that the returned record contains the required inode */
    2277   354537584 :         if (rec.ir_startino > agino ||
    2278   354537584 :             rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
    2279             :                 return -EINVAL;
    2280             : 
    2281             :         /* for untrusted inodes check it is allocated first */
    2282   354504149 :         if ((flags & XFS_IGET_UNTRUSTED) &&
    2283   354500037 :             (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
    2284             :                 return -EINVAL;
    2285             : 
    2286   354503278 :         *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
    2287   354503278 :         *offset_agbno = agbno - *chunk_agbno;
    2288   354503278 :         return 0;
    2289             : }
    2290             : 
    2291             : /*
    2292             :  * Return the location of the inode in imap, for mapping it into a buffer.
                        *
                        * @ino is split into an AG inode number and AG block.  If the block is at
                        * or beyond sb_agblocks, or @ino cannot be rebuilt from (pag_agno, agino),
                        * -EINVAL is returned.  On DEBUG builds this is silent for
                        * XFS_IGET_UNTRUSTED lookups; otherwise alerts and a stack trace are
                        * emitted first.
                        *
                        * The chunk containing the inode is then located in one of three ways:
                        *  - XFS_IGET_UNTRUSTED: always through xfs_imap_lookup();
                        *  - blocks_per_cluster == 1: straight from @ino, returning early;
                        *  - otherwise: by masking with inoalign_mask when that mask is non-zero,
                        *    and through xfs_imap_lookup() when it is zero.
                        *
                        * On success @imap holds the start (im_blkno) and length (im_len) of the
                        * buffer to read and the inode's offset inside it (im_boffset).
                        *
                        * Returns 0 on success, -EINVAL for a bad inode number or a location past
                        * the end of the filesystem, or an error from xfs_imap_lookup().
    2293             :  */
    2294             : int
    2295   374804633 : xfs_imap(
    2296             :         struct xfs_perag        *pag,
    2297             :         struct xfs_trans        *tp,
    2298             :         xfs_ino_t               ino,    /* inode to locate */
    2299             :         struct xfs_imap         *imap,  /* location map structure */
    2300             :         uint                    flags)  /* flags for inode btree lookup */
    2301             : {
    2302   374804633 :         struct xfs_mount        *mp = pag->pag_mount;
    2303   374804633 :         xfs_agblock_t           agbno;  /* block number of inode in the alloc group */
    2304   374804633 :         xfs_agino_t             agino;  /* inode number within alloc group */
    2305   374804633 :         xfs_agblock_t           chunk_agbno;    /* first block in inode chunk */
    2306   374804633 :         xfs_agblock_t           cluster_agbno;  /* first block in inode cluster */
    2307   374804633 :         int                     error;  /* error code */
    2308   374804633 :         int                     offset; /* index of inode in its buffer */
    2309   374804633 :         xfs_agblock_t           offset_agbno;   /* blks from chunk start to inode */
    2310             : 
    2311   374804633 :         ASSERT(ino != NULLFSINO);
    2312             : 
    2313             :         /*
    2314             :          * Split up the inode number into its parts.
    2315             :          */
    2316   374804633 :         agino = XFS_INO_TO_AGINO(mp, ino);
    2317   374804633 :         agbno = XFS_AGINO_TO_AGBNO(mp, agino);
    2318   374804633 :         if (agbno >= mp->m_sb.sb_agblocks ||
    2319   374800149 :             ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
    2320       13561 :                 error = -EINVAL;
    2321             : #ifdef DEBUG
    2322             :                 /*
    2323             :                  * Don't output diagnostic information for untrusted inodes
    2324             :                  * as they can be invalid without implying corruption.
    2325             :                  */
    2326       13561 :                 if (flags & XFS_IGET_UNTRUSTED)
    2327             :                         return error;
    2328           0 :                 if (agbno >= mp->m_sb.sb_agblocks) {
    2329           0 :                         xfs_alert(mp,
    2330             :                 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
    2331             :                                 __func__, (unsigned long long)agbno,
    2332             :                                 (unsigned long)mp->m_sb.sb_agblocks);
    2333             :                 }
    2334           0 :                 if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
    2335           0 :                         xfs_alert(mp,
    2336             :                 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
    2337             :                                 __func__, ino,
    2338             :                                 XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
    2339             :                 }
    2340           0 :                 xfs_stack_trace();
    2341             : #endif /* DEBUG */
    2342           0 :                 return error;
    2343             :         }
    2344             : 
    2345             :         /*
    2346             :          * For bulkstat and handle lookups, we have an untrusted inode number
    2347             :          * that we have to verify is valid. We cannot do this just by reading
    2348             :          * the inode buffer as it may have been unlinked and removed leaving
    2349             :          * inodes in stale state on disk. Hence we have to do a btree lookup
    2350             :          * in all cases where an untrusted inode number is passed.
    2351             :          */
    2352   374791072 :         if (flags & XFS_IGET_UNTRUSTED) {
    2353   354516473 :                 error = xfs_imap_lookup(pag, tp, agino, agbno,
    2354             :                                         &chunk_agbno, &offset_agbno, flags);
    2355   354528825 :                 if (error)
    2356             :                         return error;
    2357   354499379 :                 goto out_map;
    2358             :         }
    2359             : 
    2360             :         /*
    2361             :          * If the inode cluster size is the same as the blocksize or
    2362             :          * smaller we get to the buffer by simple arithmetics.
                                *
                                * This path returns directly, so the end-of-filesystem check at
                                * the bottom of the function is not applied to it.
    2363             :          */
    2364    20274599 :         if (M_IGEO(mp)->blocks_per_cluster == 1) {
    2365        2460 :                 offset = XFS_INO_TO_OFFSET(mp, ino);
    2366        2460 :                 ASSERT(offset < mp->m_sb.sb_inopblock);
    2367             : 
    2368        2460 :                 imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno);
    2369        2460 :                 imap->im_len = XFS_FSB_TO_BB(mp, 1);
    2370           0 :                 imap->im_boffset = (unsigned short)(offset <<
    2371        2460 :                                                         mp->m_sb.sb_inodelog);
    2372        2460 :                 return 0;
    2373             :         }
    2374             : 
    2375             :         /*
    2376             :          * If the inode chunks are aligned then use simple maths to
    2377             :          * find the location. Otherwise we have to do a btree
    2378             :          * lookup to find the location.
    2379             :          */
    2380    20272139 :         if (M_IGEO(mp)->inoalign_mask) {
    2381    20272139 :                 offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
    2382    20272139 :                 chunk_agbno = agbno - offset_agbno;
    2383             :         } else {
    2384           0 :                 error = xfs_imap_lookup(pag, tp, agino, agbno,
    2385             :                                         &chunk_agbno, &offset_agbno, flags);
    2386           0 :                 if (error)
    2387             :                         return error;
    2388             :         }
    2389             : 
    2390           0 : out_map:
                               /*
                                * Round offset_agbno down to a whole number of clusters to find
                                * the cluster holding the inode, then count inodes from the start
                                * of that cluster to get the inode's index within the buffer.
                                */
    2391   374771518 :         ASSERT(agbno >= chunk_agbno);
    2392   749543036 :         cluster_agbno = chunk_agbno +
    2393   374771518 :                 ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
    2394   374771518 :                  M_IGEO(mp)->blocks_per_cluster);
    2395   749543036 :         offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
    2396   374771518 :                 XFS_INO_TO_OFFSET(mp, ino);
    2397             : 
    2398   374771518 :         imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno);
    2399   374771518 :         imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
    2400   374771518 :         imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
    2401             : 
    2402             :         /*
    2403             :          * If the inode number maps to a block outside the bounds
    2404             :          * of the file system then fail with -EINVAL rather than calling
    2405             :          * read_buf and panicking when we get an error from the
    2406             :          * driver.
    2407             :          */
    2408   749543036 :         if ((imap->im_blkno + imap->im_len) >
    2409   374771518 :             XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
    2410           0 :                 xfs_alert(mp,
    2411             :         "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
    2412             :                         __func__, (unsigned long long) imap->im_blkno,
    2413             :                         (unsigned long long) imap->im_len,
    2414             :                         XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
    2415           0 :                 return -EINVAL;
    2416             :         }
    2417             :         return 0;
    2418             : }
    2419             : 
    2420             : /*
    2421             :  * Log specified fields for the ag hdr (inode section). The growth of the agi
    2422             :  * structure over time requires that we interpret the buffer as two logical
    2423             :  * regions delineated by the end of the unlinked list. This is due to the size
    2424             :  * of the hash table and its location in the middle of the agi.
    2425             :  *
    2426             :  * For example, a request to log a field before agi_unlinked and a field after
    2427             :  * agi_unlinked could cause us to log the entire hash table and use an excessive
    2428             :  * amount of log space. To avoid this behavior, log the region up through
    2429             :  * agi_unlinked in one call and the region after agi_unlinked through the end of
    2430             :  * the structure in another.
                        *
                        * @tp:     transaction the logged ranges are attached to
                        * @bp:     AGI buffer; DEBUG builds assert that it carries XFS_AGI_MAGIC
                        * @fields: bitmask selecting the fields to log.  Bits covered by
                        *          XFS_AGI_ALL_BITS_R1 are logged as the first region; any
                        *          remaining bits are logged as the second region.
                        *
                        * Nothing is logged for a region with none of its bits set.
    2431             :  */
    2432             : void
    2433    82193495 : xfs_ialloc_log_agi(
    2434             :         struct xfs_trans        *tp,
    2435             :         struct xfs_buf          *bp,
    2436             :         uint32_t                fields)
    2437             : {
    2438    82193495 :         int                     first;          /* first byte number */
    2439    82193495 :         int                     last;           /* last byte number */
    2440    82193495 :         static const short      offsets[] = {   /* field starting offsets */
    2441             :                                         /* keep in sync with bit definitions */
    2442             :                 offsetof(xfs_agi_t, agi_magicnum),
    2443             :                 offsetof(xfs_agi_t, agi_versionnum),
    2444             :                 offsetof(xfs_agi_t, agi_seqno),
    2445             :                 offsetof(xfs_agi_t, agi_length),
    2446             :                 offsetof(xfs_agi_t, agi_count),
    2447             :                 offsetof(xfs_agi_t, agi_root),
    2448             :                 offsetof(xfs_agi_t, agi_level),
    2449             :                 offsetof(xfs_agi_t, agi_freecount),
    2450             :                 offsetof(xfs_agi_t, agi_newino),
    2451             :                 offsetof(xfs_agi_t, agi_dirino),
    2452             :                 offsetof(xfs_agi_t, agi_unlinked),
    2453             :                 offsetof(xfs_agi_t, agi_free_root),
    2454             :                 offsetof(xfs_agi_t, agi_free_level),
    2455             :                 offsetof(xfs_agi_t, agi_iblocks),
                                       /*
                                        * NOTE(review): the final entry is the structure size, not
                                        * a field offset; presumably it bounds the last field for
                                        * xfs_btree_offsets() - confirm against that helper.
                                        */
    2456             :                 sizeof(xfs_agi_t)
    2457             :         };
    2458             : #ifdef DEBUG
    2459    82193495 :         struct xfs_agi          *agi = bp->b_addr;
    2460             : 
    2461    82193495 :         ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
    2462             : #endif
    2463             : 
    2464             :         /*
    2465             :          * Compute byte offsets for the first and last fields in the first
    2466             :          * region and log the agi buffer. This only logs up through
    2467             :          * agi_unlinked.
    2468             :          */
    2469    82193495 :         if (fields & XFS_AGI_ALL_BITS_R1) {
    2470    82193786 :                 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
    2471             :                                   &first, &last);
    2472    82189236 :                 xfs_trans_log_buf(tp, bp, first, last);
    2473             :         }
    2474             : 
    2475             :         /*
    2476             :          * Mask off the bits in the first region and calculate the first and
    2477             :          * last field offsets for any bits in the second region.
    2478             :          */
    2479    82188000 :         fields &= ~XFS_AGI_ALL_BITS_R1;
    2480    82188000 :         if (fields) {
    2481        1806 :                 xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
    2482             :                                   &first, &last);
    2483        1806 :                 xfs_trans_log_buf(tp, bp, first, last);
    2484             :         }
    2485    82188000 : }
    2486             : 
                       /*
                        * Structural checks for an AGI buffer.
                        *
                        * Returns NULL when every check passes.  Otherwise returns a failure
                        * address: __this_address at the failing check, or the value handed back
                        * by xfs_validate_ag_length().
                        *
                        * Checks, in order:
                        *  - on CRC filesystems, agi_uuid must equal sb_meta_uuid and agi_lsn must
                        *    pass xfs_log_check_lsn();
                        *  - magic number and version number;
                        *  - sequence number and length, via xfs_validate_ag_length();
                        *  - agi_level must be within 1..inobt_maxlevels;
                        *  - with a free inode btree, agi_free_level must be in the same range;
                        *  - every unlinked bucket that is not NULLAGINO must pass
                        *    xfs_verify_ino().
                        */
    2487             : static xfs_failaddr_t
    2488     1871332 : xfs_agi_verify(
    2489             :         struct xfs_buf          *bp)
    2490             : {
    2491     1871332 :         struct xfs_mount        *mp = bp->b_mount;
    2492     1871332 :         struct xfs_agi          *agi = bp->b_addr;
    2493     1871332 :         xfs_failaddr_t          fa;
    2494     1871332 :         uint32_t                agi_seqno = be32_to_cpu(agi->agi_seqno);
    2495     1871332 :         uint32_t                agi_length = be32_to_cpu(agi->agi_length);
    2496     1871332 :         int                     i;
    2497             : 
    2498     1871332 :         if (xfs_has_crc(mp)) {
    2499     1868868 :                 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
    2500           0 :                         return __this_address;
    2501     1868914 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn)))
    2502           0 :                         return __this_address;
    2503             :         }
    2504             : 
    2505             :         /*
    2506             :          * Validate the magic number of the agi block.
    2507             :          */
    2508     1871381 :         if (!xfs_verify_magic(bp, agi->agi_magicnum))
    2509           0 :                 return __this_address;
    2510     1871328 :         if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
    2511           0 :                 return __this_address;
    2512             : 
    2513     1871345 :         fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
    2514     1871295 :         if (fa)
    2515             :                 return fa;
    2516             : 
    2517     1871332 :         if (be32_to_cpu(agi->agi_level) < 1 ||
    2518     3742664 :             be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
    2519          71 :                 return __this_address;
    2520             : 
    2521     1871262 :         if (xfs_has_finobt(mp) &&
    2522     3737758 :             (be32_to_cpu(agi->agi_free_level) < 1 ||
    2523     1868879 :              be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
    2524           0 :                 return __this_address;
    2525             : 
                               /*
                                * NOTE(review): bucket values are compared against NULLAGINO,
                                * which suggests they are AG-relative inode numbers, yet they
                                * are checked with xfs_verify_ino() - confirm this is the
                                * intended helper.
                                */
    2526   121621420 :         for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
    2527   119750115 :                 if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
    2528   119434135 :                         continue;
    2529      316023 :                 if (!xfs_verify_ino(mp, be32_to_cpu(agi->agi_unlinked[i])))
    2530           0 :                         return __this_address;
    2531             :         }
    2532             : 
    2533             :         return NULL;
    2534             : }
    2535             : 
                       /*
                        * Read-side verifier for AGI buffers.
                        *
                        * On CRC filesystems the checksum at XFS_AGI_CRC_OFF is checked first and
                        * a mismatch is reported as -EFSBADCRC.  Otherwise xfs_agi_verify() runs
                        * and -EFSCORRUPTED is reported when XFS_TEST_ERROR() evaluates true for
                        * its result with the XFS_ERRTAG_IALLOC_READ_AGI tag.  Failures are
                        * reported through xfs_verifier_error(); nothing is returned.
                        */
    2536             : static void
    2537      244966 : xfs_agi_read_verify(
    2538             :         struct xfs_buf  *bp)
    2539             : {
    2540      244966 :         struct xfs_mount *mp = bp->b_mount;
    2541      244966 :         xfs_failaddr_t  fa;
    2542             : 
    2543      489816 :         if (xfs_has_crc(mp) &&
    2544             :             !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
    2545           2 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
    2546             :         else {
    2547      244964 :                 fa = xfs_agi_verify(bp);
    2548      244964 :                 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
    2549           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    2550             :         }
    2551      244966 : }
    2552             : 
                       /*
                        * Write-side verifier for AGI buffers.
                        *
                        * Runs xfs_agi_verify(); a failure is reported as -EFSCORRUPTED and the
                        * function returns without touching the buffer.  On filesystems without
                        * CRCs nothing more is done.  With CRCs, agi_lsn is set from the buffer
                        * log item's li_lsn when a log item is attached, and the checksum at
                        * XFS_AGI_CRC_OFF is then recomputed.
                        */
    2553             : static void
    2554      479390 : xfs_agi_write_verify(
    2555             :         struct xfs_buf  *bp)
    2556             : {
    2557      479390 :         struct xfs_mount        *mp = bp->b_mount;
    2558      479390 :         struct xfs_buf_log_item *bip = bp->b_log_item;
    2559      479390 :         struct xfs_agi          *agi = bp->b_addr;
    2560      479390 :         xfs_failaddr_t          fa;
    2561             : 
    2562      479390 :         fa = xfs_agi_verify(bp);
    2563      479390 :         if (fa) {
    2564           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
    2565           0 :                 return;
    2566             :         }
    2567             : 
    2568      479390 :         if (!xfs_has_crc(mp))
    2569             :                 return;
    2570             : 
                               /* The LSN is written before the checksum is recomputed. */
    2571      477094 :         if (bip)
    2572      471686 :                 agi->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
    2573      477094 :         xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
    2574             : }
    2575             : 
                       /*
                        * Buffer operations for AGI buffers: the read, write and structure
                        * verifiers defined above.  Both .magic slots hold XFS_AGI_MAGIC.
                        * NOTE(review): presumably one slot per on-disk format version - confirm
                        * against the definition of struct xfs_buf_ops.
                        */
    2576             : const struct xfs_buf_ops xfs_agi_buf_ops = {
    2577             :         .name = "xfs_agi",
    2578             :         .magic = { cpu_to_be32(XFS_AGI_MAGIC), cpu_to_be32(XFS_AGI_MAGIC) },
    2579             :         .verify_read = xfs_agi_read_verify,
    2580             :         .verify_write = xfs_agi_write_verify,
    2581             :         .verify_struct = xfs_agi_verify,
    2582             : };
    2583             : 
    2584             : /*
    2585             :  * Read in the allocation group header (inode allocation section)
                        *
                        * Reads the AGI of @pag's allocation group from the data device through
                        * xfs_trans_read_buf(), with xfs_agi_buf_ops attached as the buffer
                        * verifiers.  @tp may be NULL; when it is not, the buffer is also tagged
                        * as XFS_BLFT_AGI_BUF in the transaction.  The buffer reference hint is
                        * set to XFS_AGI_REF.
                        *
                        * Returns 0 with the buffer in *agibpp, or the error from
                        * xfs_trans_read_buf().
    2586             :  */
    2587             : int
    2588  1464968922 : xfs_read_agi(
    2589             :         struct xfs_perag        *pag,
    2590             :         struct xfs_trans        *tp,
    2591             :         struct xfs_buf          **agibpp)
    2592             : {
    2593  1464968922 :         struct xfs_mount        *mp = pag->pag_mount;
    2594  1464968922 :         int                     error;
    2595             : 
    2596  1464968922 :         trace_xfs_read_agi(pag->pag_mount, pag->pag_agno);
    2597             : 
    2598  4395171891 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
    2599  1465057297 :                         XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
    2600  1465057297 :                         XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops);
    2601  1465254367 :         if (error)
    2602             :                 return error;
    2603  1465247268 :         if (tp)
    2604  1464377857 :                 xfs_trans_buf_set_type(tp, *agibpp, XFS_BLFT_AGI_BUF);
    2605             : 
    2606  1465286783 :         xfs_buf_set_ref(*agibpp, XFS_AGI_REF);
    2607  1465286783 :         return 0;
    2608             : }
    2609             : 
    2610             : /*
    2611             :  * Read in the agi and initialise the per-ag data. If the caller supplies a
    2612             :  * @agibpp, return the locked AGI buffer to them, otherwise release it.
                        *
                        * The per-ag inode counters (pagi_freecount, pagi_count) are copied from
                        * the on-disk AGI only while xfs_perag_initialised_agi() is still false;
                        * XFS_AGSTATE_AGI_INIT is then set so later calls leave them alone.
                        *
                        * Returns 0 on success or the error from xfs_read_agi().
    2613             :  */
    2614             : int
    2615  1391824921 : xfs_ialloc_read_agi(
    2616             :         struct xfs_perag        *pag,
    2617             :         struct xfs_trans        *tp,
    2618             :         struct xfs_buf          **agibpp)
    2619             : {
    2620  1391824921 :         struct xfs_buf          *agibp;
    2621  1391824921 :         struct xfs_agi          *agi;
    2622  1391824921 :         int                     error;
    2623             : 
    2624  1391824921 :         trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
    2625             : 
    2626  1391912858 :         error = xfs_read_agi(pag, tp, &agibp);
    2627  1392051136 :         if (error)
    2628             :                 return error;
    2629             : 
    2630  1392044064 :         agi = agibp->b_addr;
    2631  2784088128 :         if (!xfs_perag_initialised_agi(pag)) {
    2632      124486 :                 pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
    2633      124486 :                 pag->pagi_count = be32_to_cpu(agi->agi_count);
    2634      124486 :                 set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
    2635             :         }
    2636             : 
    2637             :         /*
    2638             :          * It's possible for these to be out of sync if
    2639             :          * we are in the middle of a forced shutdown.
    2640             :          */
    2641  2784088128 :         ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
    2642             :                 xfs_is_shutdown(pag->pag_mount));
    2643  1392044064 :         if (agibpp)
    2644  1392002766 :                 *agibpp = agibp;
    2645             :         else
    2646       41298 :                 xfs_trans_brelse(tp, agibp);
    2647             :         return 0;
    2648             : }
    2649             : 
    2650             : /* How many inodes are backed by inode clusters ondisk? */
    2651             : STATIC int
    2652  2109163832 : xfs_ialloc_count_ondisk(
    2653             :         struct xfs_btree_cur            *cur,
    2654             :         xfs_agino_t                     low,
    2655             :         xfs_agino_t                     high,
    2656             :         unsigned int                    *allocated)
    2657             : {
    2658  2109163832 :         struct xfs_inobt_rec_incore     irec;
    2659  2109163832 :         unsigned int                    ret = 0;
    2660  2109163832 :         int                             has_record;
    2661  2109163832 :         int                             error;
    2662             : 
    2663  2109163832 :         error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
    2664  2108955279 :         if (error)
    2665             :                 return error;
    2666             : 
    2667  3801768649 :         while (has_record) {
    2668  2639106064 :                 unsigned int            i, hole_idx;
    2669             : 
    2670  2639106064 :                 error = xfs_inobt_get_rec(cur, &irec, &has_record);
    2671  2638087320 :                 if (error)
    2672           0 :                         return error;
    2673  2638087320 :                 if (irec.ir_startino > high)
    2674             :                         break;
    2675             : 
    2676 >10951*10^7 :                 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
    2677 >10783*10^7 :                         if (irec.ir_startino + i < low)
    2678 >10735*10^7 :                                 continue;
    2679   480272421 :                         if (irec.ir_startino + i > high)
    2680             :                                 break;
    2681             : 
    2682   467969045 :                         hole_idx = i / XFS_INODES_PER_HOLEMASK_BIT;
    2683   467969045 :                         if (!(irec.ir_holemask & (1U << hole_idx)))
    2684   306831950 :                                 ret++;
    2685             :                 }
    2686             : 
    2687  1691661273 :                 error = xfs_btree_increment(cur, 0, &has_record);
    2688  1692813370 :                 if (error)
    2689           0 :                         return error;
    2690             :         }
    2691             : 
    2692  2109088632 :         *allocated = ret;
    2693  2109088632 :         return 0;
    2694             : }
    2695             : 
    2696             : /* Is there an inode record covering a given extent? */
    2697             : int
    2698  2109157320 : xfs_ialloc_has_inodes_at_extent(
    2699             :         struct xfs_btree_cur    *cur,
    2700             :         xfs_agblock_t           bno,
    2701             :         xfs_extlen_t            len,
    2702             :         enum xbtree_recpacking  *outcome)
    2703             : {
    2704  2109157320 :         xfs_agino_t             agino;
    2705  2109157320 :         xfs_agino_t             last_agino;
    2706  2109157320 :         unsigned int            allocated;
    2707  2109157320 :         int                     error;
    2708             : 
    2709  2109157320 :         agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
    2710  2109157320 :         last_agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
    2711             : 
    2712  2109157320 :         error = xfs_ialloc_count_ondisk(cur, agino, last_agino, &allocated);
    2713  2109124237 :         if (error)
    2714             :                 return error;
    2715             : 
    2716  2109124237 :         if (allocated == 0)
    2717  2104244100 :                 *outcome = XBTREE_RECPACKING_EMPTY;
    2718     4880137 :         else if (allocated == last_agino - agino + 1)
    2719     4880137 :                 *outcome = XBTREE_RECPACKING_FULL;
    2720             :         else
    2721           0 :                 *outcome = XBTREE_RECPACKING_SPARSE;
    2722             :         return 0;
    2723             : }
    2724             : 
    2725             : struct xfs_ialloc_count_inodes {
    2726             :         xfs_agino_t                     count;
    2727             :         xfs_agino_t                     freecount;
    2728             : };
    2729             : 
    2730             : /* Record inode counts across all inobt records. */
    2731             : STATIC int
    2732   118283325 : xfs_ialloc_count_inodes_rec(
    2733             :         struct xfs_btree_cur            *cur,
    2734             :         const union xfs_btree_rec       *rec,
    2735             :         void                            *priv)
    2736             : {
    2737   118283325 :         struct xfs_inobt_rec_incore     irec;
    2738   118283325 :         struct xfs_ialloc_count_inodes  *ci = priv;
    2739   118283325 :         xfs_failaddr_t                  fa;
    2740             : 
    2741   118283325 :         xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
    2742   118283396 :         fa = xfs_inobt_check_irec(cur, &irec);
    2743   118283867 :         if (fa)
    2744           0 :                 return xfs_inobt_complain_bad_rec(cur, fa, &irec);
    2745             : 
    2746   118283867 :         ci->count += irec.ir_count;
    2747   118283867 :         ci->freecount += irec.ir_freecount;
    2748             : 
    2749   118283867 :         return 0;
    2750             : }
    2751             : 
    2752             : /* Count allocated and free inodes under an inobt. */
    2753             : int
    2754     1335291 : xfs_ialloc_count_inodes(
    2755             :         struct xfs_btree_cur            *cur,
    2756             :         xfs_agino_t                     *count,
    2757             :         xfs_agino_t                     *freecount)
    2758             : {
    2759     1335291 :         struct xfs_ialloc_count_inodes  ci = {0};
    2760     1335291 :         int                             error;
    2761             : 
    2762     1335291 :         ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
    2763     1335291 :         error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
    2764     1335329 :         if (error)
    2765             :                 return error;
    2766             : 
    2767     1335329 :         *count = ci.count;
    2768     1335329 :         *freecount = ci.freecount;
    2769     1335329 :         return 0;
    2770             : }
    2771             : 
    2772             : /*
    2773             :  * Initialize inode-related geometry information.
    2774             :  *
    2775             :  * Compute the inode btree min and max levels and set maxicount.
    2776             :  *
    2777             :  * Set the inode cluster size.  This may still be overridden by the file
    2778             :  * system block size if it is larger than the chosen cluster size.
    2779             :  *
    2780             :  * For v5 filesystems, scale the cluster size with the inode size to keep a
    2781             :  * constant ratio of inode per cluster buffer, but only if mkfs has set the
    2782             :  * inode alignment value appropriately for larger cluster sizes.
    2783             :  *
    2784             :  * Then compute the inode cluster alignment information.
    2785             :  */
    2786             : void
    2787       22495 : xfs_ialloc_setup_geometry(
    2788             :         struct xfs_mount        *mp)
    2789             : {
    2790       22495 :         struct xfs_sb           *sbp = &mp->m_sb;
    2791       22495 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    2792       22495 :         uint64_t                icount;
    2793       22495 :         uint                    inodes;
    2794             : 
    2795       22495 :         igeo->new_diflags2 = 0;
    2796       22495 :         if (xfs_has_bigtime(mp))
    2797       22437 :                 igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME;
    2798       22495 :         if (xfs_has_large_extent_counts(mp))
    2799       22449 :                 igeo->new_diflags2 |= XFS_DIFLAG2_NREXT64;
    2800             : 
    2801             :         /* Compute inode btree geometry. */
    2802       22495 :         igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
    2803       22495 :         igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
    2804       22495 :         igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
    2805       22495 :         igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
    2806       22495 :         igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
    2807             : 
    2808       22495 :         igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
    2809             :                         sbp->sb_inopblock);
    2810       22495 :         igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
    2811             : 
    2812       22495 :         if (sbp->sb_spino_align)
    2813       22411 :                 igeo->ialloc_min_blks = sbp->sb_spino_align;
    2814             :         else
    2815          84 :                 igeo->ialloc_min_blks = igeo->ialloc_blks;
    2816             : 
    2817             :         /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */
    2818       22495 :         inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
    2819       22495 :         igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
    2820             :                         inodes);
    2821       22495 :         ASSERT(igeo->inobt_maxlevels <= xfs_iallocbt_maxlevels_ondisk());
    2822             : 
    2823             :         /*
    2824             :          * Set the maximum inode count for this filesystem, being careful not
    2825             :          * to use obviously garbage sb_inopblog/sb_inopblock values.  Regular
    2826             :          * users should never get here due to failing sb verification, but
    2827             :          * certain users (xfs_db) need to be usable even with corrupt metadata.
    2828             :          */
    2829       22495 :         if (sbp->sb_imax_pct && igeo->ialloc_blks) {
    2830             :                 /*
    2831             :                  * Make sure the maximum inode count is a multiple
    2832             :                  * of the units we allocate inodes in.
    2833             :                  */
    2834       22495 :                 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
    2835       22495 :                 do_div(icount, 100);
    2836       22495 :                 do_div(icount, igeo->ialloc_blks);
    2837       22495 :                 igeo->maxicount = XFS_FSB_TO_INO(mp,
    2838             :                                 icount * igeo->ialloc_blks);
    2839             :         } else {
    2840           0 :                 igeo->maxicount = 0;
    2841             :         }
    2842             : 
    2843             :         /*
    2844             :          * Compute the desired size of an inode cluster buffer size, which
    2845             :          * starts at 8K and (on v5 filesystems) scales up with larger inode
    2846             :          * sizes.
    2847             :          *
    2848             :          * Preserve the desired inode cluster size because the sparse inodes
    2849             :          * feature uses that desired size (not the actual size) to compute the
    2850             :          * sparse inode alignment.  The mount code validates this value, so we
    2851             :          * cannot change the behavior.
    2852             :          */
    2853       22495 :         igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
    2854       22495 :         if (xfs_has_v3inodes(mp)) {
    2855       22451 :                 int     new_size = igeo->inode_cluster_size_raw;
    2856             : 
    2857       22451 :                 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
    2858       22451 :                 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
    2859       22451 :                         igeo->inode_cluster_size_raw = new_size;
    2860             :         }
    2861             : 
    2862             :         /* Calculate inode cluster ratios. */
    2863       22495 :         if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
    2864       22284 :                 igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
    2865             :                                 igeo->inode_cluster_size_raw);
    2866             :         else
    2867         211 :                 igeo->blocks_per_cluster = 1;
    2868       22495 :         igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
    2869       22495 :         igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
    2870             : 
    2871             :         /* Calculate inode cluster alignment. */
    2872       22495 :         if (xfs_has_align(mp) &&
    2873       22495 :             mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
    2874       22475 :                 igeo->cluster_align = mp->m_sb.sb_inoalignmt;
    2875             :         else
    2876          20 :                 igeo->cluster_align = 1;
    2877       22495 :         igeo->inoalign_mask = igeo->cluster_align - 1;
    2878       22495 :         igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
    2879             : 
    2880             :         /*
    2881             :          * If we are using stripe alignment, check whether
    2882             :          * the stripe unit is a multiple of the inode alignment
    2883             :          */
    2884       22495 :         if (mp->m_dalign && igeo->inoalign_mask &&
    2885          16 :             !(mp->m_dalign & igeo->inoalign_mask))
    2886          10 :                 igeo->ialloc_align = mp->m_dalign;
    2887             :         else
    2888       22485 :                 igeo->ialloc_align = 0;
    2889       22495 : }
    2890             : 
    2891             : /* Compute the location of the root directory inode that is laid out by mkfs. */
    2892             : xfs_ino_t
    2893          14 : xfs_ialloc_calc_rootino(
    2894             :         struct xfs_mount        *mp,
    2895             :         int                     sunit)
    2896             : {
    2897          14 :         struct xfs_ino_geometry *igeo = M_IGEO(mp);
    2898          14 :         xfs_agblock_t           first_bno;
    2899             : 
    2900             :         /*
    2901             :          * Pre-calculate the geometry of AG 0.  We know what it looks like
    2902             :          * because libxfs knows how to create allocation groups now.
    2903             :          *
    2904             :          * first_bno is the first block in which mkfs could possibly have
    2905             :          * allocated the root directory inode, once we factor in the metadata
    2906             :          * that mkfs formats before it.  Namely, the four AG headers...
    2907             :          */
    2908          14 :         first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
    2909             : 
    2910             :         /* ...the two free space btree roots... */
    2911          14 :         first_bno += 2;
    2912             : 
    2913             :         /* ...the inode btree root... */
    2914          14 :         first_bno += 1;
    2915             : 
    2916             :         /* ...the initial AGFL... */
    2917          14 :         first_bno += xfs_alloc_min_freelist(mp, NULL);
    2918             : 
    2919             :         /* ...the free inode btree root... */
    2920          14 :         if (xfs_has_finobt(mp))
    2921          14 :                 first_bno++;
    2922             : 
    2923             :         /* ...the reverse mapping btree root... */
    2924          14 :         if (xfs_has_rmapbt(mp))
    2925          14 :                 first_bno++;
    2926             : 
    2927             :         /* ...the reference count btree... */
    2928          14 :         if (xfs_has_reflink(mp))
    2929          14 :                 first_bno++;
    2930             : 
    2931             :         /*
    2932             :          * ...and the log, if it is allocated in the first allocation group.
    2933             :          *
    2934             :          * This can happen with filesystems that only have a single
    2935             :          * allocation group, or very odd geometries created by old mkfs
    2936             :          * versions on very small filesystems.
    2937             :          */
    2938          14 :         if (xfs_ag_contains_log(mp, 0))
    2939           0 :                  first_bno += mp->m_sb.sb_logblocks;
    2940             : 
    2941             :         /*
    2942             :          * Now round first_bno up to whatever allocation alignment is given
    2943             :          * by the filesystem or was passed in.
    2944             :          */
    2945          14 :         if (xfs_has_dalign(mp) && igeo->ialloc_align > 0)
    2946           8 :                 first_bno = roundup(first_bno, sunit);
    2947           6 :         else if (xfs_has_align(mp) &&
    2948           6 :                         mp->m_sb.sb_inoalignmt > 1)
    2949           6 :                 first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
    2950             : 
    2951          14 :         return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
    2952             : }
    2953             : 
    2954             : /*
    2955             :  * Ensure there are not sparse inode clusters that cross the new EOAG.
    2956             :  *
    2957             :  * This is a no-op for non-spinode filesystems since clusters are always fully
    2958             :  * allocated and checking the bnobt suffices.  However, a spinode filesystem
    2959             :  * could have a record where the upper inodes are free blocks.  If those blocks
    2960             :  * were removed from the filesystem, the inode record would extend beyond EOAG,
    2961             :  * which will be flagged as corruption.
    2962             :  */
    2963             : int
    2964         204 : xfs_ialloc_check_shrink(
    2965             :         struct xfs_perag        *pag,
    2966             :         struct xfs_trans        *tp,
    2967             :         struct xfs_buf          *agibp,
    2968             :         xfs_agblock_t           new_length)
    2969             : {
    2970         204 :         struct xfs_inobt_rec_incore rec;
    2971         204 :         struct xfs_btree_cur    *cur;
    2972         204 :         xfs_agino_t             agino;
    2973         204 :         int                     has;
    2974         204 :         int                     error;
    2975             : 
    2976         204 :         if (!xfs_has_sparseinodes(pag->pag_mount))
    2977             :                 return 0;
    2978             : 
    2979         204 :         cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO);
    2980             : 
    2981             :         /* Look up the inobt record that would correspond to the new EOFS. */
    2982         204 :         agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
    2983         204 :         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
    2984         204 :         if (error || !has)
    2985          10 :                 goto out;
    2986             : 
    2987         194 :         error = xfs_inobt_get_rec(cur, &rec, &has);
    2988         194 :         if (error)
    2989           0 :                 goto out;
    2990             : 
    2991         194 :         if (!has) {
    2992           0 :                 error = -EFSCORRUPTED;
    2993           0 :                 goto out;
    2994             :         }
    2995             : 
    2996             :         /* If the record covers inodes that would be beyond EOFS, bail out. */
    2997         194 :         if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) {
    2998         121 :                 error = -ENOSPC;
    2999         121 :                 goto out;
    3000             :         }
    3001          73 : out:
    3002         204 :         xfs_btree_del_cursor(cur, error);
    3003         204 :         return error;
    3004             : }

Generated by: LCOV version 1.14