LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_btree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 2012 2263 88.9 %
Date: 2023-07-31 20:08:12 Functions: 121 122 99.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_buf_item.h"
      17             : #include "xfs_btree.h"
      18             : #include "xfs_errortag.h"
      19             : #include "xfs_error.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_alloc.h"
      22             : #include "xfs_log.h"
      23             : #include "xfs_btree_staging.h"
      24             : #include "xfs_ag.h"
      25             : #include "xfs_alloc_btree.h"
      26             : #include "xfs_ialloc_btree.h"
      27             : #include "xfs_bmap_btree.h"
      28             : #include "xfs_rmap_btree.h"
      29             : #include "xfs_refcount_btree.h"
      30             : #include "xfs_health.h"
      31             : #include "scrub/xfile.h"
      32             : #include "scrub/xfbtree.h"
      33             : #include "xfs_btree_mem.h"
      34             : 
      35             : /*
      36             :  * Btree magic numbers.
      37             :  */
      38             : uint32_t
      39 >26235*10^7 : xfs_btree_magic(
      40             :         struct xfs_mount                *mp,
      41             :         const struct xfs_btree_ops      *ops)
      42             : {
      43 >26235*10^7 :         int                             idx = xfs_has_crc(mp) ? 1 : 0;
      44 >26235*10^7 :         __be32                          magic = ops->buf_ops->magic[idx];
      45             : 
      46             :         /* Ensure we asked for crc for crc-only magics. */
      47 >26232*10^7 :         ASSERT(magic != 0);
      48 >26232*10^7 :         return be32_to_cpu(magic);
      49             : }
      50             : 
      51             : /*
      52             :  * These sibling pointer checks are optimised for null sibling pointers. This
      53             :  * happens a lot, and we don't need to byte swap at runtime if the sibling
      54             :  * pointer is NULL.
      55             :  *
      56             :  * These are explicitly marked at inline because the cost of calling them as
      57             :  * functions instead of inlining them is about 36 bytes extra code per call site
      58             :  * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
      59             :  * two sibling check functions reduces the compiled code size by over 300
      60             :  * bytes.
      61             :  */
      62             : static inline xfs_failaddr_t
      63  4341369986 : xfs_btree_check_lblock_siblings(
      64             :         struct xfs_mount        *mp,
      65             :         struct xfs_btree_cur    *cur,
      66             :         int                     level,
      67             :         xfs_fsblock_t           fsb,
      68             :         __be64                  dsibling)
      69             : {
      70  4341369986 :         xfs_fsblock_t           sibling;
      71             : 
      72  4341369986 :         if (dsibling == cpu_to_be64(NULLFSBLOCK))
      73             :                 return NULL;
      74             : 
      75  1054587194 :         sibling = be64_to_cpu(dsibling);
      76  1054587194 :         if (sibling == fsb)
      77           0 :                 return __this_address;
      78  1054587194 :         if (level >= 0) {
      79  1037679073 :                 if (!xfs_btree_check_lptr(cur, sibling, level + 1))
      80           0 :                         return __this_address;
      81    16908121 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
      82           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
      83           0 :                         return __this_address;
      84             :         } else {
      85    16908121 :                 if (!xfs_verify_fsbno(mp, sibling))
      86           0 :                         return __this_address;
      87             :         }
      88             : 
      89             :         return NULL;
      90             : }
      91             : 
      92             : static inline xfs_failaddr_t
      93 >51877*10^7 : xfs_btree_check_sblock_siblings(
      94             :         struct xfs_perag        *pag,
      95             :         struct xfs_btree_cur    *cur,
      96             :         int                     level,
      97             :         xfs_agblock_t           agbno,
      98             :         __be32                  dsibling)
      99             : {
     100 >51877*10^7 :         xfs_agblock_t           sibling;
     101             : 
     102 >51877*10^7 :         if (dsibling == cpu_to_be32(NULLAGBLOCK))
     103             :                 return NULL;
     104             : 
     105 >36723*10^7 :         sibling = be32_to_cpu(dsibling);
     106 >36723*10^7 :         if (sibling == agbno)
     107           0 :                 return __this_address;
     108 >36723*10^7 :         if (level >= 0) {
     109 >36717*10^7 :                 if (!xfs_btree_check_sptr(cur, sibling, level + 1))
     110           0 :                         return __this_address;
     111    66214159 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
     112           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
     113           0 :                         return __this_address;
     114             :         } else {
     115    66214159 :                 if (!xfs_verify_agbno(pag, sibling))
     116           0 :                         return __this_address;
     117             :         }
     118             :         return NULL;
     119             : }
     120             : 
     121             : /*
     122             :  * Check a long btree block header.  Return the address of the failing check,
     123             :  * or NULL if everything is ok.
     124             :  */
     125             : xfs_failaddr_t
     126  2151191978 : __xfs_btree_check_lblock(
     127             :         struct xfs_btree_cur    *cur,
     128             :         struct xfs_btree_block  *block,
     129             :         int                     level,
     130             :         struct xfs_buf          *bp)
     131             : {
     132  2151191978 :         struct xfs_mount        *mp = cur->bc_mp;
     133  2151191978 :         int                     crc = xfs_has_crc(mp);
     134  2151191978 :         xfs_failaddr_t          fa;
     135  2151191978 :         xfs_fsblock_t           fsb = NULLFSBLOCK;
     136             : 
     137  2151191978 :         if (crc) {
     138  2151193882 :                 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
     139           0 :                         return __this_address;
     140  4302409728 :                 if (block->bb_u.l.bb_blkno !=
     141  2151204864 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     142           0 :                         return __this_address;
     143  2151204864 :                 if (block->bb_u.l.bb_pad != cpu_to_be32(0))
     144           0 :                         return __this_address;
     145             :         }
     146             : 
     147  2151202960 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     148           0 :                 return __this_address;
     149  2151208878 :         if (be16_to_cpu(block->bb_level) != level)
     150           0 :                 return __this_address;
     151  2151209147 :         if (be16_to_cpu(block->bb_numrecs) >
     152  2151208878 :             cur->bc_ops->get_maxrecs(cur, level))
     153           0 :                 return __this_address;
     154             : 
     155  2151209147 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
     156  1158113973 :                 fsb = xfbtree_buf_to_xfoff(cur, bp);
     157   993095174 :         else if (bp)
     158   972316043 :                 fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
     159             : 
     160  2151207454 :         fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     161             :                         block->bb_u.l.bb_leftsib);
     162  2151204547 :         if (!fa)
     163  2151203144 :                 fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     164             :                                 block->bb_u.l.bb_rightsib);
     165             :         return fa;
     166             : }
     167             : 
     168             : /* Check a long btree block header. */
     169             : static int
     170  2145636038 : xfs_btree_check_lblock(
     171             :         struct xfs_btree_cur    *cur,
     172             :         struct xfs_btree_block  *block,
     173             :         int                     level,
     174             :         struct xfs_buf          *bp)
     175             : {
     176  2145636038 :         struct xfs_mount        *mp = cur->bc_mp;
     177  2145636038 :         xfs_failaddr_t          fa;
     178             : 
     179  2145636038 :         fa = __xfs_btree_check_lblock(cur, block, level, bp);
     180  4291290549 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     181  2145647438 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
     182           0 :                 if (bp)
     183           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     184           0 :                 xfs_btree_mark_sick(cur);
     185           0 :                 return -EFSCORRUPTED;
     186             :         }
     187             :         return 0;
     188             : }
     189             : 
     190             : /*
     191             :  * Check a short btree block header.  Return the address of the failing check,
     192             :  * or NULL if everything is ok.
     193             :  */
     194             : xfs_failaddr_t
     195 >26020*10^7 : __xfs_btree_check_sblock(
     196             :         struct xfs_btree_cur    *cur,
     197             :         struct xfs_btree_block  *block,
     198             :         int                     level,
     199             :         struct xfs_buf          *bp)
     200             : {
     201 >26020*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     202 >26020*10^7 :         struct xfs_perag        *pag = cur->bc_ag.pag;
     203 >26020*10^7 :         int                     crc = xfs_has_crc(mp);
     204 >26020*10^7 :         xfs_failaddr_t          fa;
     205 >26020*10^7 :         xfs_agblock_t           agbno = NULLAGBLOCK;
     206             : 
     207 >26020*10^7 :         if (crc) {
     208 >26014*10^7 :                 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
     209           0 :                         return __this_address;
     210 >51872*10^7 :                 if (block->bb_u.s.bb_blkno !=
     211 >25936*10^7 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     212           0 :                         return __this_address;
     213             :         }
     214             : 
     215 >25942*10^7 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     216           0 :                 return __this_address;
     217 >26025*10^7 :         if (be16_to_cpu(block->bb_level) != level)
     218           0 :                 return __this_address;
     219 >25994*10^7 :         if (be16_to_cpu(block->bb_numrecs) >
     220 >26025*10^7 :             cur->bc_ops->get_maxrecs(cur, level))
     221           0 :                 return __this_address;
     222             : 
     223 >25994*10^7 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
     224   865349917 :                 pag = NULL;
     225   865349917 :                 agbno = xfbtree_buf_to_xfoff(cur, bp);
     226 >25907*10^7 :         } else if (bp) {
     227 >25907*10^7 :                 agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
     228             :         }
     229             : 
     230 >25988*10^7 :         fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     231             :                         block->bb_u.s.bb_leftsib);
     232 >25944*10^7 :         if (!fa)
     233 >25952*10^7 :                 fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     234             :                                 block->bb_u.s.bb_rightsib);
     235             :         return fa;
     236             : }
     237             : 
     238             : /* Check a short btree block header. */
     239             : STATIC int
     240 >25919*10^7 : xfs_btree_check_sblock(
     241             :         struct xfs_btree_cur    *cur,
     242             :         struct xfs_btree_block  *block,
     243             :         int                     level,
     244             :         struct xfs_buf          *bp)
     245             : {
     246 >25919*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     247 >25919*10^7 :         xfs_failaddr_t          fa;
     248             : 
     249 >25919*10^7 :         fa = __xfs_btree_check_sblock(cur, block, level, bp);
     250 >51916*10^7 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     251 >25998*10^7 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
     252           0 :                 if (bp)
     253           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     254           0 :                 xfs_btree_mark_sick(cur);
     255           0 :                 return -EFSCORRUPTED;
     256             :         }
     257             :         return 0;
     258             : }
     259             : 
     260             : /*
     261             :  * Debug routine: check that block header is ok.
     262             :  */
     263             : int
     264 >26140*10^7 : xfs_btree_check_block(
     265             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     266             :         struct xfs_btree_block  *block, /* generic btree block pointer */
     267             :         int                     level,  /* level of the btree block */
     268             :         struct xfs_buf          *bp)    /* buffer containing block, if any */
     269             : {
     270 >26140*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
     271  2145644804 :                 return xfs_btree_check_lblock(cur, block, level, bp);
     272             :         else
     273 >25926*10^7 :                 return xfs_btree_check_sblock(cur, block, level, bp);
     274             : }
     275             : 
     276             : /* Check that this long pointer is valid and points within the fs. */
     277             : bool
     278  3105173622 : xfs_btree_check_lptr(
     279             :         struct xfs_btree_cur    *cur,
     280             :         xfs_fsblock_t           fsbno,
     281             :         int                     level)
     282             : {
     283  3105173622 :         if (level <= 0)
     284             :                 return false;
     285  3105173622 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     286     2396706 :                 return xfbtree_verify_xfileoff(cur, fsbno);
     287  3102776916 :         return xfs_verify_fsbno(cur->bc_mp, fsbno);
     288             : }
     289             : 
     290             : /* Check that this short pointer is valid and points within the AG. */
     291             : bool
     292 >41501*10^7 : xfs_btree_check_sptr(
     293             :         struct xfs_btree_cur    *cur,
     294             :         xfs_agblock_t           agbno,
     295             :         int                     level)
     296             : {
     297 >41501*10^7 :         if (level <= 0)
     298             :                 return false;
     299 >41501*10^7 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     300  1433185681 :                 return xfbtree_verify_xfileoff(cur, agbno);
     301 >41358*10^7 :         return xfs_verify_agbno(cur->bc_ag.pag, agbno);
     302             : }
     303             : 
     304             : /*
     305             :  * Check that a given (indexed) btree pointer at a certain level of a
     306             :  * btree is valid and doesn't point past where it should.
     307             :  */
     308             : static int
     309 51891001224 : xfs_btree_check_ptr(
     310             :         struct xfs_btree_cur            *cur,
     311             :         const union xfs_btree_ptr       *ptr,
     312             :         int                             index,
     313             :         int                             level)
     314             : {
     315 51891001224 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     316  1731222433 :                 return xfbtree_check_ptr(cur, ptr, index, level);
     317             : 
     318 50159778791 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     319  2060754645 :                 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
     320             :                                 level))
     321             :                         return 0;
     322           0 :                 xfs_err(cur->bc_mp,
     323             : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
     324             :                                 cur->bc_ino.ip->i_ino,
     325             :                                 cur->bc_ino.whichfork, cur->bc_btnum,
     326             :                                 level, index);
     327             :         } else {
     328 48099024146 :                 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
     329             :                                 level))
     330             :                         return 0;
     331           0 :                 xfs_err(cur->bc_mp,
     332             : "AG %u: Corrupt btree %d pointer at level %d index %d.",
     333             :                                 cur->bc_ag.pag->pag_agno, cur->bc_btnum,
     334             :                                 level, index);
     335             :         }
     336             : 
     337           0 :         xfs_btree_mark_sick(cur);
     338           0 :         return -EFSCORRUPTED;
     339             : }
     340             : 
     341             : #ifdef DEBUG
     342             : # define xfs_btree_debug_check_ptr      xfs_btree_check_ptr
     343             : #else
     344             : # define xfs_btree_debug_check_ptr(...) (0)
     345             : #endif
     346             : 
     347             : /*
     348             :  * Calculate CRC on the whole btree block and stuff it into the
     349             :  * long-form btree header.
     350             :  *
     351             :  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
     352             :  * it into the buffer so recovery knows what the last modification was that made
     353             :  * it to disk.
     354             :  */
     355             : void
     356     9286042 : xfs_btree_lblock_calc_crc(
     357             :         struct xfs_buf          *bp)
     358             : {
     359     9286042 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     360     9286042 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     361             : 
     362     9286042 :         if (!xfs_has_crc(bp->b_mount))
     363             :                 return;
     364     9286042 :         if (bip)
     365     9218948 :                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     366     9286042 :         xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     367             : }
     368             : 
     369             : bool
     370     5467933 : xfs_btree_lblock_verify_crc(
     371             :         struct xfs_buf          *bp)
     372             : {
     373     5467933 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     374     5467933 :         struct xfs_mount        *mp = bp->b_mount;
     375             : 
     376     5467933 :         if (xfs_has_crc(mp)) {
     377     5467933 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
     378             :                         return false;
     379     5467933 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     380             :         }
     381             : 
     382             :         return true;
     383             : }
     384             : 
     385             : /*
     386             :  * Calculate CRC on the whole btree block and stuff it into the
     387             :  * short-form btree header.
     388             :  *
     389             :  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
     390             :  * it into the buffer so recovery knows what the last modification was that made
     391             :  * it to disk.
     392             :  */
     393             : void
     394    24116236 : xfs_btree_sblock_calc_crc(
     395             :         struct xfs_buf          *bp)
     396             : {
     397    24116236 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     398    24116236 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     399             : 
     400    24116236 :         if (!xfs_has_crc(bp->b_mount))
     401             :                 return;
     402    24075302 :         if (bip)
     403    23029684 :                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     404    24075302 :         xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     405             : }
     406             : 
     407             : bool
     408     2764934 : xfs_btree_sblock_verify_crc(
     409             :         struct xfs_buf          *bp)
     410             : {
     411     2764934 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     412     2764934 :         struct xfs_mount        *mp = bp->b_mount;
     413             : 
     414     2764934 :         if (xfs_has_crc(mp)) {
     415     2764321 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
     416             :                         return false;
     417     2764321 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     418             :         }
     419             : 
     420             :         return true;
     421             : }
     422             : 
     423             : static int
     424      539377 : xfs_btree_free_block(
     425             :         struct xfs_btree_cur    *cur,
     426             :         struct xfs_buf          *bp)
     427             : {
     428      539377 :         int                     error;
     429             : 
     430      539377 :         trace_xfs_btree_free_block(cur, bp);
     431             : 
     432      539376 :         error = cur->bc_ops->free_block(cur, bp);
     433      539376 :         if (!error) {
     434      539376 :                 xfs_trans_binval(cur->bc_tp, bp);
     435      539377 :                 XFS_BTREE_STATS_INC(cur, free);
     436             :         }
     437      539377 :         return error;
     438             : }
     439             : 
     440             : /*
     441             :  * Delete the btree cursor.
     442             :  */
     443             : void
     444  7295323073 : xfs_btree_del_cursor(
     445             :         struct xfs_btree_cur    *cur,           /* btree cursor */
     446             :         int                     error)          /* del because of error */
     447             : {
     448  7295323073 :         int                     i;              /* btree level */
     449             : 
     450             :         /*
     451             :          * Clear the buffer pointers and release the buffers. If we're doing
     452             :          * this because of an error, inspect all of the entries in the bc_bufs
     453             :          * array for buffers to be unlocked. This is because some of the btree
     454             :          * code works from level n down to 0, and if we get an error along the
     455             :          * way we won't have initialized all the entries down to 0.
     456             :          */
     457 19073681543 :         for (i = 0; i < cur->bc_nlevels; i++) {
     458 12199803747 :                 if (cur->bc_levels[i].bp)
     459 10745834552 :                         xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
     460  1453969195 :                 else if (!error)
     461             :                         break;
     462             :         }
     463             : 
     464             :         /*
     465             :          * If we are doing a BMBT update, the number of unaccounted blocks
     466             :          * allocated during this cursor life time should be zero. If it's not
     467             :          * zero, then we should be shut down or on our way to shutdown due to
     468             :          * cancelling a dirty transaction on error.
     469             :          */
     470  7297853731 :         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
     471             :                xfs_is_shutdown(cur->bc_mp) || error != 0);
     472  7297853731 :         if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
     473         156 :                 kmem_free(cur->bc_ops);
     474  7297853731 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
     475  6396235432 :             !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
     476  6396235432 :                 xfs_perag_put(cur->bc_ag.pag);
     477  7301676974 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
     478   500416678 :                 if (cur->bc_mem.pag)
     479    77274079 :                         xfs_perag_put(cur->bc_mem.pag);
     480             :         }
     481  7301678919 :         kmem_cache_free(cur->bc_cache, cur);
     482  7299774605 : }
     483             : 
     484             : /* Return the buffer target for this btree's buffer. */
     485             : static inline struct xfs_buftarg *
     486 12926831726 : xfs_btree_buftarg(
     487             :         struct xfs_btree_cur    *cur)
     488             : {
     489 12926831726 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     490   693455728 :                 return xfbtree_target(cur->bc_mem.xfbtree);
     491 12233375998 :         return cur->bc_mp->m_ddev_targp;
     492             : }
     493             : 
     494             : /* Return the block size (in units of 512b sectors) for this btree. */
     495             : static inline unsigned int
     496 12929090270 : xfs_btree_bbsize(
     497             :         struct xfs_btree_cur    *cur)
     498             : {
     499 12929090270 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     500   693477066 :                 return xfbtree_bbsize();
     501 12235613204 :         return cur->bc_mp->m_bsize;
     502             : }
     503             : 
     504             : /*
     505             :  * Duplicate the btree cursor.
     506             :  * Allocate a new one, copy the record, re-get the buffers.
     507             :  */
     508             : int                                     /* error */
     509   292887405 : xfs_btree_dup_cursor(
     510             :         struct xfs_btree_cur *cur,              /* input cursor */
     511             :         struct xfs_btree_cur **ncur)            /* output cursor */
     512             : {
     513   292887405 :         struct xfs_buf  *bp;            /* btree block's buffer pointer */
     514   292887405 :         int             error;          /* error return value */
     515   292887405 :         int             i;              /* level number of btree block */
     516   292887405 :         xfs_mount_t     *mp;            /* mount structure for filesystem */
     517   292887405 :         struct xfs_btree_cur *new;              /* new cursor value */
     518   292887405 :         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
     519             : 
     520   292887405 :         tp = cur->bc_tp;
     521   292887405 :         mp = cur->bc_mp;
     522             : 
     523             :         /*
     524             :          * Allocate a new cursor like the old one.
     525             :          */
     526   292887405 :         new = cur->bc_ops->dup_cursor(cur);
     527             : 
     528             :         /*
     529             :          * Copy the record currently in the cursor.
     530             :          */
     531   293015044 :         new->bc_rec = cur->bc_rec;
     532             : 
     533             :         /*
     534             :          * For each level current, re-get the buffer and copy the ptr value.
     535             :          */
     536   896645051 :         for (i = 0; i < new->bc_nlevels; i++) {
     537   604293338 :                 new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
     538   604293338 :                 new->bc_levels[i].ra = cur->bc_levels[i].ra;
     539   604293338 :                 bp = cur->bc_levels[i].bp;
     540   604293338 :                 if (bp) {
     541   548832963 :                         error = xfs_trans_read_buf(mp, tp,
     542             :                                         xfs_btree_buftarg(cur),
     543             :                                         xfs_buf_daddr(bp),
     544   549017838 :                                         xfs_btree_bbsize(cur), 0, &bp,
     545   549017838 :                                         cur->bc_ops->buf_ops);
     546   548354513 :                         if (xfs_metadata_is_sick(error))
     547           0 :                                 xfs_btree_mark_sick(new);
     548   548354513 :                         if (error) {
     549           6 :                                 xfs_btree_del_cursor(new, error);
     550           6 :                                 *ncur = NULL;
     551           6 :                                 return error;
     552             :                         }
     553             :                 }
     554   603630007 :                 new->bc_levels[i].bp = bp;
     555             :         }
     556   292351713 :         *ncur = new;
     557   292351713 :         return 0;
     558             : }
     559             : 
     560             : /*
     561             :  * XFS btree block layout and addressing:
     562             :  *
     563             :  * There are two types of blocks in the btree: leaf and non-leaf blocks.
     564             :  *
     565             :  * The leaf record start with a header then followed by records containing
     566             :  * the values.  A non-leaf block also starts with the same header, and
     567             :  * then first contains lookup keys followed by an equal number of pointers
     568             :  * to the btree blocks at the previous level.
     569             :  *
     570             :  *              +--------+-------+-------+-------+-------+-------+-------+
     571             :  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
     572             :  *              +--------+-------+-------+-------+-------+-------+-------+
     573             :  *
     574             :  *              +--------+-------+-------+-------+-------+-------+-------+
     575             :  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
     576             :  *              +--------+-------+-------+-------+-------+-------+-------+
     577             :  *
     578             :  * The header is called struct xfs_btree_block for reasons better left unknown
     579             :  * and comes in different versions for short (32bit) and long (64bit) block
     580             :  * pointers.  The record and key structures are defined by the btree instances
     581             :  * and opaque to the btree core.  The block pointers are simple disk endian
     582             :  * integers, available in a short (32bit) and long (64bit) variant.
     583             :  *
     584             :  * The helpers below calculate the offset of a given record, key or pointer
     585             :  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
     586             :  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
     587             :  * inside the btree block is done using indices starting at one, not zero!
     588             :  *
     589             :  * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
     590             :  * overlapping intervals.  In such a tree, records are still sorted lowest to
     591             :  * highest and indexed by the smallest key value that refers to the record.
     592             :  * However, nodes are different: each pointer has two associated keys -- one
     593             :  * indexing the lowest key available in the block(s) below (the same behavior
     594             :  * as the key in a regular btree) and another indexing the highest key
     595             :  * available in the block(s) below.  Because records are /not/ sorted by the
     596             :  * highest key, all leaf block updates require us to compute the highest key
     597             :  * that matches any record in the leaf and to recursively update the high keys
     598             :  * in the nodes going further up in the tree, if necessary.  Nodes look like
     599             :  * this:
     600             :  *
     601             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     602             :  * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
     603             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     604             :  *
     605             :  * To perform an interval query on an overlapped tree, perform the usual
     606             :  * depth-first search and use the low and high keys to decide if we can skip
     607             :  * that particular node.  If a leaf node is reached, return the records that
     608             :  * intersect the interval.  Note that an interval query may return numerous
     609             :  * entries.  For a non-overlapped tree, simply search for the record associated
     610             :  * with the lowest key and iterate forward until a non-matching record is
     611             :  * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
     612             :  * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
     613             :  * more detail.
     614             :  *
     615             :  * Why do we care about overlapping intervals?  Let's say you have a bunch of
     616             :  * reverse mapping records on a reflink filesystem:
     617             :  *
     618             :  * 1: +- file A startblock B offset C length D -----------+
     619             :  * 2:      +- file E startblock F offset G length H --------------+
     620             :  * 3:      +- file I startblock F offset J length K --+
     621             :  * 4:                                                        +- file L... --+
     622             :  *
     623             :  * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
     624             :  * we'd simply increment the length of record 1.  But how do we find the record
     625             :  * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
     626             :  * record 3 because the keys are ordered first by startblock.  An interval
     627             :  * query would return records 1 and 2 because they both overlap (B+D-1), and
     628             :  * from that we can pick out record 1 as the appropriate left neighbor.
     629             :  *
     630             :  * In the non-overlapped case you can do a LE lookup and decrement the cursor
     631             :  * because a record's interval must end before the next record.
     632             :  */
     633             : 
     634             : /*
     635             :  * Return size of the btree block header for this btree instance.
     636             :  */
     637 >58776*10^7 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
     638             : {
     639 >58776*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     640  9347329906 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     641             :                         return XFS_BTREE_LBLOCK_CRC_LEN;
     642           0 :                 return XFS_BTREE_LBLOCK_LEN;
     643             :         }
     644 >57841*10^7 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     645 >57845*10^7 :                 return XFS_BTREE_SBLOCK_CRC_LEN;
     646             :         return XFS_BTREE_SBLOCK_LEN;
     647             : }
     648             : 
     649             : /*
     650             :  * Return size of btree block pointers for this btree instance.
     651             :  */
     652             : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
     653             : {
     654 51350042426 :         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
     655 51350042426 :                 sizeof(__be64) : sizeof(__be32);
     656             : }
     657             : 
     658             : /*
     659             :  * Calculate offset of the n-th record in a btree block.
     660             :  */
     661             : STATIC size_t
     662 >40452*10^7 : xfs_btree_rec_offset(
     663             :         struct xfs_btree_cur    *cur,
     664             :         int                     n)
     665             : {
     666 >40452*10^7 :         return xfs_btree_block_len(cur) +
     667 >40452*10^7 :                 (n - 1) * cur->bc_ops->rec_len;
     668             : }
     669             : 
     670             : /*
     671             :  * Calculate offset of the n-th key in a btree block.
     672             :  */
     673             : STATIC size_t
     674 79521521610 : xfs_btree_key_offset(
     675             :         struct xfs_btree_cur    *cur,
     676             :         int                     n)
     677             : {
     678 79521521610 :         return xfs_btree_block_len(cur) +
     679 79521521610 :                 (n - 1) * cur->bc_ops->key_len;
     680             : }
     681             : 
     682             : /*
     683             :  * Calculate offset of the n-th high key in a btree block.
     684             :  */
     685             : STATIC size_t
     686 54969471006 : xfs_btree_high_key_offset(
     687             :         struct xfs_btree_cur    *cur,
     688             :         int                     n)
     689             : {
     690 54969471006 :         return xfs_btree_block_len(cur) +
     691 54969471006 :                 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
     692             : }
     693             : 
     694             : /*
     695             :  * Calculate offset of the n-th block pointer in a btree block.
     696             :  */
     697             : STATIC size_t
     698 51334703141 : xfs_btree_ptr_offset(
     699             :         struct xfs_btree_cur    *cur,
     700             :         int                     n,
     701             :         int                     level)
     702             : {
     703 51334703141 :         return xfs_btree_block_len(cur) +
     704 >10267*10^7 :                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
     705 >10197*10^7 :                 (n - 1) * xfs_btree_ptr_len(cur);
     706             : }
     707             : 
     708             : /*
     709             :  * Return a pointer to the n-th record in the btree block.
     710             :  */
     711             : union xfs_btree_rec *
     712  3061430958 : xfs_btree_rec_addr(
     713             :         struct xfs_btree_cur    *cur,
     714             :         int                     n,
     715             :         struct xfs_btree_block  *block)
     716             : {
     717 >40054*10^7 :         return (union xfs_btree_rec *)
     718 >40054*10^7 :                 ((char *)block + xfs_btree_rec_offset(cur, n));
     719             : }
     720             : 
     721             : /*
     722             :  * Return a pointer to the n-th key in the btree block.
     723             :  */
     724             : union xfs_btree_key *
     725  2628470463 : xfs_btree_key_addr(
     726             :         struct xfs_btree_cur    *cur,
     727             :         int                     n,
     728             :         struct xfs_btree_block  *block)
     729             : {
     730 78894257405 :         return (union xfs_btree_key *)
     731 78894257405 :                 ((char *)block + xfs_btree_key_offset(cur, n));
     732             : }
     733             : 
     734             : /*
     735             :  * Return a pointer to the n-th high key in the btree block.
     736             :  */
     737             : union xfs_btree_key *
     738  1831116549 : xfs_btree_high_key_addr(
     739             :         struct xfs_btree_cur    *cur,
     740             :         int                     n,
     741             :         struct xfs_btree_block  *block)
     742             : {
     743 54998042695 :         return (union xfs_btree_key *)
     744 54998042695 :                 ((char *)block + xfs_btree_high_key_offset(cur, n));
     745             : }
     746             : 
     747             : /*
     748             :  * Return a pointer to the n-th block pointer in the btree block.
     749             :  */
     750             : union xfs_btree_ptr *
     751 51348998043 : xfs_btree_ptr_addr(
     752             :         struct xfs_btree_cur    *cur,
     753             :         int                     n,
     754             :         struct xfs_btree_block  *block)
     755             : {
     756 51348998043 :         int                     level = xfs_btree_get_level(block);
     757             : 
     758 51348998043 :         ASSERT(block->bb_level != 0);
     759             : 
     760 >10268*10^7 :         return (union xfs_btree_ptr *)
     761 51348998043 :                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
     762             : }
     763             : 
     764             : struct xfs_ifork *
     765   916991643 : xfs_btree_ifork_ptr(
     766             :         struct xfs_btree_cur    *cur)
     767             : {
     768   916991643 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
     769             : 
     770   916991643 :         if (cur->bc_flags & XFS_BTREE_STAGING)
     771       52223 :                 return cur->bc_ino.ifake->if_fork;
     772   916939420 :         return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
     773             : }
     774             : 
     775             : /*
     776             :  * Get the root block which is stored in the inode.
     777             :  *
     778             :  * For now this btree implementation assumes the btree root is always
     779             :  * stored in the if_broot field of an inode fork.
     780             :  */
     781             : STATIC struct xfs_btree_block *
     782   512220285 : xfs_btree_get_iroot(
     783             :         struct xfs_btree_cur    *cur)
     784             : {
     785   512220285 :         struct xfs_ifork        *ifp = xfs_btree_ifork_ptr(cur);
     786             : 
     787   512219960 :         return (struct xfs_btree_block *)ifp->if_broot;
     788             : }
     789             : 
     790             : /*
     791             :  * Retrieve the block pointer from the cursor at the given level.
     792             :  * This may be an inode btree root or from a buffer.
     793             :  */
     794             : struct xfs_btree_block *                /* generic btree block pointer */
     795 >38974*10^7 : xfs_btree_get_block(
     796             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     797             :         int                     level,  /* level in btree */
     798             :         struct xfs_buf          **bpp)  /* buffer containing the block */
     799             : {
     800 >38974*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     801  1940161147 :             (level == cur->bc_nlevels - 1)) {
     802   140530203 :                 *bpp = NULL;
     803   140530203 :                 return xfs_btree_get_iroot(cur);
     804             :         }
     805             : 
     806 >38960*10^7 :         *bpp = cur->bc_levels[level].bp;
     807 >38960*10^7 :         return XFS_BUF_TO_BLOCK(*bpp);
     808             : }
     809             : 
     810             : /*
     811             :  * Change the cursor to point to the first record at the given level.
     812             :  * Other levels are unaffected.
     813             :  */
     814             : STATIC int                              /* success=1, failure=0 */
     815   105957616 : xfs_btree_firstrec(
     816             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     817             :         int                     level)  /* level to change */
     818             : {
     819   105957616 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     820   105957616 :         struct xfs_buf          *bp;    /* buffer containing block */
     821             : 
     822             :         /*
     823             :          * Get the block pointer for this level.
     824             :          */
     825   105957616 :         block = xfs_btree_get_block(cur, level, &bp);
     826   105957564 :         if (xfs_btree_check_block(cur, block, level, bp))
     827             :                 return 0;
     828             :         /*
     829             :          * It's empty, there is no such record.
     830             :          */
     831   105957536 :         if (!block->bb_numrecs)
     832             :                 return 0;
     833             :         /*
     834             :          * Set the ptr value to 1, that's the first record/key.
     835             :          */
     836   105957536 :         cur->bc_levels[level].ptr = 1;
     837   105957536 :         return 1;
     838             : }
     839             : 
     840             : /*
     841             :  * Change the cursor to point to the last record in the current block
     842             :  * at the given level.  Other levels are unaffected.
     843             :  */
     844             : STATIC int                              /* success=1, failure=0 */
     845   116023022 : xfs_btree_lastrec(
     846             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     847             :         int                     level)  /* level to change */
     848             : {
     849   116023022 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     850   116023022 :         struct xfs_buf          *bp;    /* buffer containing block */
     851             : 
     852             :         /*
     853             :          * Get the block pointer for this level.
     854             :          */
     855   116023022 :         block = xfs_btree_get_block(cur, level, &bp);
     856   116022362 :         if (xfs_btree_check_block(cur, block, level, bp))
     857             :                 return 0;
     858             :         /*
     859             :          * It's empty, there is no such record.
     860             :          */
     861   116025766 :         if (!block->bb_numrecs)
     862             :                 return 0;
     863             :         /*
     864             :          * Set the ptr value to numrecs, that's the last record/key.
     865             :          */
     866   116025766 :         cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
     867   116025766 :         return 1;
     868             : }
     869             : 
     870             : /*
     871             :  * Compute first and last byte offsets for the fields given.
     872             :  * Interprets the offsets table, which contains struct field offsets.
     873             :  */
     874             : void
     875  2475491417 : xfs_btree_offsets(
     876             :         uint32_t        fields,         /* bitmask of fields */
     877             :         const short     *offsets,       /* table of field offsets */
     878             :         int             nbits,          /* number of bits to inspect */
     879             :         int             *first,         /* output: first byte offset */
     880             :         int             *last)          /* output: last byte offset */
     881             : {
     882  2475491417 :         int             i;              /* current bit number */
     883  2475491417 :         uint32_t        imask;          /* mask for current bit number */
     884             : 
     885  2475491417 :         ASSERT(fields != 0);
     886             :         /*
     887             :          * Find the lowest bit, so the first byte offset.
     888             :          */
     889 10201584462 :         for (i = 0, imask = 1u; ; i++, imask <<= 1) {
     890 10201584462 :                 if (imask & fields) {
     891  2475491417 :                         *first = offsets[i];
     892  2475491417 :                         break;
     893             :                 }
     894             :         }
     895             :         /*
     896             :          * Find the highest bit, so the last byte offset.
     897             :          */
     898  2475491417 :         for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
     899 17083555863 :                 if (imask & fields) {
     900  2475491417 :                         *last = offsets[i + 1] - 1;
     901  2475491417 :                         break;
     902             :                 }
     903             :         }
     904  2475491417 : }
     905             : 
     906             : /*
     907             :  * Get a buffer for the block, return it read in.
     908             :  * Long-form addressing.
     909             :  */
     910             : int
     911   444718120 : xfs_btree_read_bufl(
     912             :         struct xfs_mount        *mp,            /* file system mount point */
     913             :         struct xfs_trans        *tp,            /* transaction pointer */
     914             :         xfs_fsblock_t           fsbno,          /* file system block number */
     915             :         struct xfs_buf          **bpp,          /* buffer for fsbno */
     916             :         int                     refval,         /* ref count value for buffer */
     917             :         const struct xfs_buf_ops *ops)
     918             : {
     919   444718120 :         struct xfs_buf          *bp;            /* return value */
     920   444718120 :         xfs_daddr_t             d;              /* real disk block address */
     921   444718120 :         int                     error;
     922             : 
     923   444718120 :         if (!xfs_verify_fsbno(mp, fsbno))
     924             :                 return -EFSCORRUPTED;
     925   444716260 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     926   444716065 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
     927             :                                    mp->m_bsize, 0, &bp, ops);
     928   444716966 :         if (error)
     929             :                 return error;
     930   444716900 :         if (bp)
     931   444716900 :                 xfs_buf_set_ref(bp, refval);
     932   444709606 :         *bpp = bp;
     933   444709606 :         return 0;
     934             : }
     935             : 
     936             : /*
     937             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     938             :  * Long-form addressing.
     939             :  */
     940             : /* ARGSUSED */
     941             : void
     942    58203206 : xfs_btree_reada_bufl(
     943             :         struct xfs_mount        *mp,            /* file system mount point */
     944             :         xfs_fsblock_t           fsbno,          /* file system block number */
     945             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     946             :         const struct xfs_buf_ops *ops)
     947             : {
     948    58203206 :         xfs_daddr_t             d;
     949             : 
     950    58203206 :         ASSERT(fsbno != NULLFSBLOCK);
     951    58203206 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     952    58203157 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     953    58203505 : }
     954             : 
     955             : /*
     956             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     957             :  * Short-form addressing.
     958             :  */
     959             : /* ARGSUSED */
     960             : void
     961  6025765672 : xfs_btree_reada_bufs(
     962             :         struct xfs_mount        *mp,            /* file system mount point */
     963             :         xfs_agnumber_t          agno,           /* allocation group number */
     964             :         xfs_agblock_t           agbno,          /* allocation group block number */
     965             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     966             :         const struct xfs_buf_ops *ops)
     967             : {
     968  6025765672 :         xfs_daddr_t             d;
     969             : 
     970  6025765672 :         ASSERT(agno != NULLAGNUMBER);
     971  6025765672 :         ASSERT(agbno != NULLAGBLOCK);
     972  6025765672 :         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
     973  6025765672 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     974  6026778002 : }
     975             : 
     976             : STATIC int
     977    58209229 : xfs_btree_readahead_lblock(
     978             :         struct xfs_btree_cur    *cur,
     979             :         int                     lr,
     980             :         struct xfs_btree_block  *block)
     981             : {
     982    58209229 :         int                     rval = 0;
     983    58209229 :         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
     984    58209229 :         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
     985             : 
     986    58209229 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     987             :                 return 0;
     988             : 
     989    58203638 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
     990    23968055 :                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
     991    23968055 :                                      cur->bc_ops->buf_ops);
     992    23968055 :                 rval++;
     993             :         }
     994             : 
     995    58203644 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
     996    34235167 :                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
     997    34235167 :                                      cur->bc_ops->buf_ops);
     998    34235442 :                 rval++;
     999             :         }
    1000             : 
    1001             :         return rval;
    1002             : }
    1003             : 
    1004             : STATIC int
    1005  2396241584 : xfs_btree_readahead_sblock(
    1006             :         struct xfs_btree_cur    *cur,
    1007             :         int                     lr,
    1008             :         struct xfs_btree_block *block)
    1009             : {
    1010  2396241584 :         int                     rval = 0;
    1011  2396241584 :         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
    1012  2396241584 :         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
    1013             : 
    1014  2396241584 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1015             :                 return 0;
    1016             : 
    1017  2362829001 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
    1018   126091101 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1019   126091101 :                                      left, 1, cur->bc_ops->buf_ops);
    1020   126091101 :                 rval++;
    1021             :         }
    1022             : 
    1023  2362830711 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
    1024  2236655635 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1025  2236655635 :                                      right, 1, cur->bc_ops->buf_ops);
    1026  2237338106 :                 rval++;
    1027             :         }
    1028             : 
    1029             :         return rval;
    1030             : }
    1031             : 
    1032             : /*
    1033             :  * Read-ahead btree blocks, at the given level.
    1034             :  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
    1035             :  */
    1036             : STATIC int
    1037 >12404*10^7 : xfs_btree_readahead(
    1038             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    1039             :         int                     lev,            /* level in btree */
    1040             :         int                     lr)             /* left/right bits */
    1041             : {
    1042 >12404*10^7 :         struct xfs_btree_block  *block;
    1043             : 
    1044             :         /*
    1045             :          * No readahead needed if we are at the root level and the
    1046             :          * btree root is stored in the inode.
    1047             :          */
    1048 >12404*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1049   114227898 :             (lev == cur->bc_nlevels - 1))
    1050             :                 return 0;
    1051             : 
    1052 >12403*10^7 :         if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
    1053             :                 return 0;
    1054             : 
    1055  2454656958 :         cur->bc_levels[lev].ra |= lr;
    1056  2454656958 :         block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
    1057             : 
    1058  2454656958 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1059    58209322 :                 return xfs_btree_readahead_lblock(cur, lr, block);
    1060  2396447636 :         return xfs_btree_readahead_sblock(cur, lr, block);
    1061             : }
    1062             : 
    1063             : STATIC int
    1064 42265350697 : xfs_btree_ptr_to_daddr(
    1065             :         struct xfs_btree_cur            *cur,
    1066             :         const union xfs_btree_ptr       *ptr,
    1067             :         xfs_daddr_t                     *daddr)
    1068             : {
    1069 42265350697 :         xfs_fsblock_t           fsbno;
    1070 42265350697 :         xfs_agblock_t           agbno;
    1071 42265350697 :         int                     error;
    1072             : 
    1073 42265350697 :         error = xfs_btree_check_ptr(cur, ptr, 0, 1);
    1074 42259767470 :         if (error)
    1075             :                 return error;
    1076             : 
    1077 42259767470 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1078  1491588093 :                 *daddr = xfbtree_ptr_to_daddr(cur, ptr);
    1079  1491571569 :                 return 0;
    1080             :         }
    1081             : 
    1082 40768179377 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1083  1390904653 :                 fsbno = be64_to_cpu(ptr->l);
    1084  1390904653 :                 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
    1085             :         } else {
    1086 39377274724 :                 agbno = be32_to_cpu(ptr->s);
    1087 39377274724 :                 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1088             :                                 agbno);
    1089             :         }
    1090             : 
    1091             :         return 0;
    1092             : }
    1093             : 
    1094             : /*
    1095             :  * Readahead @count btree blocks at the given @ptr location.
    1096             :  *
    1097             :  * We don't need to care about long or short form btrees here as we have a
    1098             :  * method of converting the ptr directly to a daddr available to us.
    1099             :  */
    1100             : STATIC void
    1101     8455362 : xfs_btree_readahead_ptr(
    1102             :         struct xfs_btree_cur    *cur,
    1103             :         union xfs_btree_ptr     *ptr,
    1104             :         xfs_extlen_t            count)
    1105             : {
    1106     8455362 :         xfs_daddr_t             daddr;
    1107             : 
    1108     8455362 :         if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
    1109           0 :                 return;
    1110    16911438 :         xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
    1111     8455753 :                         xfs_btree_bbsize(cur) * count,
    1112     8455685 :                         cur->bc_ops->buf_ops);
    1113             : }
    1114             : 
    1115             : /*
    1116             :  * Set the buffer for level "lev" in the cursor to bp, releasing
    1117             :  * any previous buffer.
    1118             :  */
    1119             : STATIC void
    1120 12140519686 : xfs_btree_setbuf(
    1121             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1122             :         int                     lev,    /* level in btree */
    1123             :         struct xfs_buf          *bp)    /* new buffer to set */
    1124             : {
    1125 12140519686 :         struct xfs_btree_block  *b;     /* btree block */
    1126             : 
    1127 12140519686 :         if (cur->bc_levels[lev].bp)
    1128  1941221336 :                 xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
    1129 12140585788 :         cur->bc_levels[lev].bp = bp;
    1130 12140585788 :         cur->bc_levels[lev].ra = 0;
    1131             : 
    1132 12140585788 :         b = XFS_BUF_TO_BLOCK(bp);
    1133 12140585788 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1134  1072576279 :                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
    1135   700008112 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1136  1072576279 :                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
    1137   915155389 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1138             :         } else {
    1139 11068009509 :                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
    1140  6659876794 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1141 11068009509 :                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
    1142  7051175039 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1143             :         }
    1144 12140585788 : }
    1145             : 
    1146             : bool
    1147    29116452 : xfs_btree_ptr_is_null(
    1148             :         struct xfs_btree_cur            *cur,
    1149             :         const union xfs_btree_ptr       *ptr)
    1150             : {
    1151   305456144 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1152  1359847894 :                 return ptr->l == cpu_to_be64(NULLFSBLOCK);
    1153             :         else
    1154  5982091589 :                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
    1155             : }
    1156             : 
    1157             : void
    1158     1688407 : xfs_btree_set_ptr_null(
    1159             :         struct xfs_btree_cur    *cur,
    1160             :         union xfs_btree_ptr     *ptr)
    1161             : {
    1162     1688407 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1163   563601102 :                 ptr->l = cpu_to_be64(NULLFSBLOCK);
    1164             :         else
    1165  1286175834 :                 ptr->s = cpu_to_be32(NULLAGBLOCK);
    1166     1688407 : }
    1167             : 
    1168             : /*
    1169             :  * Get/set/init sibling pointers
    1170             :  */
    1171             : void
    1172  6025948254 : xfs_btree_get_sibling(
    1173             :         struct xfs_btree_cur    *cur,
    1174             :         struct xfs_btree_block  *block,
    1175             :         union xfs_btree_ptr     *ptr,
    1176             :         int                     lr)
    1177             : {
    1178  6025948254 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1179             : 
    1180  6025948254 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1181   825597102 :                 if (lr == XFS_BB_RIGHTSIB)
    1182   564346206 :                         ptr->l = block->bb_u.l.bb_rightsib;
    1183             :                 else
    1184   261250896 :                         ptr->l = block->bb_u.l.bb_leftsib;
    1185             :         } else {
    1186  5200351152 :                 if (lr == XFS_BB_RIGHTSIB)
    1187  4998087703 :                         ptr->s = block->bb_u.s.bb_rightsib;
    1188             :                 else
    1189   202263449 :                         ptr->s = block->bb_u.s.bb_leftsib;
    1190             :         }
    1191  6025948254 : }
    1192             : 
    1193             : void
    1194     8938561 : xfs_btree_set_sibling(
    1195             :         struct xfs_btree_cur            *cur,
    1196             :         struct xfs_btree_block          *block,
    1197             :         const union xfs_btree_ptr       *ptr,
    1198             :         int                             lr)
    1199             : {
    1200     8938561 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1201             : 
    1202     8938561 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1203     1797493 :                 if (lr == XFS_BB_RIGHTSIB)
    1204     1191836 :                         block->bb_u.l.bb_rightsib = ptr->l;
    1205             :                 else
    1206      605657 :                         block->bb_u.l.bb_leftsib = ptr->l;
    1207             :         } else {
    1208     7141068 :                 if (lr == XFS_BB_RIGHTSIB)
    1209     3599484 :                         block->bb_u.s.bb_rightsib = ptr->s;
    1210             :                 else
    1211     3541584 :                         block->bb_u.s.bb_leftsib = ptr->s;
    1212             :         }
    1213     8938561 : }
    1214             : 
    1215             : static void
    1216    22277597 : __xfs_btree_init_block(
    1217             :         struct xfs_mount        *mp,
    1218             :         struct xfs_btree_block  *buf,
    1219             :         const struct xfs_btree_ops *ops,
    1220             :         xfs_daddr_t             blkno,
    1221             :         __u16                   level,
    1222             :         __u16                   numrecs,
    1223             :         __u64                   owner)
    1224             : {
    1225    22277597 :         int                     crc = xfs_has_crc(mp);
    1226    22277597 :         __u32                   magic = xfs_btree_magic(mp, ops);
    1227             : 
    1228    22277745 :         buf->bb_magic = cpu_to_be32(magic);
    1229    22277745 :         buf->bb_level = cpu_to_be16(level);
    1230    22277745 :         buf->bb_numrecs = cpu_to_be16(numrecs);
    1231             : 
    1232    22277745 :         if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
    1233    19809989 :                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
    1234    19809989 :                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
    1235    19809989 :                 if (crc) {
    1236    19809997 :                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
    1237    19809997 :                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
    1238    19809997 :                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1239    19809976 :                         buf->bb_u.l.bb_pad = 0;
    1240    19809976 :                         buf->bb_u.l.bb_lsn = 0;
    1241             :                 }
    1242             :         } else {
    1243             :                 /* owner is a 32 bit value on short blocks */
    1244     2467756 :                 __u32 __owner = (__u32)owner;
    1245             : 
    1246     2467756 :                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
    1247     2467756 :                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
    1248     2467756 :                 if (crc) {
    1249     2427015 :                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
    1250     2427015 :                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
    1251     2427015 :                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1252     2426750 :                         buf->bb_u.s.bb_lsn = 0;
    1253             :                 }
    1254             :         }
    1255    22277459 : }
    1256             : 
    1257             : void
    1258    16677926 : xfs_btree_init_block(
    1259             :         struct xfs_mount        *mp,
    1260             :         struct xfs_btree_block  *block,
    1261             :         const struct xfs_btree_ops *ops,
    1262             :         __u16                   level,
    1263             :         __u16                   numrecs,
    1264             :         __u64                   owner)
    1265             : {
    1266    16677926 :         __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
    1267             :                         numrecs, owner);
    1268    16677919 : }
    1269             : 
    1270             : void
    1271     5599587 : xfs_btree_init_buf(
    1272             :         struct xfs_mount                *mp,
    1273             :         struct xfs_buf                  *bp,
    1274             :         const struct xfs_btree_ops      *ops,
    1275             :         __u16                           level,
    1276             :         __u16                           numrecs,
    1277             :         __u64                           owner)
    1278             : {
    1279     5599587 :         __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
    1280             :                         xfs_buf_daddr(bp), level, numrecs, owner);
    1281     5599774 :         bp->b_ops = ops->buf_ops;
    1282     5599774 : }
    1283             : 
    1284             : void
    1285     2851893 : xfs_btree_init_block_cur(
    1286             :         struct xfs_btree_cur    *cur,
    1287             :         struct xfs_buf          *bp,
    1288             :         int                     level,
    1289             :         int                     numrecs)
    1290             : {
    1291     2851893 :         __u64                   owner;
    1292             : 
    1293             :         /*
    1294             :          * we can pull the owner from the cursor right now as the different
    1295             :          * owners align directly with the pointer size of the btree. This may
    1296             :          * change in future, but is safe for current users of the generic btree
    1297             :          * code.
    1298             :          */
    1299     2851893 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1300      378885 :                 owner = xfbtree_owner(cur);
    1301     2473008 :         else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1302      548187 :                 owner = cur->bc_ino.ip->i_ino;
    1303             :         else
    1304     1924821 :                 owner = cur->bc_ag.pag->pag_agno;
    1305             : 
    1306     2851893 :         xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
    1307     2851918 : }
    1308             : 
    1309             : /*
    1310             :  * Return true if ptr is the last record in the btree and
    1311             :  * we need to track updates to this record.  The decision
    1312             :  * will be further refined in the update_lastrec method.
    1313             :  */
    1314             : STATIC int
    1315  2513866827 : xfs_btree_is_lastrec(
    1316             :         struct xfs_btree_cur    *cur,
    1317             :         struct xfs_btree_block  *block,
    1318             :         int                     level)
    1319             : {
    1320  2513866827 :         union xfs_btree_ptr     ptr;
    1321             : 
    1322  2513866827 :         if (level > 0)
    1323             :                 return 0;
    1324  2511589762 :         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
    1325             :                 return 0;
    1326             : 
    1327   309523758 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1328   619001738 :         if (!xfs_btree_ptr_is_null(cur, &ptr))
    1329    52402319 :                 return 0;
    1330             :         return 1;
    1331             : }
    1332             : 
    1333             : STATIC void
    1334   132339901 : xfs_btree_buf_to_ptr(
    1335             :         struct xfs_btree_cur    *cur,
    1336             :         struct xfs_buf          *bp,
    1337             :         union xfs_btree_ptr     *ptr)
    1338             : {
    1339   132339901 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1340      378888 :                 xfbtree_buf_to_ptr(cur, bp, ptr);
    1341      378888 :                 return;
    1342             :         }
    1343             : 
    1344   131961013 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1345     6878617 :                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
    1346             :                                         xfs_buf_daddr(bp)));
    1347             :         else {
    1348   125082396 :                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
    1349             :                                         xfs_buf_daddr(bp)));
    1350             :         }
    1351             : }
    1352             : 
    1353             : static inline void
    1354             : xfs_btree_set_refs(
    1355             :         struct xfs_btree_cur    *cur,
    1356             :         struct xfs_buf          *bp)
    1357             : {
    1358 12372442166 :         xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
    1359             : }
    1360             : 
    1361             : int
    1362     2855428 : xfs_btree_get_buf_block(
    1363             :         struct xfs_btree_cur            *cur,
    1364             :         const union xfs_btree_ptr       *ptr,
    1365             :         struct xfs_btree_block          **block,
    1366             :         struct xfs_buf                  **bpp)
    1367             : {
    1368     2855428 :         xfs_daddr_t                     d;
    1369     2855428 :         int                             error;
    1370             : 
    1371     2855428 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1372     2855451 :         if (error)
    1373             :                 return error;
    1374     2855294 :         error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
    1375     2855466 :                         xfs_btree_bbsize(cur), 0, bpp);
    1376     2855445 :         if (error)
    1377             :                 return error;
    1378             : 
    1379     2855445 :         (*bpp)->b_ops = cur->bc_ops->buf_ops;
    1380     2855445 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1381     2855445 :         return 0;
    1382             : }
    1383             : 
    1384             : /*
    1385             :  * Read in the buffer at the given ptr and return the buffer and
    1386             :  * the block pointer within the buffer.
    1387             :  */
    1388             : int
    1389 12370441143 : xfs_btree_read_buf_block(
    1390             :         struct xfs_btree_cur            *cur,
    1391             :         const union xfs_btree_ptr       *ptr,
    1392             :         int                             flags,
    1393             :         struct xfs_btree_block          **block,
    1394             :         struct xfs_buf                  **bpp)
    1395             : {
    1396 12370441143 :         struct xfs_mount        *mp = cur->bc_mp;
    1397 12370441143 :         xfs_daddr_t             d;
    1398 12370441143 :         int                     error;
    1399             : 
    1400             :         /* need to sort out how callers deal with failures first */
    1401 12370441143 :         ASSERT(!(flags & XBF_TRYLOCK));
    1402             : 
    1403 12370441143 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1404 12369968119 :         if (error)
    1405             :                 return error;
    1406 12365396052 :         error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
    1407 12369409740 :                         xfs_btree_bbsize(cur), flags, bpp,
    1408 12369409740 :                         cur->bc_ops->buf_ops);
    1409 12372141396 :         if (xfs_metadata_is_sick(error))
    1410        1349 :                 xfs_btree_mark_sick(cur);
    1411 12372141396 :         if (error)
    1412             :                 return error;
    1413             : 
    1414 12372442166 :         xfs_btree_set_refs(cur, *bpp);
    1415 12370437903 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1416 12370437903 :         return 0;
    1417             : }
    1418             : 
    1419             : /*
    1420             :  * Copy keys from one btree block to another.
    1421             :  */
    1422             : void
    1423   365527715 : xfs_btree_copy_keys(
    1424             :         struct xfs_btree_cur            *cur,
    1425             :         union xfs_btree_key             *dst_key,
    1426             :         const union xfs_btree_key       *src_key,
    1427             :         int                             numkeys)
    1428             : {
    1429   365527715 :         ASSERT(numkeys >= 0);
    1430   731055430 :         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
    1431   365527715 : }
    1432             : 
    1433             : /*
    1434             :  * Copy records from one btree block to another.
    1435             :  */
    1436             : STATIC void
    1437  2064936288 : xfs_btree_copy_recs(
    1438             :         struct xfs_btree_cur    *cur,
    1439             :         union xfs_btree_rec     *dst_rec,
    1440             :         union xfs_btree_rec     *src_rec,
    1441             :         int                     numrecs)
    1442             : {
    1443  2064936288 :         ASSERT(numrecs >= 0);
    1444  4129872576 :         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
    1445  2064936288 : }
    1446             : 
    1447             : /*
    1448             :  * Copy block pointers from one btree block to another.
    1449             :  */
    1450             : void
    1451    12898049 : xfs_btree_copy_ptrs(
    1452             :         struct xfs_btree_cur    *cur,
    1453             :         union xfs_btree_ptr     *dst_ptr,
    1454             :         const union xfs_btree_ptr *src_ptr,
    1455             :         int                     numptrs)
    1456             : {
    1457    12898049 :         ASSERT(numptrs >= 0);
    1458    31441373 :         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
    1459    12898049 : }
    1460             : 
    1461             : /*
    1462             :  * Shift keys one index left/right inside a single btree block.
    1463             :  */
    1464             : STATIC void
    1465     2441235 : xfs_btree_shift_keys(
    1466             :         struct xfs_btree_cur    *cur,
    1467             :         union xfs_btree_key     *key,
    1468             :         int                     dir,
    1469             :         int                     numkeys)
    1470             : {
    1471     2441235 :         char                    *dst_key;
    1472             : 
    1473     2441235 :         ASSERT(numkeys >= 0);
    1474     2441235 :         ASSERT(dir == 1 || dir == -1);
    1475             : 
    1476     2441235 :         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
    1477     4882470 :         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
    1478     2441235 : }
    1479             : 
    1480             : /*
    1481             :  * Shift records one index left/right inside a single btree block.
    1482             :  */
    1483             : STATIC void
    1484  1576969880 : xfs_btree_shift_recs(
    1485             :         struct xfs_btree_cur    *cur,
    1486             :         union xfs_btree_rec     *rec,
    1487             :         int                     dir,
    1488             :         int                     numrecs)
    1489             : {
    1490  1576969880 :         char                    *dst_rec;
    1491             : 
    1492  1576969880 :         ASSERT(numrecs >= 0);
    1493  1576969880 :         ASSERT(dir == 1 || dir == -1);
    1494             : 
    1495  1576969880 :         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
    1496  3153939760 :         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
    1497  1576969880 : }
    1498             : 
    1499             : /*
    1500             :  * Shift block pointers one index left/right inside a single btree block.
    1501             :  */
    1502             : STATIC void
    1503     2441236 : xfs_btree_shift_ptrs(
    1504             :         struct xfs_btree_cur    *cur,
    1505             :         union xfs_btree_ptr     *ptr,
    1506             :         int                     dir,
    1507             :         int                     numptrs)
    1508             : {
    1509     2441236 :         char                    *dst_ptr;
    1510             : 
    1511     2441236 :         ASSERT(numptrs >= 0);
    1512     2441236 :         ASSERT(dir == 1 || dir == -1);
    1513             : 
    1514     2441236 :         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
    1515     4882472 :         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
    1516     2441236 : }
    1517             : 
    1518             : /*
    1519             :  * Log key values from the btree block.
    1520             :  */
    1521             : STATIC void
    1522   363616064 : xfs_btree_log_keys(
    1523             :         struct xfs_btree_cur    *cur,
    1524             :         struct xfs_buf          *bp,
    1525             :         int                     first,
    1526             :         int                     last)
    1527             : {
    1528             : 
    1529   363616064 :         if (bp) {
    1530   353794442 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1531   353795629 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1532   353795629 :                                   xfs_btree_key_offset(cur, first),
    1533   353795629 :                                   xfs_btree_key_offset(cur, last + 1) - 1);
    1534             :         } else {
    1535     9821622 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1536     9821622 :                                 xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1537             :         }
    1538   363621623 : }
    1539             : 
    1540             : /*
    1541             :  * Log record values from the btree block.
    1542             :  */
    1543             : void
    1544  2647586713 : xfs_btree_log_recs(
    1545             :         struct xfs_btree_cur    *cur,
    1546             :         struct xfs_buf          *bp,
    1547             :         int                     first,
    1548             :         int                     last)
    1549             : {
    1550             : 
    1551  2647586713 :         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1552  2647501646 :         xfs_trans_log_buf(cur->bc_tp, bp,
    1553  2647501646 :                           xfs_btree_rec_offset(cur, first),
    1554  2647501646 :                           xfs_btree_rec_offset(cur, last + 1) - 1);
    1555             : 
    1556  2647973040 : }
    1557             : 
    1558             : /*
    1559             :  * Log block pointer fields from a btree block (nonleaf).
    1560             :  */
    1561             : STATIC void
    1562     2822489 : xfs_btree_log_ptrs(
    1563             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1564             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1565             :         int                     first,  /* index of first pointer to log */
    1566             :         int                     last)   /* index of last pointer to log */
    1567             : {
    1568             : 
    1569     2822489 :         if (bp) {
    1570     2769848 :                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    1571     2769848 :                 int                     level = xfs_btree_get_level(block);
    1572             : 
    1573     2769848 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1574     2769849 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1575     2769849 :                                 xfs_btree_ptr_offset(cur, first, level),
    1576     2769848 :                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
    1577             :         } else {
    1578       52641 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1579       52641 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1580             :         }
    1581             : 
    1582     2822492 : }
    1583             : 
    1584             : /*
    1585             :  * Log fields from a btree block header.
    1586             :  */
    1587             : void
    1588  2020950676 : xfs_btree_log_block(
    1589             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1590             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1591             :         uint32_t                fields) /* mask of fields: XFS_BB_... */
    1592             : {
    1593  2020950676 :         int                     first;  /* first byte offset logged */
    1594  2020950676 :         int                     last;   /* last byte offset logged */
    1595  2020950676 :         static const short      soffsets[] = {  /* table of offsets (short) */
    1596             :                 offsetof(struct xfs_btree_block, bb_magic),
    1597             :                 offsetof(struct xfs_btree_block, bb_level),
    1598             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1599             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
    1600             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
    1601             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
    1602             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
    1603             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
    1604             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
    1605             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
    1606             :                 XFS_BTREE_SBLOCK_CRC_LEN
    1607             :         };
    1608  2020950676 :         static const short      loffsets[] = {  /* table of offsets (long) */
    1609             :                 offsetof(struct xfs_btree_block, bb_magic),
    1610             :                 offsetof(struct xfs_btree_block, bb_level),
    1611             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1612             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
    1613             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
    1614             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
    1615             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
    1616             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
    1617             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
    1618             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
    1619             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
    1620             :                 XFS_BTREE_LBLOCK_CRC_LEN
    1621             :         };
    1622             : 
    1623  2020950676 :         if (bp) {
    1624  2020868489 :                 int nbits;
    1625             : 
    1626  2020868489 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    1627             :                         /*
    1628             :                          * We don't log the CRC when updating a btree
    1629             :                          * block but instead recreate it during log
    1630             :                          * recovery.  As the log buffers have checksums
    1631             :                          * of their own this is safe and avoids logging a crc
    1632             :                          * update in a lot of places.
    1633             :                          */
    1634  2020863167 :                         if (fields == XFS_BB_ALL_BITS)
    1635     4403321 :                                 fields = XFS_BB_ALL_BITS_CRC;
    1636             :                         nbits = XFS_BB_NUM_BITS_CRC;
    1637             :                 } else {
    1638             :                         nbits = XFS_BB_NUM_BITS;
    1639             :                 }
    1640  2020868489 :                 xfs_btree_offsets(fields,
    1641  2020868489 :                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
    1642             :                                         loffsets : soffsets,
    1643             :                                   nbits, &first, &last);
    1644  2020786531 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1645  2020792248 :                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
    1646             :         } else {
    1647       82187 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1648       82187 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1649             :         }
    1650  2021042586 : }
    1651             : 
    1652             : /*
    1653             :  * Increment cursor by one record at the level.
    1654             :  * For nonzero levels the leaf-ward information is untouched.
    1655             :  */
    1656             : int                                             /* error */
    1657 >12159*10^7 : xfs_btree_increment(
    1658             :         struct xfs_btree_cur    *cur,
    1659             :         int                     level,
    1660             :         int                     *stat)          /* success/failure */
    1661             : {
    1662 >12159*10^7 :         struct xfs_btree_block  *block;
    1663 >12159*10^7 :         union xfs_btree_ptr     ptr;
    1664 >12159*10^7 :         struct xfs_buf          *bp;
    1665 >12159*10^7 :         int                     error;          /* error return value */
    1666 >12159*10^7 :         int                     lev;
    1667             : 
    1668 >12159*10^7 :         ASSERT(level < cur->bc_nlevels);
    1669             : 
    1670             :         /* Read-ahead to the right at this level. */
    1671 >12159*10^7 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    1672             : 
    1673             :         /* Get a pointer to the btree block. */
    1674 >12124*10^7 :         block = xfs_btree_get_block(cur, level, &bp);
    1675             : 
    1676             : #ifdef DEBUG
    1677 >12129*10^7 :         error = xfs_btree_check_block(cur, block, level, bp);
    1678 >12160*10^7 :         if (error)
    1679           0 :                 goto error0;
    1680             : #endif
    1681             : 
    1682             :         /* We're done if we remain in the block after the increment. */
    1683 >12160*10^7 :         if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
    1684 >11760*10^7 :                 goto out1;
    1685             : 
    1686             :         /* Fail if we just went off the right edge of the tree. */
    1687  3995491344 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1688  7988956976 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1689  3269275850 :                 goto out0;
    1690             : 
    1691   725202638 :         XFS_BTREE_STATS_INC(cur, increment);
    1692             : 
    1693             :         /*
    1694             :          * March up the tree incrementing pointers.
    1695             :          * Stop when we don't go off the right edge of a block.
    1696             :          */
    1697   730673857 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1698   730673731 :                 block = xfs_btree_get_block(cur, lev, &bp);
    1699             : 
    1700             : #ifdef DEBUG
    1701   730669163 :                 error = xfs_btree_check_block(cur, block, lev, bp);
    1702   730680064 :                 if (error)
    1703           0 :                         goto error0;
    1704             : #endif
    1705             : 
    1706   730680064 :                 if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
    1707             :                         break;
    1708             : 
    1709             :                 /* Read-ahead the right block for the next loop. */
    1710     5745705 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
    1711             :         }
    1712             : 
    1713             :         /*
    1714             :          * If we went off the root then we are either seriously
    1715             :          * confused or have the tree root in an inode.
    1716             :          */
    1717   724934485 :         if (lev == cur->bc_nlevels) {
    1718           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1719           0 :                         goto out0;
    1720           0 :                 ASSERT(0);
    1721           0 :                 xfs_btree_mark_sick(cur);
    1722           0 :                 error = -EFSCORRUPTED;
    1723           0 :                 goto error0;
    1724             :         }
    1725   724934485 :         ASSERT(lev < cur->bc_nlevels);
    1726             : 
    1727             :         /*
    1728             :          * Now walk back down the tree, fixing up the cursor's buffer
    1729             :          * pointers and key numbers.
    1730             :          */
    1731  1455571810 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1732   730674183 :                 union xfs_btree_ptr     *ptrp;
    1733             : 
    1734   730674183 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1735   730663401 :                 --lev;
    1736   730663401 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1737   730661213 :                 if (error)
    1738          38 :                         goto error0;
    1739             : 
    1740   730661175 :                 xfs_btree_setbuf(cur, lev, bp);
    1741   730637325 :                 cur->bc_levels[lev].ptr = 1;
    1742             :         }
    1743   724895249 : out1:
    1744 >11833*10^7 :         *stat = 1;
    1745 >11833*10^7 :         return 0;
    1746             : 
    1747  3269275850 : out0:
    1748  3269275850 :         *stat = 0;
    1749  3269275850 :         return 0;
    1750             : 
    1751             : error0:
    1752             :         return error;
    1753             : }
    1754             : 
    1755             : /*
    1756             :  * Decrement cursor by one record at the level.
    1757             :  * For nonzero levels the leaf-ward information is untouched.
    1758             :  */
    1759             : int                                             /* error */
    1760  2365990084 : xfs_btree_decrement(
    1761             :         struct xfs_btree_cur    *cur,
    1762             :         int                     level,
    1763             :         int                     *stat)          /* success/failure */
    1764             : {
    1765  2365990084 :         struct xfs_btree_block  *block;
    1766  2365990084 :         struct xfs_buf          *bp;
    1767  2365990084 :         int                     error;          /* error return value */
    1768  2365990084 :         int                     lev;
    1769  2365990084 :         union xfs_btree_ptr     ptr;
    1770             : 
    1771  2365990084 :         ASSERT(level < cur->bc_nlevels);
    1772             : 
    1773             :         /* Read-ahead to the left at this level. */
    1774  2365990084 :         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
    1775             : 
    1776             :         /* We're done if we remain in the block after the decrement. */
    1777  2365908850 :         if (--cur->bc_levels[level].ptr > 0)
    1778  2142647100 :                 goto out1;
    1779             : 
    1780             :         /* Get a pointer to the btree block. */
    1781   223261750 :         block = xfs_btree_get_block(cur, level, &bp);
    1782             : 
    1783             : #ifdef DEBUG
    1784   223261234 :         error = xfs_btree_check_block(cur, block, level, bp);
    1785   223262368 :         if (error)
    1786           0 :                 goto error0;
    1787             : #endif
    1788             : 
    1789             :         /* Fail if we just went off the left edge of the tree. */
    1790   223262368 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    1791   446523974 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1792   140624173 :                 goto out0;
    1793             : 
    1794    82637814 :         XFS_BTREE_STATS_INC(cur, decrement);
    1795             : 
    1796             :         /*
    1797             :          * March up the tree decrementing pointers.
    1798             :          * Stop when we don't go off the left edge of a block.
    1799             :          */
    1800    82860448 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1801    82860452 :                 if (--cur->bc_levels[lev].ptr > 0)
    1802             :                         break;
    1803             :                 /* Read-ahead the left block for the next loop. */
    1804      222704 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
    1805             :         }
    1806             : 
    1807             :         /*
    1808             :          * If we went off the root then we are seriously confused.
    1809             :          * or the root of the tree is in an inode.
    1810             :          */
    1811    82637744 :         if (lev == cur->bc_nlevels) {
    1812           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1813           0 :                         goto out0;
    1814           0 :                 ASSERT(0);
    1815           0 :                 xfs_btree_mark_sick(cur);
    1816           0 :                 error = -EFSCORRUPTED;
    1817           0 :                 goto error0;
    1818             :         }
    1819    82637744 :         ASSERT(lev < cur->bc_nlevels);
    1820             : 
    1821             :         /*
    1822             :          * Now walk back down the tree, fixing up the cursor's buffer
    1823             :          * pointers and key numbers.
    1824             :          */
    1825   165498033 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1826    82860323 :                 union xfs_btree_ptr     *ptrp;
    1827             : 
    1828    82860323 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1829    82860319 :                 --lev;
    1830    82860319 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1831    82860505 :                 if (error)
    1832          12 :                         goto error0;
    1833    82860493 :                 xfs_btree_setbuf(cur, lev, bp);
    1834    82860289 :                 cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
    1835             :         }
    1836    82637603 : out1:
    1837  2225284703 :         *stat = 1;
    1838  2225284703 :         return 0;
    1839             : 
    1840   140624173 : out0:
    1841   140624173 :         *stat = 0;
    1842   140624173 :         return 0;
    1843             : 
    1844             : error0:
    1845             :         return error;
    1846             : }
    1847             : 
    1848             : /*
    1849             :  * Check the btree block owner now that we have the context to know who the
    1850             :  * real owner is.
    1851             :  */
    1852             : static inline xfs_failaddr_t
    1853 11325985691 : xfs_btree_check_block_owner(
    1854             :         struct xfs_btree_cur    *cur,
    1855             :         struct xfs_btree_block  *block)
    1856             : {
    1857 11325985691 :         if (!xfs_has_crc(cur->bc_mp))
    1858             :                 return NULL;
    1859             : 
    1860 11325982462 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1861   570693213 :                 return xfbtree_check_block_owner(cur, block);
    1862             : 
    1863 10755289249 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
    1864 10155492488 :                 if (be32_to_cpu(block->bb_u.s.bb_owner) !=
    1865 10155492488 :                                                 cur->bc_ag.pag->pag_agno)
    1866           0 :                         return __this_address;
    1867             :                 return NULL;
    1868             :         }
    1869             : 
    1870   599796761 :         if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
    1871             :                 return NULL;
    1872             : 
    1873   599621034 :         if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
    1874           0 :                 return __this_address;
    1875             : 
    1876             :         return NULL;
    1877             : }
    1878             : 
    1879             : int
    1880 30268102298 : xfs_btree_lookup_get_block(
    1881             :         struct xfs_btree_cur            *cur,   /* btree cursor */
    1882             :         int                             level,  /* level in the btree */
    1883             :         const union xfs_btree_ptr       *pp,    /* ptr to btree block */
    1884             :         struct xfs_btree_block          **blkp) /* return btree block */
    1885             : {
    1886 30268102298 :         struct xfs_buf          *bp;    /* buffer pointer for btree block */
    1887 30268102298 :         xfs_daddr_t             daddr;
    1888 30268102298 :         int                     error = 0;
    1889             : 
    1890             :         /* special case the root block if in an inode */
    1891 30268102298 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1892  1025009295 :             (level == cur->bc_nlevels - 1)) {
    1893   371548273 :                 *blkp = xfs_btree_get_iroot(cur);
    1894   371543712 :                 return 0;
    1895             :         }
    1896             : 
    1897             :         /*
    1898             :          * If the old buffer at this level for the disk address we are
    1899             :          * looking for re-use it.
    1900             :          *
    1901             :          * Otherwise throw it away and get a new one.
    1902             :          */
    1903 29896554025 :         bp = cur->bc_levels[level].bp;
    1904 29896554025 :         error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
    1905 29894194795 :         if (error)
    1906             :                 return error;
    1907 29894194795 :         if (bp && xfs_buf_daddr(bp) == daddr) {
    1908 18572437711 :                 *blkp = XFS_BUF_TO_BLOCK(bp);
    1909 18572437711 :                 return 0;
    1910             :         }
    1911             : 
    1912 11321757084 :         error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
    1913 11325333983 :         if (error)
    1914             :                 return error;
    1915             : 
    1916             :         /* Check the inode owner since the verifiers don't. */
    1917 11325462333 :         if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
    1918           0 :                 goto out_bad;
    1919             : 
    1920             :         /* Did we get the level we were looking for? */
    1921 11326781297 :         if (be16_to_cpu((*blkp)->bb_level) != level)
    1922           0 :                 goto out_bad;
    1923             : 
    1924             :         /* Check that internal nodes have at least one record. */
    1925 11326781297 :         if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
    1926           0 :                 goto out_bad;
    1927             : 
    1928 11326781297 :         xfs_btree_setbuf(cur, level, bp);
    1929 11326781297 :         return 0;
    1930             : 
    1931           0 : out_bad:
    1932           0 :         *blkp = NULL;
    1933           0 :         xfs_buf_mark_corrupt(bp);
    1934           0 :         xfs_trans_brelse(cur->bc_tp, bp);
    1935           0 :         xfs_btree_mark_sick(cur);
    1936           0 :         return -EFSCORRUPTED;
    1937             : }
    1938             : 
    1939             : /*
    1940             :  * Get current search key.  For level 0 we don't actually have a key
    1941             :  * structure so we make one up from the record.  For all other levels
    1942             :  * we just return the right key.
    1943             :  */
    1944             : STATIC union xfs_btree_key *
    1945 >12080*10^7 : xfs_lookup_get_search_key(
    1946             :         struct xfs_btree_cur    *cur,
    1947             :         int                     level,
    1948             :         int                     keyno,
    1949             :         struct xfs_btree_block  *block,
    1950             :         union xfs_btree_key     *kp)
    1951             : {
    1952 >12080*10^7 :         if (level == 0) {
    1953 86698055738 :                 cur->bc_ops->init_key_from_rec(kp,
    1954             :                                 xfs_btree_rec_addr(cur, keyno, block));
    1955 86698055738 :                 return kp;
    1956             :         }
    1957             : 
    1958 34111457320 :         return xfs_btree_key_addr(cur, keyno, block);
    1959             : }
    1960             : 
    1961             : /*
    1962             :  * Lookup the record.  The cursor is made to point to it, based on dir.
    1963             :  * stat is set to 0 if can't find any such record, 1 for success.
    1964             :  */
    1965             : int                                     /* error */
    1966 17599581786 : xfs_btree_lookup(
    1967             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1968             :         xfs_lookup_t            dir,    /* <=, ==, or >= */
    1969             :         int                     *stat)  /* success/failure */
    1970             : {
    1971 17599581786 :         struct xfs_btree_block  *block; /* current btree block */
    1972 17599581786 :         int64_t                 diff;   /* difference for the current key */
    1973 17599581786 :         int                     error;  /* error return value */
    1974 17599581786 :         int                     keyno;  /* current key number */
    1975 17599581786 :         int                     level;  /* level in the btree */
    1976 17599581786 :         union xfs_btree_ptr     *pp;    /* ptr to btree block */
    1977 17599581786 :         union xfs_btree_ptr     ptr;    /* ptr to btree block */
    1978             : 
    1979 17599581786 :         XFS_BTREE_STATS_INC(cur, lookup);
    1980             : 
    1981             :         /* No such thing as a zero-level tree. */
    1982 17593933328 :         if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
    1983           0 :                 xfs_btree_mark_sick(cur);
    1984           0 :                 return -EFSCORRUPTED;
    1985             :         }
    1986             : 
    1987 17593933328 :         block = NULL;
    1988 17593933328 :         keyno = 0;
    1989             : 
    1990             :         /* initialise start pointer from cursor */
    1991 17593933328 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    1992 17593427023 :         pp = &ptr;
    1993             : 
    1994             :         /*
    1995             :          * Iterate over each level in the btree, starting at the root.
    1996             :          * For each level above the leaves, find the key we need, based
    1997             :          * on the lookup record, then follow the corresponding block
    1998             :          * pointer down to the next level.
    1999             :          */
    2000 43891309275 :         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
    2001             :                 /* Get the block we need to do the lookup on. */
    2002 27129572037 :                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
    2003 27140914068 :                 if (error)
    2004        6895 :                         goto error0;
    2005             : 
    2006 27140907173 :                 if (diff == 0) {
    2007             :                         /*
    2008             :                          * If we already had a key match at a higher level, we
    2009             :                          * know we need to use the first entry in this block.
    2010             :                          */
    2011             :                         keyno = 1;
    2012             :                 } else {
    2013             :                         /* Otherwise search this block. Do a binary search. */
    2014             : 
    2015 27105108935 :                         int     high;   /* high entry number */
    2016 27105108935 :                         int     low;    /* low entry number */
    2017             : 
    2018             :                         /* Set low and high entry numbers, 1-based. */
    2019 27105108935 :                         low = 1;
    2020 27105108935 :                         high = xfs_btree_get_numrecs(block);
    2021 27105108935 :                         if (!high) {
    2022             :                                 /* Block is empty, must be an empty leaf. */
    2023   836109461 :                                 if (level != 0 || cur->bc_nlevels != 1) {
    2024           0 :                                         XFS_CORRUPTION_ERROR(__func__,
    2025             :                                                         XFS_ERRLEVEL_LOW,
    2026             :                                                         cur->bc_mp, block,
    2027             :                                                         sizeof(*block));
    2028           0 :                                         xfs_btree_mark_sick(cur);
    2029           0 :                                         return -EFSCORRUPTED;
    2030             :                                 }
    2031             : 
    2032   836109461 :                                 cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
    2033   836109461 :                                 *stat = 0;
    2034   836109461 :                                 return 0;
    2035             :                         }
    2036             : 
    2037             :                         /* Binary search the block. */
    2038 >14482*10^7 :                         while (low <= high) {
    2039 >12087*10^7 :                                 union xfs_btree_key     key;
    2040 >12087*10^7 :                                 union xfs_btree_key     *kp;
    2041             : 
    2042 >12087*10^7 :                                 XFS_BTREE_STATS_INC(cur, compare);
    2043             : 
    2044             :                                 /* keyno is average of low and high. */
    2045 >12065*10^7 :                                 keyno = (low + high) >> 1;
    2046             : 
    2047             :                                 /* Get current search key */
    2048 >12065*10^7 :                                 kp = xfs_lookup_get_search_key(cur, level,
    2049             :                                                 keyno, block, &key);
    2050             : 
    2051             :                                 /*
    2052             :                                  * Compute difference to get next direction:
    2053             :                                  *  - less than, move right
    2054             :                                  *  - greater than, move left
    2055             :                                  *  - equal, we're done
    2056             :                                  */
    2057 >12059*10^7 :                                 diff = cur->bc_ops->key_diff(cur, kp);
    2058 >12087*10^7 :                                 if (diff < 0)
    2059 68759196405 :                                         low = keyno + 1;
    2060 52116568769 :                                 else if (diff > 0)
    2061 49792233852 :                                         high = keyno - 1;
    2062             :                                 else
    2063             :                                         break;
    2064             :                         }
    2065             :                 }
    2066             : 
    2067             :                 /*
    2068             :                  * If there are more levels, set up for the next level
    2069             :                  * by getting the block number and filling in the cursor.
    2070             :                  */
    2071 26302186176 :                 if (level > 0) {
    2072             :                         /*
    2073             :                          * If we moved left, need the previous key number,
    2074             :                          * unless there isn't one.
    2075             :                          */
    2076  9540226219 :                         if (diff > 0 && --keyno < 1)
    2077             :                                 keyno = 1;
    2078  9540226219 :                         pp = xfs_btree_ptr_addr(cur, keyno, block);
    2079             : 
    2080  9535875580 :                         error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
    2081  9535922295 :                         if (error)
    2082           0 :                                 goto error0;
    2083             : 
    2084  9535922295 :                         cur->bc_levels[level].ptr = keyno;
    2085             :                 }
    2086             :         }
    2087             : 
    2088             :         /* Done with the search. See if we need to adjust the results. */
    2089 16761737238 :         if (dir != XFS_LOOKUP_LE && diff < 0) {
    2090   797969993 :                 keyno++;
    2091             :                 /*
    2092             :                  * If ge search and we went off the end of the block, but it's
    2093             :                  * not the last block, we're in the wrong block.
    2094             :                  */
    2095   797969993 :                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    2096   797964192 :                 if (dir == XFS_LOOKUP_GE &&
    2097   209812317 :                     keyno > xfs_btree_get_numrecs(block) &&
    2098             :                     !xfs_btree_ptr_is_null(cur, &ptr)) {
    2099     2363196 :                         int     i;
    2100             : 
    2101     2363196 :                         cur->bc_levels[0].ptr = keyno;
    2102     2363196 :                         error = xfs_btree_increment(cur, 0, &i);
    2103     2363196 :                         if (error)
    2104           3 :                                 goto error0;
    2105     2363193 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    2106           0 :                                 xfs_btree_mark_sick(cur);
    2107     2363193 :                                 return -EFSCORRUPTED;
    2108             :                         }
    2109     2363193 :                         *stat = 1;
    2110     2363193 :                         return 0;
    2111             :                 }
    2112 15963767245 :         } else if (dir == XFS_LOOKUP_LE && diff > 0)
    2113  7223409681 :                 keyno--;
    2114 16759368241 :         cur->bc_levels[0].ptr = keyno;
    2115             : 
    2116             :         /* Return if we succeeded or not. */
    2117 16759368241 :         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
    2118  3430096083 :                 *stat = 0;
    2119 13329272158 :         else if (dir != XFS_LOOKUP_EQ || diff == 0)
    2120 12808240115 :                 *stat = 1;
    2121             :         else
    2122   521032043 :                 *stat = 0;
    2123             :         return 0;
    2124             : 
    2125             : error0:
    2126             :         return error;
    2127             : }
    2128             : 
    2129             : /* Find the high key storage area from a regular key. */
    2130             : union xfs_btree_key *
    2131  1563454027 : xfs_btree_high_key_from_key(
    2132             :         struct xfs_btree_cur    *cur,
    2133             :         union xfs_btree_key     *key)
    2134             : {
    2135  1563454027 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2136  1563454027 :         return (union xfs_btree_key *)((char *)key +
    2137  1563454027 :                         (cur->bc_ops->key_len / 2));
    2138             : }
    2139             : 
    2140             : /* Determine the low (and high if overlapped) keys of a leaf block */
    2141             : STATIC void
    2142  1197238139 : xfs_btree_get_leaf_keys(
    2143             :         struct xfs_btree_cur    *cur,
    2144             :         struct xfs_btree_block  *block,
    2145             :         union xfs_btree_key     *key)
    2146             : {
    2147  1197238139 :         union xfs_btree_key     max_hkey;
    2148  1197238139 :         union xfs_btree_key     hkey;
    2149  1197238139 :         union xfs_btree_rec     *rec;
    2150  1197238139 :         union xfs_btree_key     *high;
    2151  1197238139 :         int                     n;
    2152             : 
    2153  1197238139 :         rec = xfs_btree_rec_addr(cur, 1, block);
    2154  1197238139 :         cur->bc_ops->init_key_from_rec(key, rec);
    2155             : 
    2156  1197323138 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2157             : 
    2158   706864492 :                 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
    2159 93761960542 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2160 92348173972 :                         rec = xfs_btree_rec_addr(cur, n, block);
    2161 92348173972 :                         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
    2162 92315459172 :                         if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
    2163 90757382108 :                                 max_hkey = hkey;
    2164             :                 }
    2165             : 
    2166   706922078 :                 high = xfs_btree_high_key_from_key(cur, key);
    2167  1413831726 :                 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
    2168             :         }
    2169  1197374509 : }
    2170             : 
    2171             : /* Determine the low (and high if overlapped) keys of a node block */
    2172             : STATIC void
    2173   150525074 : xfs_btree_get_node_keys(
    2174             :         struct xfs_btree_cur    *cur,
    2175             :         struct xfs_btree_block  *block,
    2176             :         union xfs_btree_key     *key)
    2177             : {
    2178   150525074 :         union xfs_btree_key     *hkey;
    2179   150525074 :         union xfs_btree_key     *max_hkey;
    2180   150525074 :         union xfs_btree_key     *high;
    2181   150525074 :         int                     n;
    2182             : 
    2183   150525074 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2184   300700840 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2185             :                                 cur->bc_ops->key_len / 2);
    2186             : 
    2187   150350420 :                 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
    2188 11326078947 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2189 11175728723 :                         hkey = xfs_btree_high_key_addr(cur, n, block);
    2190 11175728723 :                         if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
    2191 11144203519 :                                 max_hkey = hkey;
    2192             :                 }
    2193             : 
    2194   150350224 :                 high = xfs_btree_high_key_from_key(cur, key);
    2195   300700570 :                 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
    2196             :         } else {
    2197      349308 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2198             :                                 cur->bc_ops->key_len);
    2199             :         }
    2200   150524939 : }
    2201             : 
    2202             : /* Derive the keys for any btree block. */
    2203             : void
    2204  1197442913 : xfs_btree_get_keys(
    2205             :         struct xfs_btree_cur    *cur,
    2206             :         struct xfs_btree_block  *block,
    2207             :         union xfs_btree_key     *key)
    2208             : {
    2209  1197442913 :         if (be16_to_cpu(block->bb_level) == 0)
    2210  1195684608 :                 xfs_btree_get_leaf_keys(cur, block, key);
    2211             :         else
    2212     1758305 :                 xfs_btree_get_node_keys(cur, block, key);
    2213  1197270194 : }
    2214             : 
    2215             : /*
    2216             :  * Decide if we need to update the parent keys of a btree block.  For
    2217             :  * a standard btree this is only necessary if we're updating the first
    2218             :  * record/key.  For an overlapping btree, we must always update the
    2219             :  * keys because the highest key can be in any of the records or keys
    2220             :  * in the block.
    2221             :  */
    2222             : static inline bool
    2223             : xfs_btree_needs_key_update(
    2224             :         struct xfs_btree_cur    *cur,
    2225             :         int                     ptr)
    2226             : {
    2227  1650351955 :         return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
    2228             : }
    2229             : 
    2230             : /*
    2231             :  * Update the low and high parent keys of the given level, progressing
    2232             :  * towards the root.  If force_all is false, stop if the keys for a given
    2233             :  * level do not need updating.
    2234             :  */
    2235             : STATIC int
    2236   729036108 : __xfs_btree_updkeys(
    2237             :         struct xfs_btree_cur    *cur,
    2238             :         int                     level,
    2239             :         struct xfs_btree_block  *block,
    2240             :         struct xfs_buf          *bp0,
    2241             :         bool                    force_all)
    2242             : {
    2243   729036108 :         union xfs_btree_key     key;    /* keys from current level */
    2244   729036108 :         union xfs_btree_key     *lkey;  /* keys from the next level up */
    2245   729036108 :         union xfs_btree_key     *hkey;
    2246   729036108 :         union xfs_btree_key     *nlkey; /* keys from the next level up */
    2247   729036108 :         union xfs_btree_key     *nhkey;
    2248   729036108 :         struct xfs_buf          *bp;
    2249   729036108 :         int                     ptr;
    2250             : 
    2251   729036108 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2252             : 
    2253             :         /* Exit if there aren't any parent levels to update. */
    2254   729036108 :         if (level + 1 >= cur->bc_nlevels)
    2255             :                 return 0;
    2256             : 
    2257   696693256 :         trace_xfs_btree_updkeys(cur, level, bp0);
    2258             : 
    2259   696680000 :         lkey = &key;
    2260   696680000 :         hkey = xfs_btree_high_key_from_key(cur, lkey);
    2261   696671438 :         xfs_btree_get_keys(cur, block, lkey);
    2262  1542144535 :         for (level++; level < cur->bc_nlevels; level++) {
    2263             : #ifdef DEBUG
    2264   845473097 :                 int             error;
    2265             : #endif
    2266   845473097 :                 block = xfs_btree_get_block(cur, level, &bp);
    2267   845471508 :                 trace_xfs_btree_updkeys(cur, level, bp);
    2268             : #ifdef DEBUG
    2269   845472510 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2270   845465009 :                 if (error)
    2271           0 :                         return error;
    2272             : #endif
    2273   845465009 :                 ptr = cur->bc_levels[level].ptr;
    2274   845465009 :                 nlkey = xfs_btree_key_addr(cur, ptr, block);
    2275   845465009 :                 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
    2276  1690610746 :                 if (!force_all &&
    2277   764016540 :                     xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
    2278             :                     xfs_btree_keycmp_eq(cur, nhkey, hkey))
    2279             :                         break;
    2280   203464655 :                 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
    2281   203475425 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2282   203476586 :                 if (level + 1 >= cur->bc_nlevels)
    2283             :                         break;
    2284   148753786 :                 xfs_btree_get_node_keys(cur, block, lkey);
    2285             :         }
    2286             : 
    2287             :         return 0;
    2288             : }
    2289             : 
    2290             : /* Update all the keys from some level in cursor back to the root. */
    2291             : STATIC int
    2292      183533 : xfs_btree_updkeys_force(
    2293             :         struct xfs_btree_cur    *cur,
    2294             :         int                     level)
    2295             : {
    2296      183533 :         struct xfs_buf          *bp;
    2297      183533 :         struct xfs_btree_block  *block;
    2298             : 
    2299      183533 :         block = xfs_btree_get_block(cur, level, &bp);
    2300      183533 :         return __xfs_btree_updkeys(cur, level, block, bp, true);
    2301             : }
    2302             : 
    2303             : /*
    2304             :  * Update the parent keys of the given level, progressing towards the root.
    2305             :  */
    2306             : STATIC int
    2307  1208653235 : xfs_btree_update_keys(
    2308             :         struct xfs_btree_cur    *cur,
    2309             :         int                     level)
    2310             : {
    2311  1208653235 :         struct xfs_btree_block  *block;
    2312  1208653235 :         struct xfs_buf          *bp;
    2313  1208653235 :         union xfs_btree_key     *kp;
    2314  1208653235 :         union xfs_btree_key     key;
    2315  1208653235 :         int                     ptr;
    2316             : 
    2317  1208653235 :         ASSERT(level >= 0);
    2318             : 
    2319  1208653235 :         block = xfs_btree_get_block(cur, level, &bp);
    2320  1208213983 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
    2321   728843023 :                 return __xfs_btree_updkeys(cur, level, block, bp, false);
    2322             : 
    2323             :         /*
    2324             :          * Go up the tree from this level toward the root.
    2325             :          * At each level, update the key value to the value input.
    2326             :          * Stop when we reach a level where the cursor isn't pointing
    2327             :          * at the first entry in the block.
    2328             :          */
    2329   479370960 :         xfs_btree_get_keys(cur, block, &key);
    2330   636754755 :         for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
    2331             : #ifdef DEBUG
    2332   157093454 :                 int             error;
    2333             : #endif
    2334   157093454 :                 block = xfs_btree_get_block(cur, level, &bp);
    2335             : #ifdef DEBUG
    2336   157319922 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2337   157321322 :                 if (error)
    2338           0 :                         return error;
    2339             : #endif
    2340   157321322 :                 ptr = cur->bc_levels[level].ptr;
    2341   157321322 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    2342   157321322 :                 xfs_btree_copy_keys(cur, kp, &key, 1);
    2343   157318587 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2344             :         }
    2345             : 
    2346             :         return 0;
    2347             : }
    2348             : 
    2349             : /*
    2350             :  * Update the record referred to by cur to the value in the
    2351             :  * given record. This either works (return 0) or gets an
    2352             :  * EFSCORRUPTED error.
    2353             :  */
    2354             : int
    2355   927351671 : xfs_btree_update(
    2356             :         struct xfs_btree_cur    *cur,
    2357             :         union xfs_btree_rec     *rec)
    2358             : {
    2359   927351671 :         struct xfs_btree_block  *block;
    2360   927351671 :         struct xfs_buf          *bp;
    2361   927351671 :         int                     error;
    2362   927351671 :         int                     ptr;
    2363   927351671 :         union xfs_btree_rec     *rp;
    2364             : 
    2365             :         /* Pick up the current block. */
    2366   927351671 :         block = xfs_btree_get_block(cur, 0, &bp);
    2367             : 
    2368             : #ifdef DEBUG
    2369   926292121 :         error = xfs_btree_check_block(cur, block, 0, bp);
    2370   927512706 :         if (error)
    2371           0 :                 goto error0;
    2372             : #endif
    2373             :         /* Get the address of the rec to be updated. */
    2374   927512706 :         ptr = cur->bc_levels[0].ptr;
    2375   927512706 :         rp = xfs_btree_rec_addr(cur, ptr, block);
    2376             : 
    2377             :         /* Fill in the new contents and log them. */
    2378   927512706 :         xfs_btree_copy_recs(cur, rp, rec, 1);
    2379   927034544 :         xfs_btree_log_recs(cur, bp, ptr, ptr);
    2380             : 
    2381             :         /*
    2382             :          * If we are tracking the last record in the tree and
    2383             :          * we are at the far right edge of the tree, update it.
    2384             :          */
    2385   926999552 :         if (xfs_btree_is_lastrec(cur, block, 0)) {
    2386           0 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    2387             :                                             ptr, LASTREC_UPDATE);
    2388             :         }
    2389             : 
    2390             :         /* Pass new key value up to our parent. */
    2391  1767795506 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    2392   259854576 :                 error = xfs_btree_update_keys(cur, 0);
    2393   259472876 :                 if (error)
    2394           0 :                         goto error0;
    2395             :         }
    2396             : 
    2397             :         return 0;
    2398             : 
    2399             : error0:
    2400             :         return error;
    2401             : }
    2402             : 
    2403             : /*
    2404             :  * Move 1 record left from cur/level if possible.
    2405             :  * Update cur to reflect the new path.
    2406             :  */
    2407             : STATIC int                                      /* error */
    2408   141096585 : xfs_btree_lshift(
    2409             :         struct xfs_btree_cur    *cur,
    2410             :         int                     level,
    2411             :         int                     *stat)          /* success/failure */
    2412             : {
    2413   141096585 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2414   141096585 :         struct xfs_btree_block  *left;          /* left btree block */
    2415   141096585 :         int                     lrecs;          /* left record count */
    2416   141096585 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2417   141096585 :         struct xfs_btree_block  *right;         /* right btree block */
    2418   141096585 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2419   141096585 :         int                     rrecs;          /* right record count */
    2420   141096585 :         union xfs_btree_ptr     lptr;           /* left btree pointer */
    2421   141096585 :         union xfs_btree_key     *rkp = NULL;    /* right btree key */
    2422   141096585 :         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
    2423   141096585 :         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
    2424   141096585 :         int                     error;          /* error return value */
    2425   141096585 :         int                     i;
    2426             : 
    2427   141096585 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    2428    54540757 :             level == cur->bc_nlevels - 1)
    2429           0 :                 goto out0;
    2430             : 
    2431             :         /* Set up variables for this block as "right". */
    2432   141096585 :         right = xfs_btree_get_block(cur, level, &rbp);
    2433             : 
    2434             : #ifdef DEBUG
    2435   141096066 :         error = xfs_btree_check_block(cur, right, level, rbp);
    2436   141097386 :         if (error)
    2437           0 :                 goto error0;
    2438             : #endif
    2439             : 
    2440             :         /* If we've got no left sibling then we can't shift an entry left. */
    2441   141097386 :         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2442   282194680 :         if (xfs_btree_ptr_is_null(cur, &lptr))
    2443       83182 :                 goto out0;
    2444             : 
    2445             :         /*
    2446             :          * If the cursor entry is the one that would be moved, don't
    2447             :          * do it... it's too complicated.
    2448             :          */
    2449   141014158 :         if (cur->bc_levels[level].ptr <= 1)
    2450       27746 :                 goto out0;
    2451             : 
    2452             :         /* Set up the left neighbor as "left". */
    2453   140986412 :         error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    2454   140986636 :         if (error)
    2455          58 :                 goto error0;
    2456             : 
    2457             :         /* If it's full, it can't take another entry. */
    2458   140986578 :         lrecs = xfs_btree_get_numrecs(left);
    2459   140986578 :         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
    2460     1695757 :                 goto out0;
    2461             : 
    2462   139290803 :         rrecs = xfs_btree_get_numrecs(right);
    2463             : 
    2464             :         /*
    2465             :          * We add one entry to the left side and remove one for the right side.
    2466             :          * Account for it here, the changes will be updated on disk and logged
    2467             :          * later.
    2468             :          */
    2469   139290803 :         lrecs++;
    2470   139290803 :         rrecs--;
    2471             : 
    2472   139290803 :         XFS_BTREE_STATS_INC(cur, lshift);
    2473   139290812 :         XFS_BTREE_STATS_ADD(cur, moves, 1);
    2474             : 
    2475             :         /*
    2476             :          * If non-leaf, copy a key and a ptr to the left block.
    2477             :          * Log the changes to the left block.
    2478             :          */
    2479   139291048 :         if (level > 0) {
    2480             :                 /* It's a non-leaf.  Move keys and pointers. */
    2481      326705 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2482      326705 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2483             : 
    2484      326705 :                 lkp = xfs_btree_key_addr(cur, lrecs, left);
    2485      326705 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2486             : 
    2487      326705 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2488      326705 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2489             : 
    2490      326705 :                 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
    2491      326705 :                 if (error)
    2492           0 :                         goto error0;
    2493             : 
    2494      326705 :                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
    2495      326705 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
    2496             : 
    2497      326705 :                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
    2498      326705 :                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
    2499             : 
    2500      326705 :                 ASSERT(cur->bc_ops->keys_inorder(cur,
    2501             :                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
    2502             :         } else {
    2503             :                 /* It's a leaf.  Move records.  */
    2504   138964343 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2505             : 
    2506   138964343 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2507   138964343 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2508             : 
    2509   138964343 :                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
    2510   138963046 :                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
    2511             : 
    2512   138963999 :                 ASSERT(cur->bc_ops->recs_inorder(cur,
    2513             :                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
    2514             :         }
    2515             : 
    2516   139289811 :         xfs_btree_set_numrecs(left, lrecs);
    2517   139289811 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2518             : 
    2519   139291148 :         xfs_btree_set_numrecs(right, rrecs);
    2520   139291148 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2521             : 
    2522             :         /*
    2523             :          * Slide the contents of right down one entry.
    2524             :          */
    2525   139291338 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
    2526   139291261 :         if (level > 0) {
    2527             :                 /* It's a nonleaf. operate on keys and ptrs */
    2528    57806755 :                 for (i = 0; i < rrecs; i++) {
    2529    57480050 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
    2530    57480050 :                         if (error)
    2531           0 :                                 goto error0;
    2532             :                 }
    2533             : 
    2534      326705 :                 xfs_btree_shift_keys(cur,
    2535             :                                 xfs_btree_key_addr(cur, 2, right),
    2536             :                                 -1, rrecs);
    2537      326705 :                 xfs_btree_shift_ptrs(cur,
    2538             :                                 xfs_btree_ptr_addr(cur, 2, right),
    2539             :                                 -1, rrecs);
    2540             : 
    2541      326705 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2542      326705 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2543             :         } else {
    2544             :                 /* It's a leaf. operate on records */
    2545   138964556 :                 xfs_btree_shift_recs(cur,
    2546             :                         xfs_btree_rec_addr(cur, 2, right),
    2547             :                         -1, rrecs);
    2548   138964199 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2549             :         }
    2550             : 
    2551             :         /*
    2552             :          * Using a temporary cursor, update the parent key values of the
    2553             :          * block on the left.
    2554             :          */
    2555   139291141 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2556    44408095 :                 error = xfs_btree_dup_cursor(cur, &tcur);
    2557    44407902 :                 if (error)
    2558           0 :                         goto error0;
    2559    44407902 :                 i = xfs_btree_firstrec(tcur, level);
    2560    44407734 :                 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2561           0 :                         xfs_btree_mark_sick(cur);
    2562           0 :                         error = -EFSCORRUPTED;
    2563           0 :                         goto error0;
    2564             :                 }
    2565             : 
    2566    44407734 :                 error = xfs_btree_decrement(tcur, level, &i);
    2567    44407563 :                 if (error)
    2568           0 :                         goto error1;
    2569             : 
    2570             :                 /* Update the parent high keys of the left block, if needed. */
    2571    44407563 :                 error = xfs_btree_update_keys(tcur, level);
    2572    44408106 :                 if (error)
    2573           0 :                         goto error1;
    2574             : 
    2575    44408106 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2576             :         }
    2577             : 
    2578             :         /* Update the parent keys of the right block. */
    2579   139291221 :         error = xfs_btree_update_keys(cur, level);
    2580   139291186 :         if (error)
    2581           0 :                 goto error0;
    2582             : 
    2583             :         /* Slide the cursor value left one. */
    2584   139291186 :         cur->bc_levels[level].ptr--;
    2585             : 
    2586   139291186 :         *stat = 1;
    2587   139291186 :         return 0;
    2588             : 
    2589     1806685 : out0:
    2590     1806685 :         *stat = 0;
    2591     1806685 :         return 0;
    2592             : 
    2593             : error0:
    2594             :         return error;
    2595             : 
    2596           0 : error1:
    2597           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2598           0 :         return error;
    2599             : }
    2600             : 
    2601             : /*
    2602             :  * Move 1 record right from cur/level if possible.
    2603             :  * Update cur to reflect the new path.
    2604             :  */
    2605             : STATIC int                                      /* error */
    2606   198782309 : xfs_btree_rshift(
    2607             :         struct xfs_btree_cur    *cur,
    2608             :         int                     level,
    2609             :         int                     *stat)          /* success/failure */
    2610             : {
    2611   198782309 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2612   198782309 :         struct xfs_btree_block  *left;          /* left btree block */
    2613   198782309 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2614   198782309 :         struct xfs_btree_block  *right;         /* right btree block */
    2615   198782309 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2616   198782309 :         union xfs_btree_ptr     rptr;           /* right block pointer */
    2617   198782309 :         union xfs_btree_key     *rkp;           /* right btree key */
    2618   198782309 :         int                     rrecs;          /* right record count */
    2619   198782309 :         int                     lrecs;          /* left record count */
    2620   198782309 :         int                     error;          /* error return value */
    2621   198782309 :         int                     i;              /* loop counter */
    2622             : 
    2623   198782309 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    2624    79261465 :             (level == cur->bc_nlevels - 1))
    2625           0 :                 goto out0;
    2626             : 
    2627             :         /* Set up variables for this block as "left". */
    2628   198782309 :         left = xfs_btree_get_block(cur, level, &lbp);
    2629             : 
    2630             : #ifdef DEBUG
    2631   198780053 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2632   198782654 :         if (error)
    2633           0 :                 goto error0;
    2634             : #endif
    2635             : 
    2636             :         /* If we've got no right sibling then we can't shift an entry right. */
    2637   198782654 :         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2638   397564914 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    2639    83636748 :                 goto out0;
    2640             : 
    2641             :         /*
    2642             :          * If the cursor entry is the one that would be moved, don't
    2643             :          * do it... it's too complicated.
    2644             :          */
    2645   115145709 :         lrecs = xfs_btree_get_numrecs(left);
    2646   115145709 :         if (cur->bc_levels[level].ptr >= lrecs)
    2647    25015599 :                 goto out0;
    2648             : 
    2649             :         /* Set up the right neighbor as "right". */
    2650    90130110 :         error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    2651    90130222 :         if (error)
    2652         198 :                 goto error0;
    2653             : 
    2654             :         /* If it's full, it can't take another entry. */
    2655    90130024 :         rrecs = xfs_btree_get_numrecs(right);
    2656    90130024 :         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
    2657    16544037 :                 goto out0;
    2658             : 
    2659    73586396 :         XFS_BTREE_STATS_INC(cur, rshift);
    2660    73586389 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2661             : 
    2662             :         /*
    2663             :          * Make a hole at the start of the right neighbor block, then
    2664             :          * copy the last left block entry to the hole.
    2665             :          */
    2666    73586528 :         if (level > 0) {
    2667             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    2668       82920 :                 union xfs_btree_key     *lkp;
    2669       82920 :                 union xfs_btree_ptr     *lpp;
    2670       82920 :                 union xfs_btree_ptr     *rpp;
    2671             : 
    2672       82920 :                 lkp = xfs_btree_key_addr(cur, lrecs, left);
    2673       82920 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2674       82920 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2675       82920 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2676             : 
    2677     6178987 :                 for (i = rrecs - 1; i >= 0; i--) {
    2678     6096067 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    2679     6096067 :                         if (error)
    2680           0 :                                 goto error0;
    2681             :                 }
    2682             : 
    2683       82920 :                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
    2684       82920 :                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
    2685             : 
    2686       82920 :                 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
    2687       82920 :                 if (error)
    2688           0 :                         goto error0;
    2689             : 
    2690             :                 /* Now put the new data in, and log it. */
    2691       82920 :                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
    2692       82920 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
    2693             : 
    2694       82920 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
    2695       82920 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
    2696             : 
    2697       82920 :                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
    2698             :                         xfs_btree_key_addr(cur, 2, right)));
    2699             :         } else {
    2700             :                 /* It's a leaf. make a hole in the records */
    2701    73503608 :                 union xfs_btree_rec     *lrp;
    2702    73503608 :                 union xfs_btree_rec     *rrp;
    2703             : 
    2704    73503608 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2705    73503608 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2706             : 
    2707    73503608 :                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
    2708             : 
    2709             :                 /* Now put the new data in, and log it. */
    2710    73502650 :                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
    2711    73502562 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
    2712             :         }
    2713             : 
    2714             :         /*
    2715             :          * Decrement and log left's numrecs, bump and log right's numrecs.
    2716             :          */
    2717    73586242 :         xfs_btree_set_numrecs(left, --lrecs);
    2718    73586242 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2719             : 
    2720    73586424 :         xfs_btree_set_numrecs(right, ++rrecs);
    2721    73586424 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2722             : 
    2723             :         /*
    2724             :          * Using a temporary cursor, update the parent key values of the
    2725             :          * block on the right.
    2726             :          */
    2727    73586474 :         error = xfs_btree_dup_cursor(cur, &tcur);
    2728    73586032 :         if (error)
    2729           1 :                 goto error0;
    2730    73586031 :         i = xfs_btree_lastrec(tcur, level);
    2731    73586055 :         if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2732           0 :                 xfs_btree_mark_sick(cur);
    2733           0 :                 error = -EFSCORRUPTED;
    2734           0 :                 goto error0;
    2735             :         }
    2736             : 
    2737    73586055 :         error = xfs_btree_increment(tcur, level, &i);
    2738    73585220 :         if (error)
    2739           0 :                 goto error1;
    2740             : 
    2741             :         /* Update the parent high keys of the left block, if needed. */
    2742    73585220 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2743    34435165 :                 error = xfs_btree_update_keys(cur, level);
    2744    34435576 :                 if (error)
    2745           0 :                         goto error1;
    2746             :         }
    2747             : 
    2748             :         /* Update the parent keys of the right block. */
    2749    73585631 :         error = xfs_btree_update_keys(tcur, level);
    2750    73585666 :         if (error)
    2751           0 :                 goto error1;
    2752             : 
    2753    73585666 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2754             : 
    2755    73585150 :         *stat = 1;
    2756    73585150 :         return 0;
    2757             : 
    2758   125196384 : out0:
    2759   125196384 :         *stat = 0;
    2760   125196384 :         return 0;
    2761             : 
    2762             : error0:
    2763             :         return error;
    2764             : 
    2765           0 : error1:
    2766           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2767           0 :         return error;
    2768             : }
    2769             : 
    2770             : static inline int
    2771     1849737 : xfs_btree_alloc_block(
    2772             :         struct xfs_btree_cur            *cur,
    2773             :         const union xfs_btree_ptr       *hint_block,
    2774             :         union xfs_btree_ptr             *new_block,
    2775             :         int                             *stat)
    2776             : {
    2777     1849737 :         int                             error;
    2778             : 
    2779     1849737 :         error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
    2780     1849737 :         trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
    2781     1849735 :         return error;
    2782             : }
    2783             : 
    2784             : /*
    2785             :  * Split cur/level block in half.
    2786             :  * Return new block number and the key to its first
    2787             :  * record (to be inserted into parent).
    2788             :  */
    2789             : STATIC int                                      /* error */
    2790     1806682 : __xfs_btree_split(
    2791             :         struct xfs_btree_cur    *cur,
    2792             :         int                     level,
    2793             :         union xfs_btree_ptr     *ptrp,
    2794             :         union xfs_btree_key     *key,
    2795             :         struct xfs_btree_cur    **curp,
    2796             :         int                     *stat)          /* success/failure */
    2797             : {
    2798     1806682 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    2799     1806682 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2800     1806682 :         struct xfs_btree_block  *left;          /* left btree block */
    2801     1806682 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    2802     1806682 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2803     1806682 :         struct xfs_btree_block  *right;         /* right btree block */
    2804     1806682 :         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
    2805     1806682 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    2806     1806682 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    2807     1806682 :         int                     lrecs;
    2808     1806682 :         int                     rrecs;
    2809     1806682 :         int                     src_index;
    2810     1806682 :         int                     error;          /* error return value */
    2811     1806682 :         int                     i;
    2812             : 
    2813     1806682 :         XFS_BTREE_STATS_INC(cur, split);
    2814             : 
    2815             :         /* Set up left block (current one). */
    2816     1806685 :         left = xfs_btree_get_block(cur, level, &lbp);
    2817             : 
    2818             : #ifdef DEBUG
    2819     1806685 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2820     1806685 :         if (error)
    2821           0 :                 goto error0;
    2822             : #endif
    2823             : 
    2824     1806685 :         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    2825             : 
    2826             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    2827     1806686 :         error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
    2828     1806683 :         if (error)
    2829           4 :                 goto error0;
    2830     1806679 :         if (*stat == 0)
    2831           0 :                 goto out0;
    2832     1806679 :         XFS_BTREE_STATS_INC(cur, alloc);
    2833             : 
    2834             :         /* Set up the new block as "right". */
    2835     1806680 :         error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
    2836     1806679 :         if (error)
    2837           0 :                 goto error0;
    2838             : 
    2839             :         /* Fill in the btree header for the new right block. */
    2840     1806679 :         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
    2841             : 
    2842             :         /*
    2843             :          * Split the entries between the old and the new block evenly.
    2844             :          * Make sure that if there's an odd number of entries now, that
    2845             :          * each new block will have the same number of entries.
    2846             :          */
    2847     1806676 :         lrecs = xfs_btree_get_numrecs(left);
    2848     1806676 :         rrecs = lrecs / 2;
    2849     1806676 :         if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
    2850       53428 :                 rrecs++;
    2851     1806676 :         src_index = (lrecs - rrecs + 1);
    2852             : 
    2853     1806676 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2854             : 
    2855             :         /* Adjust numrecs for the later get_*_keys() calls. */
    2856     1806680 :         lrecs -= rrecs;
    2857     1806680 :         xfs_btree_set_numrecs(left, lrecs);
    2858     1806680 :         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
    2859             : 
    2860             :         /*
    2861             :          * Copy btree block entries from the left block over to the
    2862             :          * new block, the right. Update the right block and log the
    2863             :          * changes.
    2864             :          */
    2865     1806680 :         if (level > 0) {
    2866             :                 /* It's a non-leaf.  Move keys and pointers. */
    2867       10666 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2868       10666 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2869       10666 :                 union xfs_btree_key     *rkp;   /* right btree key */
    2870       10666 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    2871             : 
    2872       10666 :                 lkp = xfs_btree_key_addr(cur, src_index, left);
    2873       10666 :                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
    2874       10666 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2875       10666 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2876             : 
    2877       21332 :                 for (i = src_index; i < rrecs; i++) {
    2878           0 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    2879           0 :                         if (error)
    2880           0 :                                 goto error0;
    2881             :                 }
    2882             : 
    2883             :                 /* Copy the keys & pointers to the new block. */
    2884       10666 :                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
    2885       10666 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
    2886             : 
    2887       10666 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2888       10666 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2889             : 
    2890             :                 /* Stash the keys of the new block for later insertion. */
    2891       10666 :                 xfs_btree_get_node_keys(cur, right, key);
    2892             :         } else {
    2893             :                 /* It's a leaf.  Move records.  */
    2894     1796014 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2895     1796014 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    2896             : 
    2897     1796014 :                 lrp = xfs_btree_rec_addr(cur, src_index, left);
    2898     1796014 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2899             : 
    2900             :                 /* Copy records to the new block. */
    2901     1796014 :                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
    2902     1796016 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2903             : 
    2904             :                 /* Stash the keys of the new block for later insertion. */
    2905     1796014 :                 xfs_btree_get_leaf_keys(cur, right, key);
    2906             :         }
    2907             : 
    2908             :         /*
    2909             :          * Find the left block number by looking in the buffer.
    2910             :          * Adjust sibling pointers.
    2911             :          */
    2912     1806683 :         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
    2913     1806682 :         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
    2914     1806682 :         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2915     1806681 :         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2916             : 
    2917     1806681 :         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
    2918     1806683 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    2919             : 
    2920             :         /*
    2921             :          * If there's a block to the new block's right, make that block
    2922             :          * point back to right instead of to left.
    2923             :          */
    2924     3613366 :         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
    2925     1066705 :                 error = xfs_btree_read_buf_block(cur, &rrptr,
    2926             :                                                         0, &rrblock, &rrbp);
    2927     1066705 :                 if (error)
    2928         231 :                         goto error0;
    2929     1066474 :                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
    2930     1066474 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    2931             :         }
    2932             : 
    2933             :         /* Update the parent high keys of the left block, if needed. */
    2934     1806452 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2935     1074902 :                 error = xfs_btree_update_keys(cur, level);
    2936     1074900 :                 if (error)
    2937           0 :                         goto error0;
    2938             :         }
    2939             : 
    2940             :         /*
    2941             :          * If the cursor is really in the right block, move it there.
    2942             :          * If it's just pointing past the last entry in left, then we'll
    2943             :          * insert there, so don't change anything in that case.
    2944             :          */
    2945     1806450 :         if (cur->bc_levels[level].ptr > lrecs + 1) {
    2946     1376942 :                 xfs_btree_setbuf(cur, level, rbp);
    2947     1376941 :                 cur->bc_levels[level].ptr -= lrecs;
    2948             :         }
    2949             :         /*
    2950             :          * If there are more levels, we'll need another cursor which refers
    2951             :          * the right block, no matter where this cursor was.
    2952             :          */
    2953     1806449 :         if (level + 1 < cur->bc_nlevels) {
    2954     1766277 :                 error = xfs_btree_dup_cursor(cur, curp);
    2955     1766277 :                 if (error)
    2956           4 :                         goto error0;
    2957     1766273 :                 (*curp)->bc_levels[level + 1].ptr++;
    2958             :         }
    2959     1806445 :         *ptrp = rptr;
    2960     1806445 :         *stat = 1;
    2961     1806445 :         return 0;
    2962             : out0:
    2963           0 :         *stat = 0;
    2964           0 :         return 0;
    2965             : 
    2966             : error0:
    2967             :         return error;
    2968             : }
    2969             : 
    2970             : #ifdef __KERNEL__
    2971             : struct xfs_btree_split_args {
    2972             :         struct xfs_btree_cur    *cur;
    2973             :         int                     level;
    2974             :         union xfs_btree_ptr     *ptrp;
    2975             :         union xfs_btree_key     *key;
    2976             :         struct xfs_btree_cur    **curp;
    2977             :         int                     *stat;          /* success/failure */
    2978             :         int                     result;
    2979             :         bool                    kswapd; /* allocation in kswapd context */
    2980             :         struct completion       *done;
    2981             :         struct work_struct      work;
    2982             : };
    2983             : 
    2984             : /*
    2985             :  * Stack switching interfaces for allocation
    2986             :  */
    2987             : static void
    2988       35688 : xfs_btree_split_worker(
    2989             :         struct work_struct      *work)
    2990             : {
    2991       35688 :         struct xfs_btree_split_args     *args = container_of(work,
    2992             :                                                 struct xfs_btree_split_args, work);
    2993       35688 :         unsigned long           pflags;
    2994       35688 :         unsigned long           new_pflags = 0;
    2995             : 
    2996             :         /*
    2997             :          * we are in a transaction context here, but may also be doing work
    2998             :          * in kswapd context, and hence we may need to inherit that state
    2999             :          * temporarily to ensure that we don't block waiting for memory reclaim
    3000             :          * in any way.
    3001             :          */
    3002       35688 :         if (args->kswapd)
    3003           0 :                 new_pflags |= PF_MEMALLOC | PF_KSWAPD;
    3004             : 
    3005       35688 :         current_set_flags_nested(&pflags, new_pflags);
    3006       35688 :         xfs_trans_set_context(args->cur->bc_tp);
    3007             : 
    3008       35687 :         args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
    3009             :                                          args->key, args->curp, args->stat);
    3010             : 
    3011       35688 :         xfs_trans_clear_context(args->cur->bc_tp);
    3012       35688 :         current_restore_flags_nested(&pflags, new_pflags);
    3013             : 
    3014             :         /*
    3015             :          * Do not access args after complete() has run here. We don't own args
    3016             :          * and the owner may run and free args before we return here.
    3017             :          */
    3018       35688 :         complete(args->done);
    3019             : 
    3020       35688 : }
    3021             : 
    3022             : /*
    3023             :  * BMBT split requests often come in with little stack to work on so we push
    3024             :  * them off to a worker thread so there is lots of stack to use. For the other
    3025             :  * btree types, just call directly to avoid the context switch overhead here.
    3026             :  *
    3027             :  * Care must be taken here - the work queue rescuer thread introduces potential
    3028             :  * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
    3029             :  * AGFs to allocate blocks. A task being run by the rescuer could attempt to
    3030             :  * lock an AGF that is already locked by a task queued to run by the rescuer,
    3031             :  * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
    3032             :  * release it until the current thread it is running gains the lock.
    3033             :  *
    3034             :  * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
    3035             :  * already locked to allocate from. The only place that doesn't hold an AGF
    3036             :  * locked is unwritten extent conversion at IO completion, but that has already
    3037             :  * been offloaded to a worker thread and hence has no stack consumption issues
    3038             :  * we have to worry about.
    3039             :  */
    3040             : STATIC int                                      /* error */
    3041     1806684 : xfs_btree_split(
    3042             :         struct xfs_btree_cur    *cur,
    3043             :         int                     level,
    3044             :         union xfs_btree_ptr     *ptrp,
    3045             :         union xfs_btree_key     *key,
    3046             :         struct xfs_btree_cur    **curp,
    3047             :         int                     *stat)          /* success/failure */
    3048             : {
    3049     1806684 :         struct xfs_btree_split_args     args;
    3050     1806684 :         DECLARE_COMPLETION_ONSTACK(done);
    3051             : 
    3052     1806684 :         if (cur->bc_btnum != XFS_BTNUM_BMAP ||
    3053      481096 :             cur->bc_tp->t_highest_agno == NULLAGNUMBER)
    3054     1770997 :                 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
    3055             : 
    3056       35687 :         args.cur = cur;
    3057       35687 :         args.level = level;
    3058       35687 :         args.ptrp = ptrp;
    3059       35687 :         args.key = key;
    3060       35687 :         args.curp = curp;
    3061       35687 :         args.stat = stat;
    3062       35687 :         args.done = &done;
    3063       35687 :         args.kswapd = current_is_kswapd();
    3064       35687 :         INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
    3065       35687 :         queue_work(xfs_alloc_wq, &args.work);
    3066       35687 :         wait_for_completion(&done);
    3067       35687 :         destroy_work_on_stack(&args.work);
    3068       35687 :         return args.result;
    3069             : }
    3070             : #else
    3071             : #define xfs_btree_split __xfs_btree_split
    3072             : #endif /* __KERNEL__ */
    3073             : 
    3074             : 
    3075             : /*
    3076             :  * Copy the old inode root contents into a real block and make the
    3077             :  * broot point to it.
    3078             :  */
    3079             : int                                             /* error */
    3080        2878 : xfs_btree_new_iroot(
    3081             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3082             :         int                     *logflags,      /* logging flags for inode */
    3083             :         int                     *stat)          /* return status - 0 fail */
    3084             : {
    3085        2878 :         struct xfs_buf          *cbp;           /* buffer for cblock */
    3086        2878 :         struct xfs_btree_block  *block;         /* btree block */
    3087        2878 :         struct xfs_btree_block  *cblock;        /* child btree block */
    3088        2878 :         union xfs_btree_key     *ckp;           /* child key pointer */
    3089        2878 :         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
    3090        2878 :         union xfs_btree_key     *kp;            /* pointer to btree key */
    3091        2878 :         union xfs_btree_ptr     *pp;            /* pointer to block addr */
    3092        2878 :         union xfs_btree_ptr     nptr;           /* new block addr */
    3093        2878 :         int                     level;          /* btree level */
    3094        2878 :         int                     error;          /* error return code */
    3095        2878 :         int                     i;              /* loop counter */
    3096             : 
    3097        2878 :         XFS_BTREE_STATS_INC(cur, newroot);
    3098             : 
    3099        2878 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3100             : 
    3101        2878 :         level = cur->bc_nlevels - 1;
    3102             : 
    3103        2878 :         block = xfs_btree_get_iroot(cur);
    3104        2878 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3105             : 
    3106             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3107        2878 :         error = xfs_btree_alloc_block(cur, pp, &nptr, stat);
    3108        2878 :         if (error)
    3109           0 :                 goto error0;
    3110        2878 :         if (*stat == 0)
    3111             :                 return 0;
    3112             : 
    3113        2878 :         XFS_BTREE_STATS_INC(cur, alloc);
    3114             : 
    3115             :         /* Copy the root into a real block. */
    3116        2878 :         error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
    3117        2878 :         if (error)
    3118           0 :                 goto error0;
    3119             : 
    3120             :         /*
    3121             :          * we can't just memcpy() the root in for CRC enabled btree blocks.
    3122             :          * In that case have to also ensure the blkno remains correct
    3123             :          */
    3124        5756 :         memcpy(cblock, block, xfs_btree_block_len(cur));
    3125        2878 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    3126        2878 :                 __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
    3127        2878 :                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    3128        2878 :                         cblock->bb_u.l.bb_blkno = bno;
    3129             :                 else
    3130           0 :                         cblock->bb_u.s.bb_blkno = bno;
    3131             :         }
    3132             : 
    3133        2878 :         be16_add_cpu(&block->bb_level, 1);
    3134        2878 :         xfs_btree_set_numrecs(block, 1);
    3135        2878 :         cur->bc_nlevels++;
    3136        2878 :         ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
    3137        2878 :         cur->bc_levels[level + 1].ptr = 1;
    3138             : 
    3139        2878 :         kp = xfs_btree_key_addr(cur, 1, block);
    3140        2878 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3141        2878 :         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
    3142             : 
    3143        2878 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3144       38782 :         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
    3145       33026 :                 error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3146       33026 :                 if (error)
    3147           0 :                         goto error0;
    3148             :         }
    3149             : 
    3150        2878 :         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
    3151             : 
    3152        2878 :         error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
    3153        2878 :         if (error)
    3154           0 :                 goto error0;
    3155             : 
    3156        2878 :         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
    3157             : 
    3158        2878 :         xfs_iroot_realloc(cur->bc_ino.ip,
    3159             :                           1 - xfs_btree_get_numrecs(cblock),
    3160        2878 :                           cur->bc_ino.whichfork);
    3161             : 
    3162        2878 :         xfs_btree_setbuf(cur, level, cbp);
    3163             : 
    3164             :         /*
    3165             :          * Do all this logging at the end so that
    3166             :          * the root is at the right level.
    3167             :          */
    3168        2878 :         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
    3169        2878 :         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
    3170        2878 :         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
    3171             : 
    3172        5756 :         *logflags |=
    3173        2878 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
    3174        2878 :         *stat = 1;
    3175        2878 :         return 0;
    3176             : error0:
    3177             :         return error;
    3178             : }
    3179             : 
    3180             : /*
    3181             :  * Allocate a new root block, fill it in.
    3182             :  */
    3183             : STATIC int                              /* error */
    3184       40174 : xfs_btree_new_root(
    3185             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3186             :         int                     *stat)  /* success/failure */
    3187             : {
    3188       40174 :         struct xfs_btree_block  *block; /* one half of the old root block */
    3189       40174 :         struct xfs_buf          *bp;    /* buffer containing block */
    3190       40174 :         int                     error;  /* error return value */
    3191       40174 :         struct xfs_buf          *lbp;   /* left buffer pointer */
    3192       40174 :         struct xfs_btree_block  *left;  /* left btree block */
    3193       40174 :         struct xfs_buf          *nbp;   /* new (root) buffer */
    3194       40174 :         struct xfs_btree_block  *new;   /* new (root) btree block */
    3195       40174 :         int                     nptr;   /* new value for key index, 1 or 2 */
    3196       40174 :         struct xfs_buf          *rbp;   /* right buffer pointer */
    3197       40174 :         struct xfs_btree_block  *right; /* right btree block */
    3198       40174 :         union xfs_btree_ptr     rptr;
    3199       40174 :         union xfs_btree_ptr     lptr;
    3200             : 
    3201       40174 :         XFS_BTREE_STATS_INC(cur, newroot);
    3202             : 
    3203             :         /* initialise our start point from the cursor */
    3204       40174 :         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
    3205             : 
    3206             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3207       40174 :         error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
    3208       40174 :         if (error)
    3209           0 :                 goto error0;
    3210       40174 :         if (*stat == 0)
    3211           0 :                 goto out0;
    3212       40174 :         XFS_BTREE_STATS_INC(cur, alloc);
    3213             : 
    3214             :         /* Set up the new block. */
    3215       40174 :         error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
    3216       40174 :         if (error)
    3217           0 :                 goto error0;
    3218             : 
    3219             :         /* Set the root in the holding structure  increasing the level by 1. */
    3220       40174 :         cur->bc_ops->set_root(cur, &lptr, 1);
    3221             : 
    3222             :         /*
    3223             :          * At the previous root level there are now two blocks: the old root,
    3224             :          * and the new block generated when it was split.  We don't know which
    3225             :          * one the cursor is pointing at, so we set up variables "left" and
    3226             :          * "right" for each case.
    3227             :          */
    3228       40174 :         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
    3229             : 
    3230             : #ifdef DEBUG
    3231       40174 :         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
    3232       40174 :         if (error)
    3233           0 :                 goto error0;
    3234             : #endif
    3235             : 
    3236       40174 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    3237       80348 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    3238             :                 /* Our block is left, pick up the right block. */
    3239       17317 :                 lbp = bp;
    3240       17317 :                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    3241       17317 :                 left = block;
    3242       17317 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    3243       17317 :                 if (error)
    3244           1 :                         goto error0;
    3245       17316 :                 bp = rbp;
    3246       17316 :                 nptr = 1;
    3247             :         } else {
    3248             :                 /* Our block is right, pick up the left block. */
    3249       22857 :                 rbp = bp;
    3250       22857 :                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
    3251       22857 :                 right = block;
    3252       22857 :                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    3253       22857 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    3254       22857 :                 if (error)
    3255           0 :                         goto error0;
    3256       22857 :                 bp = lbp;
    3257       22857 :                 nptr = 2;
    3258             :         }
    3259             : 
    3260             :         /* Fill in the new block's btree header and log it. */
    3261       40173 :         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
    3262       40173 :         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
    3263      120519 :         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
    3264             :                         !xfs_btree_ptr_is_null(cur, &rptr));
    3265             : 
    3266             :         /* Fill in the key data in the new root. */
    3267       40173 :         if (xfs_btree_get_level(left) > 0) {
    3268             :                 /*
    3269             :                  * Get the keys for the left block's keys and put them directly
    3270             :                  * in the parent block.  Do the same for the right block.
    3271             :                  */
    3272        1316 :                 xfs_btree_get_node_keys(cur, left,
    3273             :                                 xfs_btree_key_addr(cur, 1, new));
    3274        1316 :                 xfs_btree_get_node_keys(cur, right,
    3275             :                                 xfs_btree_key_addr(cur, 2, new));
    3276             :         } else {
    3277             :                 /*
    3278             :                  * Get the keys for the left block's records and put them
    3279             :                  * directly in the parent block.  Do the same for the right
    3280             :                  * block.
    3281             :                  */
    3282       38857 :                 xfs_btree_get_leaf_keys(cur, left,
    3283             :                         xfs_btree_key_addr(cur, 1, new));
    3284       38857 :                 xfs_btree_get_leaf_keys(cur, right,
    3285             :                         xfs_btree_key_addr(cur, 2, new));
    3286             :         }
    3287       40173 :         xfs_btree_log_keys(cur, nbp, 1, 2);
    3288             : 
    3289             :         /* Fill in the pointer data in the new root. */
    3290       40173 :         xfs_btree_copy_ptrs(cur,
    3291             :                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
    3292       40173 :         xfs_btree_copy_ptrs(cur,
    3293             :                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
    3294       40173 :         xfs_btree_log_ptrs(cur, nbp, 1, 2);
    3295             : 
    3296             :         /* Fix up the cursor. */
    3297       40173 :         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
    3298       40173 :         cur->bc_levels[cur->bc_nlevels].ptr = nptr;
    3299       40173 :         cur->bc_nlevels++;
    3300       40173 :         ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
    3301       40173 :         *stat = 1;
    3302       40173 :         return 0;
    3303             : error0:
    3304             :         return error;
    3305             : out0:
    3306           0 :         *stat = 0;
    3307           0 :         return 0;
    3308             : }
    3309             : 
    3310             : STATIC int
    3311   171146056 : xfs_btree_make_block_unfull(
    3312             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3313             :         int                     level,  /* btree level */
    3314             :         int                     numrecs,/* # of recs in block */
    3315             :         int                     *oindex,/* old tree index */
    3316             :         int                     *index, /* new tree index */
    3317             :         union xfs_btree_ptr     *nptr,  /* new btree ptr */
    3318             :         struct xfs_btree_cur    **ncur, /* new btree cursor */
    3319             :         union xfs_btree_key     *key,   /* key of new block */
    3320             :         int                     *stat)
    3321             : {
    3322   171146056 :         int                     error = 0;
    3323             : 
    3324   171146056 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3325    57403965 :             level == cur->bc_nlevels - 1) {
    3326       54584 :                 struct xfs_inode *ip = cur->bc_ino.ip;
    3327             : 
    3328       54584 :                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
    3329             :                         /* A root block that can be made bigger. */
    3330       51707 :                         xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
    3331       51706 :                         *stat = 1;
    3332             :                 } else {
    3333             :                         /* A root block that needs replacing */
    3334        2878 :                         int     logflags = 0;
    3335             : 
    3336        2878 :                         error = xfs_btree_new_iroot(cur, &logflags, stat);
    3337        2878 :                         if (error || *stat == 0)
    3338           0 :                                 return error;
    3339             : 
    3340        2878 :                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
    3341             :                 }
    3342             : 
    3343       54584 :                 return 0;
    3344             :         }
    3345             : 
    3346             :         /* First, try shifting an entry to the right neighbor. */
    3347   171091472 :         error = xfs_btree_rshift(cur, level, stat);
    3348   171091975 :         if (error || *stat)
    3349             :                 return error;
    3350             : 
    3351             :         /* Next, try shifting an entry to the left neighbor. */
    3352   125196229 :         error = xfs_btree_lshift(cur, level, stat);
    3353   125196536 :         if (error)
    3354             :                 return error;
    3355             : 
    3356   125196478 :         if (*stat) {
    3357   123389793 :                 *oindex = *index = cur->bc_levels[level].ptr;
    3358   123389793 :                 return 0;
    3359             :         }
    3360             : 
    3361             :         /*
    3362             :          * Next, try splitting the current block in half.
    3363             :          *
    3364             :          * If this works we have to re-set our variables because we
    3365             :          * could be in a different block now.
    3366             :          */
    3367     1806685 :         error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
    3368     1806685 :         if (error || *stat == 0)
    3369             :                 return error;
    3370             : 
    3371             : 
    3372     1806446 :         *index = cur->bc_levels[level].ptr;
    3373     1806446 :         return 0;
    3374             : }
    3375             : 
    3376             : /*
    3377             :  * Insert one record/level.  Return information to the caller
    3378             :  * allowing the next level up to proceed if necessary.
    3379             :  */
    3380             : STATIC int
    3381   924942907 : xfs_btree_insrec(
    3382             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3383             :         int                     level,  /* level to insert record at */
    3384             :         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
    3385             :         union xfs_btree_rec     *rec,   /* record to insert */
    3386             :         union xfs_btree_key     *key,   /* i/o: block key for ptrp */
    3387             :         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
    3388             :         int                     *stat)  /* success/failure */
    3389             : {
    3390   924942907 :         struct xfs_btree_block  *block; /* btree block */
    3391   924942907 :         struct xfs_buf          *bp;    /* buffer for block */
    3392   924942907 :         union xfs_btree_ptr     nptr;   /* new block ptr */
    3393   924942907 :         struct xfs_btree_cur    *ncur = NULL;   /* new btree cursor */
    3394   924942907 :         union xfs_btree_key     nkey;   /* new block key */
    3395   924942907 :         union xfs_btree_key     *lkey;
    3396   924942907 :         int                     optr;   /* old key/record index */
    3397   924942907 :         int                     ptr;    /* key/record index */
    3398   924942907 :         int                     numrecs;/* number of records */
    3399   924942907 :         int                     error;  /* error return value */
    3400   924942907 :         int                     i;
    3401   924942907 :         xfs_daddr_t             old_bn;
    3402             : 
    3403   924942907 :         ncur = NULL;
    3404   924942907 :         lkey = &nkey;
    3405             : 
    3406             :         /*
    3407             :          * If we have an external root pointer, and we've made it to the
    3408             :          * root level, allocate a new root block and we're done.
    3409             :          */
    3410   924942907 :         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3411   775693894 :             (level >= cur->bc_nlevels)) {
    3412       40174 :                 error = xfs_btree_new_root(cur, stat);
    3413       40174 :                 xfs_btree_set_ptr_null(cur, ptrp);
    3414             : 
    3415       40174 :                 return error;
    3416             :         }
    3417             : 
    3418             :         /* If we're off the left edge, return failure. */
    3419   924902733 :         ptr = cur->bc_levels[level].ptr;
    3420   924902733 :         if (ptr == 0) {
    3421           0 :                 *stat = 0;
    3422           0 :                 return 0;
    3423             :         }
    3424             : 
    3425   924902733 :         optr = ptr;
    3426             : 
    3427   924902733 :         XFS_BTREE_STATS_INC(cur, insrec);
    3428             : 
    3429             :         /* Get pointers to the btree buffer and block. */
    3430   924905860 :         block = xfs_btree_get_block(cur, level, &bp);
    3431   924906187 :         old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
    3432   924906187 :         numrecs = xfs_btree_get_numrecs(block);
    3433             : 
    3434             : #ifdef DEBUG
    3435   924906187 :         error = xfs_btree_check_block(cur, block, level, bp);
    3436   924933792 :         if (error)
    3437           0 :                 goto error0;
    3438             : 
    3439             :         /* Check that the new entry is being inserted in the right place. */
    3440   924933792 :         if (ptr <= numrecs) {
    3441   633832792 :                 if (level == 0) {
    3442   632821853 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rec,
    3443             :                                 xfs_btree_rec_addr(cur, ptr, block)));
    3444             :                 } else {
    3445     1010939 :                         ASSERT(cur->bc_ops->keys_inorder(cur, key,
    3446             :                                 xfs_btree_key_addr(cur, ptr, block)));
    3447             :                 }
    3448             :         }
    3449             : #endif
    3450             : 
    3451             :         /*
    3452             :          * If the block is full, we can't insert the new entry until we
    3453             :          * make the block un-full.
    3454             :          */
    3455   924871309 :         xfs_btree_set_ptr_null(cur, &nptr);
    3456   924871309 :         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
    3457   171145726 :                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
    3458             :                                         &optr, &ptr, &nptr, &ncur, lkey, stat);
    3459   171146750 :                 if (error || *stat == 0)
    3460         496 :                         goto error0;
    3461             :         }
    3462             : 
    3463             :         /*
    3464             :          * The current block may have changed if the block was
    3465             :          * previously full and we have just made space in it.
    3466             :          */
    3467   924860583 :         block = xfs_btree_get_block(cur, level, &bp);
    3468   924874439 :         numrecs = xfs_btree_get_numrecs(block);
    3469             : 
    3470             : #ifdef DEBUG
    3471   924874439 :         error = xfs_btree_check_block(cur, block, level, bp);
    3472   924925174 :         if (error)
    3473           0 :                 goto error0;
    3474             : #endif
    3475             : 
    3476             :         /*
    3477             :          * At this point we know there's room for our new entry in the block
    3478             :          * we're pointing at.
    3479             :          */
    3480   924925174 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
    3481             : 
    3482   924927833 :         if (level > 0) {
    3483             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    3484     1766267 :                 union xfs_btree_key     *kp;
    3485     1766267 :                 union xfs_btree_ptr     *pp;
    3486             : 
    3487     1766267 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    3488     1766267 :                 pp = xfs_btree_ptr_addr(cur, ptr, block);
    3489             : 
    3490    32489355 :                 for (i = numrecs - ptr; i >= 0; i--) {
    3491    28956820 :                         error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3492    28956821 :                         if (error)
    3493           0 :                                 goto error0;
    3494             :                 }
    3495             : 
    3496     1766268 :                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
    3497     1766266 :                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
    3498             : 
    3499     1766266 :                 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
    3500     1766265 :                 if (error)
    3501           0 :                         goto error0;
    3502             : 
    3503             :                 /* Now put the new data in, bump numrecs and log it. */
    3504     1766265 :                 xfs_btree_copy_keys(cur, kp, key, 1);
    3505     1766264 :                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
    3506     1766263 :                 numrecs++;
    3507     1766263 :                 xfs_btree_set_numrecs(block, numrecs);
    3508     1766263 :                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
    3509     1766266 :                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
    3510             : #ifdef DEBUG
    3511     1766268 :                 if (ptr < numrecs) {
    3512     1010810 :                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
    3513             :                                 xfs_btree_key_addr(cur, ptr + 1, block)));
    3514             :                 }
    3515             : #endif
    3516             :         } else {
    3517             :                 /* It's a leaf. make a hole in the records */
    3518   923161566 :                 union xfs_btree_rec             *rp;
    3519             : 
    3520   923161566 :                 rp = xfs_btree_rec_addr(cur, ptr, block);
    3521             : 
    3522   923161566 :                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
    3523             : 
    3524             :                 /* Now put the new data in, bump numrecs and log it. */
    3525   923139304 :                 xfs_btree_copy_recs(cur, rp, rec, 1);
    3526   923104273 :                 xfs_btree_set_numrecs(block, ++numrecs);
    3527   923104273 :                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
    3528             : #ifdef DEBUG
    3529   923178400 :                 if (ptr < numrecs) {
    3530   632826493 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
    3531             :                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
    3532             :                 }
    3533             : #endif
    3534             :         }
    3535             : 
    3536             :         /* Log the new number of records in the btree header. */
    3537   924936863 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    3538             : 
    3539             :         /*
    3540             :          * If we just inserted into a new tree block, we have to
    3541             :          * recalculate nkey here because nkey is out of date.
    3542             :          *
    3543             :          * Otherwise we're just updating an existing block (having shoved
    3544             :          * some records into the new tree block), so use the regular key
    3545             :          * update mechanism.
    3546             :          */
    3547   924943237 :         if (bp && xfs_buf_daddr(bp) != old_bn) {
    3548     1379819 :                 xfs_btree_get_keys(cur, block, lkey);
    3549  1533100527 :         } else if (xfs_btree_needs_key_update(cur, optr)) {
    3550   474284725 :                 error = xfs_btree_update_keys(cur, level);
    3551   474271614 :                 if (error)
    3552           0 :                         goto error0;
    3553             :         }
    3554             : 
    3555             :         /*
    3556             :          * If we are tracking the last record in the tree and
    3557             :          * we are at the far right edge of the tree, update it.
    3558             :          */
    3559   924930125 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    3560   136113962 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    3561             :                                             ptr, LASTREC_INSREC);
    3562             :         }
    3563             : 
    3564             :         /*
    3565             :          * Return the new block number, if any.
    3566             :          * If there is one, give back a record value and a cursor too.
    3567             :          */
    3568   924873131 :         *ptrp = nptr;
    3569  1849746262 :         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
    3570     1806447 :                 xfs_btree_copy_keys(cur, key, lkey, 1);
    3571     1806447 :                 *curp = ncur;
    3572             :         }
    3573             : 
    3574   924873131 :         *stat = 1;
    3575   924873131 :         return 0;
    3576             : 
    3577         496 : error0:
    3578         496 :         if (ncur)
    3579           0 :                 xfs_btree_del_cursor(ncur, error);
    3580             :         return error;
    3581             : }
    3582             : 
    3583             : /*
    3584             :  * Insert the record at the point referenced by cur.
    3585             :  *
    3586             :  * A multi-level split of the tree on insert will invalidate the original
    3587             :  * cursor.  All callers of this function should assume that the cursor is
    3588             :  * no longer valid and revalidate it.
    3589             :  */
    3590             : int
    3591   923177046 : xfs_btree_insert(
    3592             :         struct xfs_btree_cur    *cur,
    3593             :         int                     *stat)
    3594             : {
    3595   923177046 :         int                     error;  /* error return value */
    3596   923177046 :         int                     i;      /* result value, 0 for failure */
    3597   923177046 :         int                     level;  /* current level number in btree */
    3598   923177046 :         union xfs_btree_ptr     nptr;   /* new block number (split result) */
    3599   923177046 :         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
    3600   923177046 :         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
    3601   923177046 :         union xfs_btree_key     bkey;   /* key of block to insert */
    3602   923177046 :         union xfs_btree_key     *key;
    3603   923177046 :         union xfs_btree_rec     rec;    /* record to insert */
    3604             : 
    3605   923177046 :         level = 0;
    3606   923177046 :         ncur = NULL;
    3607   923177046 :         pcur = cur;
    3608   923177046 :         key = &bkey;
    3609             : 
    3610   923177046 :         xfs_btree_set_ptr_null(cur, &nptr);
    3611             : 
    3612             :         /* Make a key out of the record data to be inserted, and save it. */
    3613   923177046 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    3614   923131000 :         cur->bc_ops->init_key_from_rec(key, &rec);
    3615             : 
    3616             :         /*
    3617             :          * Loop going up the tree, starting at the leaf level.
    3618             :          * Stop when we don't get a split block, that must mean that
    3619             :          * the insert is finished with this level.
    3620             :          */
    3621   924943913 :         do {
    3622             :                 /*
    3623             :                  * Insert nrec/nptr into this level of the tree.
    3624             :                  * Note if we fail, nptr will be null.
    3625             :                  */
    3626   924943913 :                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
    3627             :                                 &ncur, &i);
    3628   924921635 :                 if (error) {
    3629         497 :                         if (pcur != cur)
    3630           6 :                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
    3631         497 :                         goto error0;
    3632             :                 }
    3633             : 
    3634   924921138 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    3635           0 :                         xfs_btree_mark_sick(cur);
    3636           0 :                         error = -EFSCORRUPTED;
    3637           0 :                         goto error0;
    3638             :                 }
    3639   924921138 :                 level++;
    3640             : 
    3641             :                 /*
    3642             :                  * See if the cursor we just used is trash.
    3643             :                  * Can't trash the caller's cursor, but otherwise we should
    3644             :                  * if ncur is a new cursor or we're about to be done.
    3645             :                  */
    3646   924921138 :                 if (pcur != cur &&
    3647     3525816 :                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
    3648             :                         /* Save the state from the cursor before we trash it */
    3649     1766267 :                         if (cur->bc_ops->update_cursor)
    3650      481093 :                                 cur->bc_ops->update_cursor(pcur, cur);
    3651     1766266 :                         cur->bc_nlevels = pcur->bc_nlevels;
    3652     1766266 :                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
    3653             :                 }
    3654             :                 /* If we got a new cursor, switch to it. */
    3655   924892235 :                 if (ncur) {
    3656     1779622 :                         pcur = ncur;
    3657     1779622 :                         ncur = NULL;
    3658             :                 }
    3659  1849784470 :         } while (!xfs_btree_ptr_is_null(cur, &nptr));
    3660             : 
    3661   923084043 :         *stat = i;
    3662   923084043 :         return 0;
    3663             : error0:
    3664             :         return error;
    3665             : }
    3666             : 
    3667             : /*
    3668             :  * Try to merge a non-leaf block back into the inode root.
    3669             :  *
    3670             :  * Note: the killroot names comes from the fact that we're effectively
    3671             :  * killing the old root block.  But because we can't just delete the
    3672             :  * inode we have to copy the single block it was pointing to into the
    3673             :  * inode.
    3674             :  */
    3675             : STATIC int
    3676    20161692 : xfs_btree_kill_iroot(
    3677             :         struct xfs_btree_cur    *cur)
    3678             : {
    3679    20161692 :         int                     whichfork = cur->bc_ino.whichfork;
    3680    20161692 :         struct xfs_inode        *ip = cur->bc_ino.ip;
    3681    20161692 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
    3682    20161580 :         struct xfs_btree_block  *block;
    3683    20161580 :         struct xfs_btree_block  *cblock;
    3684    20161580 :         union xfs_btree_key     *kp;
    3685    20161580 :         union xfs_btree_key     *ckp;
    3686    20161580 :         union xfs_btree_ptr     *pp;
    3687    20161580 :         union xfs_btree_ptr     *cpp;
    3688    20161580 :         struct xfs_buf          *cbp;
    3689    20161580 :         int                     level;
    3690    20161580 :         int                     index;
    3691    20161580 :         int                     numrecs;
    3692    20161580 :         int                     error;
    3693             : #ifdef DEBUG
    3694    20161580 :         union xfs_btree_ptr     ptr;
    3695             : #endif
    3696    20161580 :         int                     i;
    3697             : 
    3698    20161580 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3699    20161580 :         ASSERT(cur->bc_nlevels > 1);
    3700             : 
    3701             :         /*
    3702             :          * Don't deal with the root block needs to be a leaf case.
    3703             :          * We're just going to turn the thing back into extents anyway.
    3704             :          */
    3705    20161580 :         level = cur->bc_nlevels - 1;
    3706    20161580 :         if (level == 1)
    3707    20013842 :                 goto out0;
    3708             : 
    3709             :         /*
    3710             :          * Give up if the root has multiple children.
    3711             :          */
    3712      147738 :         block = xfs_btree_get_iroot(cur);
    3713      147738 :         if (xfs_btree_get_numrecs(block) != 1)
    3714          22 :                 goto out0;
    3715             : 
    3716      147716 :         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
    3717      147716 :         numrecs = xfs_btree_get_numrecs(cblock);
    3718             : 
    3719             :         /*
    3720             :          * Only do this if the next level will fit.
    3721             :          * Then the data must be copied up to the inode,
    3722             :          * instead of freeing the root you free the next level.
    3723             :          */
    3724      147716 :         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
    3725      145410 :                 goto out0;
    3726             : 
    3727        2306 :         XFS_BTREE_STATS_INC(cur, killroot);
    3728             : 
    3729             : #ifdef DEBUG
    3730        2306 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    3731        4612 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3732        2306 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    3733        4612 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3734             : #endif
    3735             : 
    3736        2306 :         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
    3737        2306 :         if (index) {
    3738        2306 :                 xfs_iroot_realloc(cur->bc_ino.ip, index,
    3739        2306 :                                   cur->bc_ino.whichfork);
    3740        2306 :                 block = ifp->if_broot;
    3741             :         }
    3742             : 
    3743        2306 :         be16_add_cpu(&block->bb_numrecs, index);
    3744        2306 :         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
    3745             : 
    3746        2306 :         kp = xfs_btree_key_addr(cur, 1, block);
    3747        2306 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3748        2306 :         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
    3749             : 
    3750        2306 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3751        2306 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3752             : 
    3753       30032 :         for (i = 0; i < numrecs; i++) {
    3754       25420 :                 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
    3755       25420 :                 if (error)
    3756           0 :                         return error;
    3757             :         }
    3758             : 
    3759        2306 :         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
    3760             : 
    3761        2306 :         error = xfs_btree_free_block(cur, cbp);
    3762        2306 :         if (error)
    3763             :                 return error;
    3764             : 
    3765        2306 :         cur->bc_levels[level - 1].bp = NULL;
    3766        2306 :         be16_add_cpu(&block->bb_level, -1);
    3767        4612 :         xfs_trans_log_inode(cur->bc_tp, ip,
    3768        2306 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
    3769        2306 :         cur->bc_nlevels--;
    3770             : out0:
    3771             :         return 0;
    3772             : }
    3773             : 
    3774             : /*
    3775             :  * Kill the current root node, and replace it with it's only child node.
    3776             :  */
    3777             : STATIC int
    3778       26271 : xfs_btree_kill_root(
    3779             :         struct xfs_btree_cur    *cur,
    3780             :         struct xfs_buf          *bp,
    3781             :         int                     level,
    3782             :         union xfs_btree_ptr     *newroot)
    3783             : {
    3784       26271 :         int                     error;
    3785             : 
    3786       26271 :         XFS_BTREE_STATS_INC(cur, killroot);
    3787             : 
    3788             :         /*
    3789             :          * Update the root pointer, decreasing the level by 1 and then
    3790             :          * free the old root.
    3791             :          */
    3792       26271 :         cur->bc_ops->set_root(cur, newroot, -1);
    3793             : 
    3794       26271 :         error = xfs_btree_free_block(cur, bp);
    3795       26271 :         if (error)
    3796             :                 return error;
    3797             : 
    3798       26271 :         cur->bc_levels[level].bp = NULL;
    3799       26271 :         cur->bc_levels[level].ra = 0;
    3800       26271 :         cur->bc_nlevels--;
    3801             : 
    3802       26271 :         return 0;
    3803             : }
    3804             : 
    3805             : STATIC int
    3806   341875815 : xfs_btree_dec_cursor(
    3807             :         struct xfs_btree_cur    *cur,
    3808             :         int                     level,
    3809             :         int                     *stat)
    3810             : {
    3811   341875815 :         int                     error;
    3812   341875815 :         int                     i;
    3813             : 
    3814   341875815 :         if (level > 0) {
    3815      478715 :                 error = xfs_btree_decrement(cur, level, &i);
    3816      478715 :                 if (error)
    3817             :                         return error;
    3818             :         }
    3819             : 
    3820   341875815 :         *stat = 1;
    3821   341875815 :         return 0;
    3822             : }
    3823             : 
    3824             : /*
    3825             :  * Single level of the btree record deletion routine.
    3826             :  * Delete record pointed to by cur/level.
    3827             :  * Remove the record from its block then rebalance the tree.
    3828             :  * Return 0 for error, 1 for done, 2 to go on to the next level.
    3829             :  */
    3830             : STATIC int                                      /* error */
    3831   662298657 : xfs_btree_delrec(
    3832             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3833             :         int                     level,          /* level removing record from */
    3834             :         int                     *stat)          /* fail/done/go-on */
    3835             : {
    3836   662298657 :         struct xfs_btree_block  *block;         /* btree block */
    3837   662298657 :         union xfs_btree_ptr     cptr;           /* current block ptr */
    3838   662298657 :         struct xfs_buf          *bp;            /* buffer for block */
    3839   662298657 :         int                     error;          /* error return value */
    3840   662298657 :         int                     i;              /* loop counter */
    3841   662298657 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    3842   662298657 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    3843   662298657 :         struct xfs_btree_block  *left;          /* left btree block */
    3844   662298657 :         int                     lrecs = 0;      /* left record count */
    3845   662298657 :         int                     ptr;            /* key/record index */
    3846   662298657 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    3847   662298657 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    3848   662298657 :         struct xfs_btree_block  *right;         /* right btree block */
    3849   662298657 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    3850   662298657 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    3851   662298657 :         int                     rrecs = 0;      /* right record count */
    3852   662298657 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    3853   662298657 :         int                     numrecs;        /* temporary numrec count */
    3854             : 
    3855   662298657 :         tcur = NULL;
    3856             : 
    3857             :         /* Get the index of the entry being deleted, check for nothing there. */
    3858   662298657 :         ptr = cur->bc_levels[level].ptr;
    3859   662298657 :         if (ptr == 0) {
    3860           0 :                 *stat = 0;
    3861           0 :                 return 0;
    3862             :         }
    3863             : 
    3864             :         /* Get the buffer & block containing the record or key/ptr. */
    3865   662298657 :         block = xfs_btree_get_block(cur, level, &bp);
    3866   662268465 :         numrecs = xfs_btree_get_numrecs(block);
    3867             : 
    3868             : #ifdef DEBUG
    3869   662268465 :         error = xfs_btree_check_block(cur, block, level, bp);
    3870   662296993 :         if (error)
    3871           0 :                 goto error0;
    3872             : #endif
    3873             : 
    3874             :         /* Fail if we're off the end of the block. */
    3875   662296993 :         if (ptr > numrecs) {
    3876           0 :                 *stat = 0;
    3877           0 :                 return 0;
    3878             :         }
    3879             : 
    3880   662296993 :         XFS_BTREE_STATS_INC(cur, delrec);
    3881   662297905 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
    3882             : 
    3883             :         /* Excise the entries being deleted. */
    3884   662305905 :         if (level > 0) {
    3885             :                 /* It's a nonleaf. operate on keys and ptrs */
    3886      510800 :                 union xfs_btree_key     *lkp;
    3887      510800 :                 union xfs_btree_ptr     *lpp;
    3888             : 
    3889      510800 :                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
    3890      510800 :                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
    3891             : 
    3892    14447183 :                 for (i = 0; i < numrecs - ptr; i++) {
    3893    13936383 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    3894    13936383 :                         if (error)
    3895           0 :                                 goto error0;
    3896             :                 }
    3897             : 
    3898      510800 :                 if (ptr < numrecs) {
    3899      265344 :                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
    3900      265344 :                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
    3901      265344 :                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
    3902      265344 :                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
    3903             :                 }
    3904             :         } else {
    3905             :                 /* It's a leaf. operate on records */
    3906   661795105 :                 if (ptr < numrecs) {
    3907   441394605 :                         xfs_btree_shift_recs(cur,
    3908             :                                 xfs_btree_rec_addr(cur, ptr + 1, block),
    3909             :                                 -1, numrecs - ptr);
    3910   441407015 :                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
    3911             :                 }
    3912             :         }
    3913             : 
    3914             :         /*
    3915             :          * Decrement and log the number of entries in the block.
    3916             :          */
    3917   662293378 :         xfs_btree_set_numrecs(block, --numrecs);
    3918   662293378 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    3919             : 
    3920             :         /*
    3921             :          * If we are tracking the last record in the tree and
    3922             :          * we are at the far right edge of the tree, update it.
    3923             :          */
    3924   662313332 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    3925   120990658 :                 cur->bc_ops->update_lastrec(cur, block, NULL,
    3926             :                                             ptr, LASTREC_DELREC);
    3927             :         }
    3928             : 
    3929             :         /*
    3930             :          * We're at the root level.  First, shrink the root block in-memory.
    3931             :          * Try to get rid of the next level down.  If we can't then there's
    3932             :          * nothing left to do.
    3933             :          */
    3934   662255626 :         if (level == cur->bc_nlevels - 1) {
    3935   292351590 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    3936       30481 :                         xfs_iroot_realloc(cur->bc_ino.ip, -1,
    3937       30481 :                                           cur->bc_ino.whichfork);
    3938             : 
    3939       30481 :                         error = xfs_btree_kill_iroot(cur);
    3940       30481 :                         if (error)
    3941           0 :                                 goto error0;
    3942             : 
    3943       30481 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    3944       30481 :                         if (error)
    3945           0 :                                 goto error0;
    3946       30481 :                         *stat = 1;
    3947       30481 :                         return 0;
    3948             :                 }
    3949             : 
    3950             :                 /*
    3951             :                  * If this is the root level, and there's only one entry left,
    3952             :                  * and it's NOT the leaf level, then we can get rid of this
    3953             :                  * level.
    3954             :                  */
    3955   292321109 :                 if (numrecs == 1 && level > 0) {
    3956       26271 :                         union xfs_btree_ptr     *pp;
    3957             :                         /*
    3958             :                          * pp is still set to the first pointer in the block.
    3959             :                          * Make it the new root of the btree.
    3960             :                          */
    3961       26271 :                         pp = xfs_btree_ptr_addr(cur, 1, block);
    3962       26271 :                         error = xfs_btree_kill_root(cur, bp, level, pp);
    3963       26271 :                         if (error)
    3964           0 :                                 goto error0;
    3965   292294838 :                 } else if (level > 0) {
    3966      137390 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    3967      137390 :                         if (error)
    3968           0 :                                 goto error0;
    3969             :                 }
    3970   292321109 :                 *stat = 1;
    3971   292321109 :                 return 0;
    3972             :         }
    3973             : 
    3974             :         /*
    3975             :          * If we deleted the leftmost entry in the block, update the
    3976             :          * key values above us in the tree.
    3977             :          */
    3978   569932173 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    3979   181641419 :                 error = xfs_btree_update_keys(cur, level);
    3980   181655345 :                 if (error)
    3981           0 :                         goto error0;
    3982             :         }
    3983             : 
    3984             :         /*
    3985             :          * If the number of records remaining in the block is at least
    3986             :          * the minimum, we're done.
    3987             :          */
    3988   369917962 :         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
    3989   305678887 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    3990   305670231 :                 if (error)
    3991           0 :                         goto error0;
    3992             :                 return 0;
    3993             :         }
    3994             : 
    3995             :         /*
    3996             :          * Otherwise, we have to move some records around to keep the
    3997             :          * tree balanced.  Look at the left and right sibling blocks to
    3998             :          * see if we can re-balance by moving only one record.
    3999             :          */
    4000    64233527 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4001    64233473 :         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
    4002             : 
    4003    64233167 :         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    4004             :                 /*
    4005             :                  * One child of root, need to get a chance to copy its contents
    4006             :                  * into the root and delete it. Can't go up to next level,
    4007             :                  * there's nothing to delete there.
    4008             :                  */
    4009   127820751 :                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
    4010    20131170 :                     xfs_btree_ptr_is_null(cur, &lptr) &&
    4011    20131170 :                     level == cur->bc_nlevels - 2) {
    4012    20131137 :                         error = xfs_btree_kill_iroot(cur);
    4013    20131100 :                         if (!error)
    4014    20131176 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4015    20131018 :                         if (error)
    4016           0 :                                 goto error0;
    4017             :                         return 0;
    4018             :                 }
    4019             :         }
    4020             : 
    4021   111086486 :         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
    4022             :                !xfs_btree_ptr_is_null(cur, &lptr));
    4023             : 
    4024             :         /*
    4025             :          * Duplicate the cursor so our btree manipulations here won't
    4026             :          * disrupt the next level up.
    4027             :          */
    4028    44102030 :         error = xfs_btree_dup_cursor(cur, &tcur);
    4029    44102060 :         if (error)
    4030           0 :                 goto error0;
    4031             : 
    4032             :         /*
    4033             :          * If there's a right sibling, see if it's ok to shift an entry
    4034             :          * out of it.
    4035             :          */
    4036    88204120 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    4037             :                 /*
    4038             :                  * Move the temp cursor to the last entry in the next block.
    4039             :                  * Actually any entry but the first would suffice.
    4040             :                  */
    4041    21219628 :                 i = xfs_btree_lastrec(tcur, level);
    4042    21219956 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4043           0 :                         xfs_btree_mark_sick(cur);
    4044           0 :                         error = -EFSCORRUPTED;
    4045           0 :                         goto error0;
    4046             :                 }
    4047             : 
    4048    21219956 :                 error = xfs_btree_increment(tcur, level, &i);
    4049    21219712 :                 if (error)
    4050          10 :                         goto error0;
    4051    21219702 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4052           0 :                         xfs_btree_mark_sick(cur);
    4053           0 :                         error = -EFSCORRUPTED;
    4054           0 :                         goto error0;
    4055             :                 }
    4056             : 
    4057    21219702 :                 i = xfs_btree_lastrec(tcur, level);
    4058    21220157 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4059           0 :                         xfs_btree_mark_sick(cur);
    4060           0 :                         error = -EFSCORRUPTED;
    4061           0 :                         goto error0;
    4062             :                 }
    4063             : 
    4064             :                 /* Grab a pointer to the block. */
    4065    21220157 :                 right = xfs_btree_get_block(tcur, level, &rbp);
    4066             : #ifdef DEBUG
    4067    21219833 :                 error = xfs_btree_check_block(tcur, right, level, rbp);
    4068    21220030 :                 if (error)
    4069           0 :                         goto error0;
    4070             : #endif
    4071             :                 /* Grab the current block number, for future use. */
    4072    21220030 :                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
    4073             : 
    4074             :                 /*
    4075             :                  * If right block is full enough so that removing one entry
    4076             :                  * won't make it too empty, and left-shifting an entry out
    4077             :                  * of right to us works, we're done.
    4078             :                  */
    4079    21219433 :                 if (xfs_btree_get_numrecs(right) - 1 >=
    4080    21219433 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4081    15900369 :                         error = xfs_btree_lshift(tcur, level, &i);
    4082    15901226 :                         if (error)
    4083           0 :                                 goto error0;
    4084    15901226 :                         if (i) {
    4085    15901242 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4086             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4087             : 
    4088    15901062 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4089    15901323 :                                 tcur = NULL;
    4090             : 
    4091    15901323 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4092    15900501 :                                 if (error)
    4093           0 :                                         goto error0;
    4094             :                                 return 0;
    4095             :                         }
    4096             :                 }
    4097             : 
    4098             :                 /*
    4099             :                  * Otherwise, grab the number of records in right for
    4100             :                  * future reference, and fix up the temp cursor to point
    4101             :                  * to our block again (last record).
    4102             :                  */
    4103     5319355 :                 rrecs = xfs_btree_get_numrecs(right);
    4104    10638710 :                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4105     5261639 :                         i = xfs_btree_firstrec(tcur, level);
    4106     5261662 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4107           0 :                                 xfs_btree_mark_sick(cur);
    4108           0 :                                 error = -EFSCORRUPTED;
    4109           0 :                                 goto error0;
    4110             :                         }
    4111             : 
    4112     5261662 :                         error = xfs_btree_decrement(tcur, level, &i);
    4113     5261662 :                         if (error)
    4114           0 :                                 goto error0;
    4115     5261662 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4116           0 :                                 xfs_btree_mark_sick(cur);
    4117           0 :                                 error = -EFSCORRUPTED;
    4118           0 :                                 goto error0;
    4119             :                         }
    4120             :                 }
    4121             :         }
    4122             : 
    4123             :         /*
    4124             :          * If there's a left sibling, see if it's ok to shift an entry
    4125             :          * out of it.
    4126             :          */
    4127    56403620 :         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4128             :                 /*
    4129             :                  * Move the temp cursor to the first entry in the
    4130             :                  * previous block.
    4131             :                  */
    4132    28144087 :                 i = xfs_btree_firstrec(tcur, level);
    4133    28144097 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4134           0 :                         xfs_btree_mark_sick(cur);
    4135           0 :                         error = -EFSCORRUPTED;
    4136           0 :                         goto error0;
    4137             :                 }
    4138             : 
    4139    28144097 :                 error = xfs_btree_decrement(tcur, level, &i);
    4140    28144135 :                 if (error)
    4141           1 :                         goto error0;
    4142    28144134 :                 i = xfs_btree_firstrec(tcur, level);
    4143    28144134 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4144           0 :                         xfs_btree_mark_sick(cur);
    4145           0 :                         error = -EFSCORRUPTED;
    4146           0 :                         goto error0;
    4147             :                 }
    4148             : 
    4149             :                 /* Grab a pointer to the block. */
    4150    28144134 :                 left = xfs_btree_get_block(tcur, level, &lbp);
    4151             : #ifdef DEBUG
    4152    28144132 :                 error = xfs_btree_check_block(cur, left, level, lbp);
    4153    28144141 :                 if (error)
    4154           0 :                         goto error0;
    4155             : #endif
    4156             :                 /* Grab the current block number, for future use. */
    4157    28144141 :                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
    4158             : 
    4159             :                 /*
    4160             :                  * If left block is full enough so that removing one entry
    4161             :                  * won't make it too empty, and right-shifting an entry out
    4162             :                  * of left to us works, we're done.
    4163             :                  */
    4164    28144134 :                 if (xfs_btree_get_numrecs(left) - 1 >=
    4165    28144134 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4166    27691000 :                         error = xfs_btree_rshift(tcur, level, &i);
    4167    27691060 :                         if (error)
    4168           0 :                                 goto error0;
    4169    27691060 :                         if (i) {
    4170    27691050 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4171             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4172    27691023 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4173    27691065 :                                 tcur = NULL;
    4174    27691065 :                                 if (level == 0)
    4175    27686089 :                                         cur->bc_levels[0].ptr++;
    4176             : 
    4177    27691065 :                                 *stat = 1;
    4178    27691065 :                                 return 0;
    4179             :                         }
    4180             :                 }
    4181             : 
    4182             :                 /*
    4183             :                  * Otherwise, grab the number of records in right for
    4184             :                  * future reference.
    4185             :                  */
    4186      453081 :                 lrecs = xfs_btree_get_numrecs(left);
    4187             :         }
    4188             : 
    4189             :         /* Delete the temp cursor, we're done with it. */
    4190      510804 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4191      510799 :         tcur = NULL;
    4192             : 
    4193             :         /* If here, we need to do a join to keep the tree balanced. */
    4194     1021598 :         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
    4195             : 
    4196     1474679 :         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
    4197      453082 :             lrecs + xfs_btree_get_numrecs(block) <=
    4198      453082 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4199             :                 /*
    4200             :                  * Set "right" to be the starting block,
    4201             :                  * "left" to be the left neighbor.
    4202             :                  */
    4203      453081 :                 rptr = cptr;
    4204      453081 :                 right = block;
    4205      453081 :                 rbp = bp;
    4206      453081 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    4207      453083 :                 if (error)
    4208           0 :                         goto error0;
    4209             : 
    4210             :         /*
    4211             :          * If that won't work, see if we can join with the right neighbor block.
    4212             :          */
    4213      173151 :         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
    4214       57717 :                    rrecs + xfs_btree_get_numrecs(block) <=
    4215       57717 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4216             :                 /*
    4217             :                  * Set "left" to be the starting block,
    4218             :                  * "right" to be the right neighbor.
    4219             :                  */
    4220       57717 :                 lptr = cptr;
    4221       57717 :                 left = block;
    4222       57717 :                 lbp = bp;
    4223       57717 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    4224       57717 :                 if (error)
    4225           0 :                         goto error0;
    4226             : 
    4227             :         /*
    4228             :          * Otherwise, we can't fix the imbalance.
    4229             :          * Just return.  This is probably a logic error, but it's not fatal.
    4230             :          */
    4231             :         } else {
    4232           0 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    4233           0 :                 if (error)
    4234           0 :                         goto error0;
    4235             :                 return 0;
    4236             :         }
    4237             : 
    4238      510800 :         rrecs = xfs_btree_get_numrecs(right);
    4239      510800 :         lrecs = xfs_btree_get_numrecs(left);
    4240             : 
    4241             :         /*
    4242             :          * We're now going to join "left" and "right" by moving all the stuff
    4243             :          * in "right" to "left" and deleting "right".
    4244             :          */
    4245      510800 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    4246      510800 :         if (level > 0) {
    4247             :                 /* It's a non-leaf.  Move keys and pointers. */
    4248         835 :                 union xfs_btree_key     *lkp;   /* left btree key */
    4249         835 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    4250         835 :                 union xfs_btree_key     *rkp;   /* right btree key */
    4251         835 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    4252             : 
    4253         835 :                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
    4254         835 :                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
    4255         835 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    4256         835 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    4257             : 
    4258       47505 :                 for (i = 1; i < rrecs; i++) {
    4259       46670 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    4260       46670 :                         if (error)
    4261           0 :                                 goto error0;
    4262             :                 }
    4263             : 
    4264         835 :                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
    4265         835 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
    4266             : 
    4267         835 :                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
    4268         835 :                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4269             :         } else {
    4270             :                 /* It's a leaf.  Move records.  */
    4271      509965 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    4272      509965 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    4273             : 
    4274      509965 :                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
    4275      509965 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    4276             : 
    4277      509965 :                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
    4278      509965 :                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4279             :         }
    4280             : 
    4281      510800 :         XFS_BTREE_STATS_INC(cur, join);
    4282             : 
    4283             :         /*
    4284             :          * Fix up the number of records and right block pointer in the
    4285             :          * surviving block, and log it.
    4286             :          */
    4287      510800 :         xfs_btree_set_numrecs(left, lrecs + rrecs);
    4288      510800 :         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
    4289      510799 :         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4290      510799 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    4291             : 
    4292             :         /* If there is a right sibling, point it to the remaining block. */
    4293      510800 :         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4294     1021600 :         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
    4295      269030 :                 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
    4296      269030 :                 if (error)
    4297           0 :                         goto error0;
    4298      269030 :                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
    4299      269030 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    4300             :         }
    4301             : 
    4302             :         /* Free the deleted block. */
    4303      510801 :         error = xfs_btree_free_block(cur, rbp);
    4304      510800 :         if (error)
    4305           0 :                 goto error0;
    4306             : 
    4307             :         /*
    4308             :          * If we joined with the left neighbor, set the buffer in the
    4309             :          * cursor to the left block, and fix up the index.
    4310             :          */
    4311      510800 :         if (bp != lbp) {
    4312      453083 :                 cur->bc_levels[level].bp = lbp;
    4313      453083 :                 cur->bc_levels[level].ptr += lrecs;
    4314      453083 :                 cur->bc_levels[level].ra = 0;
    4315             :         }
    4316             :         /*
    4317             :          * If we joined with the right neighbor and there's a level above
    4318             :          * us, increment the cursor at that level.
    4319             :          */
    4320       57717 :         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
    4321       55334 :                    (level + 1 < cur->bc_nlevels)) {
    4322       57717 :                 error = xfs_btree_increment(cur, level + 1, &i);
    4323       57717 :                 if (error)
    4324           0 :                         goto error0;
    4325             :         }
    4326             : 
    4327             :         /*
    4328             :          * Readjust the ptr at this level if it's not a leaf, since it's
    4329             :          * still pointing at the deletion point, which makes the cursor
    4330             :          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
    4331             :          * We can't use decrement because it would change the next level up.
    4332             :          */
    4333      510800 :         if (level > 0)
    4334         835 :                 cur->bc_levels[level].ptr--;
    4335             : 
    4336             :         /*
    4337             :          * We combined blocks, so we have to update the parent keys if the
    4338             :          * btree supports overlapped intervals.  However,
    4339             :          * bc_levels[level + 1].ptr points to the old block so that the caller
    4340             :          * knows which record to delete.  Therefore, the caller must be savvy
    4341             :          * enough to call updkeys for us if we return stat == 2.  The other
    4342             :          * exit points from this function don't require deletions further up
    4343             :          * the tree, so they can call updkeys directly.
    4344             :          */
    4345             : 
    4346             :         /* Return value means the next level up has something to do. */
    4347      510800 :         *stat = 2;
    4348      510800 :         return 0;
    4349             : 
    4350          11 : error0:
    4351          11 :         if (tcur)
    4352          11 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    4353             :         return error;
    4354             : }
    4355             : 
    4356             : /*
    4357             :  * Delete the record pointed to by cur.
    4358             :  * The cursor refers to the place where the record was (could be inserted)
    4359             :  * when the operation returns.
    4360             :  */
    4361             : int                                     /* error */
    4362   661782130 : xfs_btree_delete(
    4363             :         struct xfs_btree_cur    *cur,
    4364             :         int                     *stat)  /* success/failure */
    4365             : {
    4366   661782130 :         int                     error;  /* error return value */
    4367   661782130 :         int                     level;
    4368   661782130 :         int                     i;
    4369   661782130 :         bool                    joined = false;
    4370             : 
    4371             :         /*
    4372             :          * Go up the tree, starting at leaf level.
    4373             :          *
    4374             :          * If 2 is returned then a join was done; go to the next level.
    4375             :          * Otherwise we are done.
    4376             :          */
    4377  1324035453 :         for (level = 0, i = 2; i == 2; level++) {
    4378   662302451 :                 error = xfs_btree_delrec(cur, level, &i);
    4379   662253334 :                 if (error)
    4380          11 :                         goto error0;
    4381   662253323 :                 if (i == 2)
    4382      510800 :                         joined = true;
    4383             :         }
    4384             : 
    4385             :         /*
    4386             :          * If we combined blocks as part of deleting the record, delrec won't
    4387             :          * have updated the parent high keys so we have to do that here.
    4388             :          */
    4389   661733002 :         if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
    4390      183533 :                 error = xfs_btree_updkeys_force(cur, 0);
    4391      183533 :                 if (error)
    4392           0 :                         goto error0;
    4393             :         }
    4394             : 
    4395   661733002 :         if (i == 0) {
    4396           0 :                 for (level = 1; level < cur->bc_nlevels; level++) {
    4397           0 :                         if (cur->bc_levels[level].ptr == 0) {
    4398           0 :                                 error = xfs_btree_decrement(cur, level, &i);
    4399           0 :                                 if (error)
    4400           0 :                                         goto error0;
    4401             :                                 break;
    4402             :                         }
    4403             :                 }
    4404             :         }
    4405             : 
    4406   661733002 :         *stat = i;
    4407   661733002 :         return 0;
    4408             : error0:
    4409             :         return error;
    4410             : }
    4411             : 
    4412             : /*
    4413             :  * Get the data from the pointed-to record.
    4414             :  */
    4415             : int                                     /* error */
    4416 >13236*10^7 : xfs_btree_get_rec(
    4417             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    4418             :         union xfs_btree_rec     **recp, /* output: btree record */
    4419             :         int                     *stat)  /* output: success/failure */
    4420             : {
    4421 >13236*10^7 :         struct xfs_btree_block  *block; /* btree block */
    4422 >13236*10^7 :         struct xfs_buf          *bp;    /* buffer pointer */
    4423 >13236*10^7 :         int                     ptr;    /* record number */
    4424             : #ifdef DEBUG
    4425 >13236*10^7 :         int                     error;  /* error return value */
    4426             : #endif
    4427             : 
    4428 >13236*10^7 :         ptr = cur->bc_levels[0].ptr;
    4429 >13236*10^7 :         block = xfs_btree_get_block(cur, 0, &bp);
    4430             : 
    4431             : #ifdef DEBUG
    4432 >13209*10^7 :         error = xfs_btree_check_block(cur, block, 0, bp);
    4433 >13218*10^7 :         if (error)
    4434             :                 return error;
    4435             : #endif
    4436             : 
    4437             :         /*
    4438             :          * Off the right end or left end, return failure.
    4439             :          */
    4440 >13218*10^7 :         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
    4441    12720738 :                 *stat = 0;
    4442    12720738 :                 return 0;
    4443             :         }
    4444             : 
    4445             :         /*
    4446             :          * Point to the record and extract its data.
    4447             :          */
    4448 >13217*10^7 :         *recp = xfs_btree_rec_addr(cur, ptr, block);
    4449 >13217*10^7 :         *stat = 1;
    4450 >13217*10^7 :         return 0;
    4451             : }
    4452             : 
    4453             : /* Visit a block in a btree. */
    4454             : STATIC int
    4455   151415361 : xfs_btree_visit_block(
    4456             :         struct xfs_btree_cur            *cur,
    4457             :         int                             level,
    4458             :         xfs_btree_visit_blocks_fn       fn,
    4459             :         void                            *data)
    4460             : {
    4461   151415361 :         struct xfs_btree_block          *block;
    4462   151415361 :         struct xfs_buf                  *bp;
    4463   151415361 :         union xfs_btree_ptr             rptr, bufptr;
    4464   151415361 :         int                             error;
    4465             : 
    4466             :         /* do right sibling readahead */
    4467   151415361 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    4468   151413506 :         block = xfs_btree_get_block(cur, level, &bp);
    4469             : 
    4470             :         /* process the block */
    4471   151413312 :         error = fn(cur, level, data);
    4472   151410953 :         if (error)
    4473             :                 return error;
    4474             : 
    4475             :         /* now read rh sibling block for next iteration */
    4476   151403796 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4477   302805432 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    4478             :                 return -ENOENT;
    4479             : 
    4480             :         /*
    4481             :          * We only visit blocks once in this walk, so we have to avoid the
    4482             :          * internal xfs_btree_lookup_get_block() optimisation where it will
    4483             :          * return the same block without checking if the right sibling points
    4484             :          * back to us and creates a cyclic reference in the btree.
    4485             :          */
    4486   130491809 :         xfs_btree_buf_to_ptr(cur, bp, &bufptr);
    4487   130492555 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4488     6397516 :                 if (rptr.l == bufptr.l) {
    4489           0 :                         xfs_btree_mark_sick(cur);
    4490           0 :                         return -EFSCORRUPTED;
    4491             :                 }
    4492             :         } else {
    4493   124095039 :                 if (rptr.s == bufptr.s) {
    4494           0 :                         xfs_btree_mark_sick(cur);
    4495           0 :                         return -EFSCORRUPTED;
    4496             :                 }
    4497             :         }
    4498   130492555 :         return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
    4499             : }
    4500             : 
    4501             : 
    4502             : /* Visit every block in a btree. */
    4503             : int
    4504    12666987 : xfs_btree_visit_blocks(
    4505             :         struct xfs_btree_cur            *cur,
    4506             :         xfs_btree_visit_blocks_fn       fn,
    4507             :         unsigned int                    flags,
    4508             :         void                            *data)
    4509             : {
    4510    12666987 :         union xfs_btree_ptr             lptr;
    4511    12666987 :         int                             level;
    4512    12666987 :         struct xfs_btree_block          *block = NULL;
    4513    12666987 :         int                             error = 0;
    4514             : 
    4515    12666987 :         cur->bc_ops->init_ptr_from_cur(cur, &lptr);
    4516             : 
    4517             :         /* for each level */
    4518    33786362 :         for (level = cur->bc_nlevels - 1; level >= 0; level--) {
    4519             :                 /* grab the left hand block */
    4520    21124038 :                 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
    4521    21129545 :                 if (error)
    4522         266 :                         return error;
    4523             : 
    4524             :                 /* readahead the left most block for the next level down */
    4525    21129279 :                 if (level > 0) {
    4526     8455777 :                         union xfs_btree_ptr     *ptr;
    4527             : 
    4528     8455777 :                         ptr = xfs_btree_ptr_addr(cur, 1, block);
    4529     8455663 :                         xfs_btree_readahead_ptr(cur, ptr, 1);
    4530             : 
    4531             :                         /* save for the next iteration of the loop */
    4532     8456421 :                         xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
    4533             : 
    4534     8455825 :                         if (!(flags & XFS_BTREE_VISIT_LEAVES))
    4535      206203 :                                 continue;
    4536    12673502 :                 } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
    4537           0 :                         continue;
    4538             :                 }
    4539             : 
    4540             :                 /* for each buffer in the level */
    4541   151416438 :                 do {
    4542   151416438 :                         error = xfs_btree_visit_block(cur, level, fn, data);
    4543   151412474 :                 } while (!error);
    4544             : 
    4545    20919160 :                 if (error != -ENOENT)
    4546        7399 :                         return error;
    4547             :         }
    4548             : 
    4549             :         return 0;
    4550             : }
    4551             : 
    4552             : /*
    4553             :  * Change the owner of a btree.
    4554             :  *
    4555             :  * The mechanism we use here is ordered buffer logging. Because we don't know
    4556             :  * how many buffers were are going to need to modify, we don't really want to
    4557             :  * have to make transaction reservations for the worst case of every buffer in a
    4558             :  * full size btree as that may be more space that we can fit in the log....
    4559             :  *
    4560             :  * We do the btree walk in the most optimal manner possible - we have sibling
    4561             :  * pointers so we can just walk all the blocks on each level from left to right
    4562             :  * in a single pass, and then move to the next level and do the same. We can
    4563             :  * also do readahead on the sibling pointers to get IO moving more quickly,
    4564             :  * though for slow disks this is unlikely to make much difference to performance
    4565             :  * as the amount of CPU work we have to do before moving to the next block is
    4566             :  * relatively small.
    4567             :  *
    4568             :  * For each btree block that we load, modify the owner appropriately, set the
    4569             :  * buffer as an ordered buffer and log it appropriately. We need to ensure that
    4570             :  * we mark the region we change dirty so that if the buffer is relogged in
    4571             :  * a subsequent transaction the changes we make here as an ordered buffer are
    4572             :  * correctly relogged in that transaction.  If we are in recovery context, then
    4573             :  * just queue the modified buffer as delayed write buffer so the transaction
    4574             :  * recovery completion writes the changes to disk.
    4575             :  */
    4576             : struct xfs_btree_block_change_owner_info {
    4577             :         uint64_t                new_owner;
    4578             :         struct list_head        *buffer_list;
    4579             : };
    4580             : 
    4581             : static int
    4582      189381 : xfs_btree_block_change_owner(
    4583             :         struct xfs_btree_cur    *cur,
    4584             :         int                     level,
    4585             :         void                    *data)
    4586             : {
    4587      189381 :         struct xfs_btree_block_change_owner_info        *bbcoi = data;
    4588      189381 :         struct xfs_btree_block  *block;
    4589      189381 :         struct xfs_buf          *bp;
    4590             : 
    4591             :         /* modify the owner */
    4592      189381 :         block = xfs_btree_get_block(cur, level, &bp);
    4593      189381 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4594      189381 :                 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
    4595             :                         return 0;
    4596       15022 :                 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
    4597             :         } else {
    4598           0 :                 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
    4599             :                         return 0;
    4600           0 :                 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
    4601             :         }
    4602             : 
    4603             :         /*
    4604             :          * If the block is a root block hosted in an inode, we might not have a
    4605             :          * buffer pointer here and we shouldn't attempt to log the change as the
    4606             :          * information is already held in the inode and discarded when the root
    4607             :          * block is formatted into the on-disk inode fork. We still change it,
    4608             :          * though, so everything is consistent in memory.
    4609             :          */
    4610       15022 :         if (!bp) {
    4611        6255 :                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    4612        6255 :                 ASSERT(level == cur->bc_nlevels - 1);
    4613        6255 :                 return 0;
    4614             :         }
    4615             : 
    4616        8767 :         if (cur->bc_tp) {
    4617        8767 :                 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
    4618        7399 :                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
    4619        7399 :                         return -EAGAIN;
    4620             :                 }
    4621             :         } else {
    4622           0 :                 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
    4623             :         }
    4624             : 
    4625             :         return 0;
    4626             : }
    4627             : 
    4628             : int
    4629       13654 : xfs_btree_change_owner(
    4630             :         struct xfs_btree_cur    *cur,
    4631             :         uint64_t                new_owner,
    4632             :         struct list_head        *buffer_list)
    4633             : {
    4634       13654 :         struct xfs_btree_block_change_owner_info        bbcoi;
    4635             : 
    4636       13654 :         bbcoi.new_owner = new_owner;
    4637       13654 :         bbcoi.buffer_list = buffer_list;
    4638             : 
    4639       13654 :         return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
    4640             :                         XFS_BTREE_VISIT_ALL, &bbcoi);
    4641             : }
    4642             : 
    4643             : /* Verify the v5 fields of a long-format btree block. */
    4644             : xfs_failaddr_t
    4645   308699000 : xfs_btree_lblock_v5hdr_verify(
    4646             :         struct xfs_buf          *bp,
    4647             :         uint64_t                owner)
    4648             : {
    4649   308699000 :         struct xfs_mount        *mp = bp->b_mount;
    4650   308699000 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4651             : 
    4652   308699000 :         if (!xfs_has_crc(mp))
    4653           0 :                 return __this_address;
    4654   308699000 :         if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4655           0 :                 return __this_address;
    4656   308698678 :         if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4657           0 :                 return __this_address;
    4658   308698678 :         if (owner != XFS_RMAP_OWN_UNKNOWN &&
    4659           0 :             be64_to_cpu(block->bb_u.l.bb_owner) != owner)
    4660           0 :                 return __this_address;
    4661             :         return NULL;
    4662             : }
    4663             : 
    4664             : /* Verify a long-format btree block. */
    4665             : xfs_failaddr_t
    4666    19492437 : xfs_btree_lblock_verify(
    4667             :         struct xfs_buf          *bp,
    4668             :         unsigned int            max_recs)
    4669             : {
    4670    19492437 :         struct xfs_mount        *mp = bp->b_mount;
    4671    19492437 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4672    19492437 :         xfs_fsblock_t           fsb;
    4673    19492437 :         xfs_failaddr_t          fa;
    4674             : 
    4675    19492437 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4676             : 
    4677             :         /* numrecs verification */
    4678    19492437 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4679           0 :                 return __this_address;
    4680             : 
    4681             :         /* sibling pointer verification */
    4682    19492437 :         fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
    4683    19492974 :         fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4684             :                         block->bb_u.l.bb_leftsib);
    4685    19492517 :         if (!fa)
    4686    19492759 :                 fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4687             :                                 block->bb_u.l.bb_rightsib);
    4688             :         return fa;
    4689             : }
    4690             : 
    4691             : /**
    4692             :  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
    4693             :  *                                    btree block
    4694             :  *
    4695             :  * @bp: buffer containing the btree block
    4696             :  */
    4697             : xfs_failaddr_t
    4698   150787598 : xfs_btree_sblock_v5hdr_verify(
    4699             :         struct xfs_buf          *bp)
    4700             : {
    4701   150787598 :         struct xfs_mount        *mp = bp->b_mount;
    4702   150787598 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4703   150787598 :         struct xfs_perag        *pag = bp->b_pag;
    4704             : 
    4705   150787598 :         if (!xfs_has_crc(mp))
    4706           0 :                 return __this_address;
    4707   150787598 :         if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4708           0 :                 return __this_address;
    4709   150787743 :         if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4710           0 :                 return __this_address;
    4711   150787743 :         if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
    4712           0 :                 return __this_address;
    4713             :         return NULL;
    4714             : }
    4715             : 
    4716             : /**
    4717             :  * xfs_btree_sblock_verify() -- verify a short-format btree block
    4718             :  *
    4719             :  * @bp: buffer containing the btree block
    4720             :  * @max_recs: maximum records allowed in this btree node
    4721             :  */
    4722             : xfs_failaddr_t
    4723    47099308 : xfs_btree_sblock_verify(
    4724             :         struct xfs_buf          *bp,
    4725             :         unsigned int            max_recs)
    4726             : {
    4727    47099308 :         struct xfs_mount        *mp = bp->b_mount;
    4728    47099308 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4729    47099308 :         xfs_agblock_t           agbno;
    4730    47099308 :         xfs_failaddr_t          fa;
    4731             : 
    4732    47099308 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4733             : 
    4734             :         /* numrecs verification */
    4735    47099308 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4736           0 :                 return __this_address;
    4737             : 
    4738             :         /* sibling pointer verification */
    4739    47099308 :         agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
    4740    47099058 :         fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4741             :                         block->bb_u.s.bb_leftsib);
    4742    47097187 :         if (!fa)
    4743    47098933 :                 fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4744             :                                 block->bb_u.s.bb_rightsib);
    4745             :         return fa;
    4746             : }
    4747             : 
    4748             : /*
    4749             :  * For the given limits on leaf and keyptr records per block, calculate the
    4750             :  * height of the tree needed to index the number of leaf records.
    4751             :  */
    4752             : unsigned int
    4753      514780 : xfs_btree_compute_maxlevels(
    4754             :         const unsigned int      *limits,
    4755             :         unsigned long long      records)
    4756             : {
    4757      514780 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4758      514780 :         unsigned int            height = 1;
    4759             : 
    4760     3436459 :         while (level_blocks > 1) {
    4761     2921679 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    4762     2921679 :                 height++;
    4763             :         }
    4764             : 
    4765      514780 :         return height;
    4766             : }
    4767             : 
    4768             : /*
    4769             :  * For the given limits on leaf and keyptr records per block, calculate the
    4770             :  * number of blocks needed to index the given number of leaf records.
    4771             :  */
    4772             : unsigned long long
    4773     9969173 : xfs_btree_calc_size(
    4774             :         const unsigned int      *limits,
    4775             :         unsigned long long      records)
    4776             : {
    4777     9969173 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4778     9969173 :         unsigned long long      blocks = level_blocks;
    4779             : 
    4780    25656655 :         while (level_blocks > 1) {
    4781    15686471 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    4782    15687482 :                 blocks += level_blocks;
    4783             :         }
    4784             : 
    4785     9969711 :         return blocks;
    4786             : }
    4787             : 
    4788             : /*
    4789             :  * Given a number of available blocks for the btree to consume with records and
    4790             :  * pointers, calculate the height of the tree needed to index all the records
    4791             :  * that space can hold based on the number of pointers each interior node
    4792             :  * holds.
    4793             :  *
    4794             :  * We start by assuming a single level tree consumes a single block, then track
    4795             :  * the number of blocks each node level consumes until we no longer have space
    4796             :  * to store the next node level. At this point, we are indexing all the leaf
    4797             :  * blocks in the space, and there's no more free space to split the tree any
    4798             :  * further. That's our maximum btree height.
    4799             :  */
    4800             : unsigned int
    4801   712441009 : xfs_btree_space_to_height(
    4802             :         const unsigned int      *limits,
    4803             :         unsigned long long      leaf_blocks)
    4804             : {
    4805             :         /*
    4806             :          * The root btree block can have fewer than minrecs pointers in it
    4807             :          * because the tree might not be big enough to require that amount of
    4808             :          * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
    4809             :          */
    4810   712441009 :         unsigned long long      node_blocks = 2;
    4811   712441009 :         unsigned long long      blocks_left = leaf_blocks - 1;
    4812   712441009 :         unsigned int            height = 1;
    4813             : 
    4814   712441009 :         if (leaf_blocks < 1)
    4815             :                 return 0;
    4816             : 
    4817  7836510949 :         while (node_blocks < blocks_left) {
    4818  7124069940 :                 blocks_left -= node_blocks;
    4819  7124069940 :                 node_blocks *= limits[1];
    4820  7124069940 :                 height++;
    4821             :         }
    4822             : 
    4823             :         return height;
    4824             : }
    4825             : 
    4826             : /*
    4827             :  * Query a regular btree for all records overlapping a given interval.
    4828             :  * Start with a LE lookup of the key of low_rec and return all records
    4829             :  * until we find a record with a key greater than the key of high_rec.
    4830             :  */
    4831             : STATIC int
    4832  3248224742 : xfs_btree_simple_query_range(
    4833             :         struct xfs_btree_cur            *cur,
    4834             :         const union xfs_btree_key       *low_key,
    4835             :         const union xfs_btree_key       *high_key,
    4836             :         xfs_btree_query_range_fn        fn,
    4837             :         void                            *priv)
    4838             : {
    4839  3248224742 :         union xfs_btree_rec             *recp;
    4840  3248224742 :         union xfs_btree_key             rec_key;
    4841  3248224742 :         int                             stat;
    4842  3248224742 :         bool                            firstrec = true;
    4843  3248224742 :         int                             error;
    4844             : 
    4845  3248224742 :         ASSERT(cur->bc_ops->init_high_key_from_rec);
    4846  3248224742 :         ASSERT(cur->bc_ops->diff_two_keys);
    4847             : 
    4848             :         /*
    4849             :          * Find the leftmost record.  The btree cursor must be set
    4850             :          * to the low record used to generate low_key.
    4851             :          */
    4852  3248224742 :         stat = 0;
    4853  3248224742 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    4854  3249032576 :         if (error)
    4855          12 :                 goto out;
    4856             : 
    4857             :         /* Nothing?  See if there's anything to the right. */
    4858  3249032564 :         if (!stat) {
    4859   753332073 :                 error = xfs_btree_increment(cur, 0, &stat);
    4860   753010653 :                 if (error)
    4861           0 :                         goto out;
    4862             :         }
    4863             : 
    4864 89214052672 :         while (stat) {
    4865             :                 /* Find the record. */
    4866 88594687694 :                 error = xfs_btree_get_rec(cur, &recp, &stat);
    4867 88622713674 :                 if (error || !stat)
    4868             :                         break;
    4869             : 
    4870             :                 /* Skip if low_key > high_key(rec). */
    4871 88622713674 :                 if (firstrec) {
    4872  2811813599 :                         cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
    4873  2811145270 :                         firstrec = false;
    4874  2811145270 :                         if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
    4875  2494968069 :                                 goto advloop;
    4876             :                 }
    4877             : 
    4878             :                 /* Stop if low_key(rec) > high_key. */
    4879 86127208559 :                 cur->bc_ops->init_key_from_rec(&rec_key, recp);
    4880 86109342197 :                 if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
    4881             :                         break;
    4882             : 
    4883             :                 /* Callback */
    4884 83403834094 :                 error = fn(cur, recp, priv);
    4885 83266878881 :                 if (error)
    4886             :                         break;
    4887             : 
    4888 83266870237 : advloop:
    4889             :                 /* Move on to the next record. */
    4890 85761838306 :                 error = xfs_btree_increment(cur, 0, &stat);
    4891 85965341528 :                 if (error)
    4892             :                         break;
    4893             :         }
    4894             : 
    4895  3248234184 : out:
    4896  3248234196 :         return error;
    4897             : }
    4898             : 
    4899             : /*
    4900             :  * Query an overlapped interval btree for all records overlapping a given
    4901             :  * interval.  This function roughly follows the algorithm given in
    4902             :  * "Interval Trees" of _Introduction to Algorithms_, which is section
    4903             :  * 14.3 in the 2nd and 3rd editions.
    4904             :  *
    4905             :  * First, generate keys for the low and high records passed in.
    4906             :  *
    4907             :  * For any leaf node, generate the high and low keys for the record.
    4908             :  * If the record keys overlap with the query low/high keys, pass the
    4909             :  * record to the function iterator.
    4910             :  *
    4911             :  * For any internal node, compare the low and high keys of each
    4912             :  * pointer against the query low/high keys.  If there's an overlap,
    4913             :  * follow the pointer.
    4914             :  *
    4915             :  * As an optimization, we stop scanning a block when we find a low key
    4916             :  * that is greater than the query's high key.
    4917             :  */
    4918             : STATIC int
    4919  1098486102 : xfs_btree_overlapped_query_range(
    4920             :         struct xfs_btree_cur            *cur,
    4921             :         const union xfs_btree_key       *low_key,
    4922             :         const union xfs_btree_key       *high_key,
    4923             :         xfs_btree_query_range_fn        fn,
    4924             :         void                            *priv)
    4925             : {
    4926  1098486102 :         union xfs_btree_ptr             ptr;
    4927  1098486102 :         union xfs_btree_ptr             *pp;
    4928  1098486102 :         union xfs_btree_key             rec_key;
    4929  1098486102 :         union xfs_btree_key             rec_hkey;
    4930  1098486102 :         union xfs_btree_key             *lkp;
    4931  1098486102 :         union xfs_btree_key             *hkp;
    4932  1098486102 :         union xfs_btree_rec             *recp;
    4933  1098486102 :         struct xfs_btree_block          *block;
    4934  1098486102 :         int                             level;
    4935  1098486102 :         struct xfs_buf                  *bp;
    4936  1098486102 :         int                             i;
    4937  1098486102 :         int                             error;
    4938             : 
    4939             :         /* Load the root of the btree. */
    4940  1098486102 :         level = cur->bc_nlevels - 1;
    4941  1098486102 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    4942  1098568516 :         error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
    4943  1099028504 :         if (error)
    4944             :                 return error;
    4945  1099029701 :         xfs_btree_get_block(cur, level, &bp);
    4946  1098928561 :         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    4947             : #ifdef DEBUG
    4948  1098745427 :         error = xfs_btree_check_block(cur, block, level, bp);
    4949  1098697251 :         if (error)
    4950           0 :                 goto out;
    4951             : #endif
    4952  1098697251 :         cur->bc_levels[level].ptr = 1;
    4953             : 
    4954 >12337*10^7 :         while (level < cur->bc_nlevels) {
    4955 >12228*10^7 :                 block = xfs_btree_get_block(cur, level, &bp);
    4956             : 
    4957             :                 /* End of node, pop back towards the root. */
    4958 >12529*10^7 :                 if (cur->bc_levels[level].ptr >
    4959 >12232*10^7 :                                         be16_to_cpu(block->bb_numrecs)) {
    4960   317097609 : pop_up:
    4961  2968593480 :                         if (level < cur->bc_nlevels - 1)
    4962  1872299990 :                                 cur->bc_levels[level + 1].ptr++;
    4963  2968593480 :                         level++;
    4964  2968593480 :                         continue;
    4965             :                 }
    4966             : 
    4967 >12201*10^7 :                 if (level == 0) {
    4968             :                         /* Handle a leaf node. */
    4969 81015465436 :                         recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
    4970             :                                         block);
    4971             : 
    4972 81015465436 :                         cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
    4973 80964749606 :                         cur->bc_ops->init_key_from_rec(&rec_key, recp);
    4974             : 
    4975             :                         /*
    4976             :                          * If (query's high key < record's low key), then there
    4977             :                          * are no more interesting records in this block.  Pop
    4978             :                          * up to the leaf level to find more record blocks.
    4979             :                          *
    4980             :                          * If (record's high key >= query's low key) and
    4981             :                          *    (query's high key >= record's low key), then
    4982             :                          * this record overlaps the query range; callback.
    4983             :                          */
    4984 80920811001 :                         if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
    4985  1080040930 :                                 goto pop_up;
    4986 79882512660 :                         if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
    4987  3084611178 :                                 error = fn(cur, recp, priv);
    4988  3087793939 :                                 if (error)
    4989             :                                         break;
    4990             :                         }
    4991 79875346765 :                         cur->bc_levels[level].ptr++;
    4992 79875346765 :                         continue;
    4993             :                 }
    4994             : 
    4995             :                 /* Handle an internal node. */
    4996 40995381994 :                 lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
    4997 40995381994 :                 hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
    4998             :                                 block);
    4999 40995381994 :                 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
    5000             : 
    5001             :                 /*
    5002             :                  * If (query's high key < pointer's low key), then there are no
    5003             :                  * more interesting keys in this block.  Pop up one leaf level
    5004             :                  * to continue looking for records.
    5005             :                  *
    5006             :                  * If (pointer's high key >= query's low key) and
    5007             :                  *    (query's high key >= pointer's low key), then
    5008             :                  * this record overlaps the query range; follow pointer.
    5009             :                  */
    5010 40979959606 :                 if (xfs_btree_keycmp_lt(cur, high_key, lkp))
    5011  1571454941 :                         goto pop_up;
    5012 39410755382 :                 if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
    5013  1875160792 :                         level--;
    5014  1875160792 :                         error = xfs_btree_lookup_get_block(cur, level, pp,
    5015             :                                         &block);
    5016  1875473429 :                         if (error)
    5017           0 :                                 goto out;
    5018  1875473429 :                         xfs_btree_get_block(cur, level, &bp);
    5019  1875371290 :                         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    5020             : #ifdef DEBUG
    5021  1875345041 :                         error = xfs_btree_check_block(cur, block, level, bp);
    5022  1875352553 :                         if (error)
    5023           0 :                                 goto out;
    5024             : #endif
    5025  1875352553 :                         cur->bc_levels[level].ptr = 1;
    5026  1875352553 :                         continue;
    5027             :                 }
    5028 37560979969 :                 cur->bc_levels[level].ptr++;
    5029             :         }
    5030             : 
    5031  1098996515 : out:
    5032             :         /*
    5033             :          * If we don't end this function with the cursor pointing at a record
    5034             :          * block, a subsequent non-error cursor deletion will not release
    5035             :          * node-level buffers, causing a buffer leak.  This is quite possible
    5036             :          * with a zero-results range query, so release the buffers if we
    5037             :          * failed to return any results.
    5038             :          */
    5039  1098996515 :         if (cur->bc_levels[0].bp == NULL) {
    5040       12127 :                 for (i = 0; i < cur->bc_nlevels; i++) {
    5041        9006 :                         if (cur->bc_levels[i].bp) {
    5042        5787 :                                 xfs_trans_brelse(cur->bc_tp,
    5043             :                                                 cur->bc_levels[i].bp);
    5044        5787 :                                 cur->bc_levels[i].bp = NULL;
    5045        5787 :                                 cur->bc_levels[i].ptr = 0;
    5046        5787 :                                 cur->bc_levels[i].ra = 0;
    5047             :                         }
    5048             :                 }
    5049             :         }
    5050             : 
    5051             :         return error;
    5052             : }
    5053             : 
    5054             : static inline void
    5055 15990089913 : xfs_btree_key_from_irec(
    5056             :         struct xfs_btree_cur            *cur,
    5057             :         union xfs_btree_key             *key,
    5058             :         const union xfs_btree_irec      *irec)
    5059             : {
    5060 15990089913 :         union xfs_btree_rec             rec;
    5061             : 
    5062 15990089913 :         cur->bc_rec = *irec;
    5063 15990089913 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    5064 15988981362 :         cur->bc_ops->init_key_from_rec(key, &rec);
    5065 15989503479 : }
    5066             : 
    5067             : /*
    5068             :  * Query a btree for all records overlapping a given interval of keys.  The
    5069             :  * supplied function will be called with each record found; return one of the
    5070             :  * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
    5071             :  * code.  This function returns -ECANCELED, zero, or a negative error code.
    5072             :  */
    5073             : int
    5074  4328821228 : xfs_btree_query_range(
    5075             :         struct xfs_btree_cur            *cur,
    5076             :         const union xfs_btree_irec      *low_rec,
    5077             :         const union xfs_btree_irec      *high_rec,
    5078             :         xfs_btree_query_range_fn        fn,
    5079             :         void                            *priv)
    5080             : {
    5081  4328821228 :         union xfs_btree_key             low_key;
    5082  4328821228 :         union xfs_btree_key             high_key;
    5083             : 
    5084             :         /* Find the keys of both ends of the interval. */
    5085  4328821228 :         xfs_btree_key_from_irec(cur, &high_key, high_rec);
    5086  4329173518 :         xfs_btree_key_from_irec(cur, &low_key, low_rec);
    5087             : 
    5088             :         /* Enforce low key <= high key. */
    5089  4329568509 :         if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
    5090             :                 return -EINVAL;
    5091             : 
    5092  4329538172 :         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5093  3231149717 :                 return xfs_btree_simple_query_range(cur, &low_key,
    5094             :                                 &high_key, fn, priv);
    5095  1098388455 :         return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
    5096             :                         fn, priv);
    5097             : }
    5098             : 
    5099             : /* Query a btree for all records. */
    5100             : int
    5101    16710076 : xfs_btree_query_all(
    5102             :         struct xfs_btree_cur            *cur,
    5103             :         xfs_btree_query_range_fn        fn,
    5104             :         void                            *priv)
    5105             : {
    5106    16710076 :         union xfs_btree_key             low_key;
    5107    16710076 :         union xfs_btree_key             high_key;
    5108             : 
    5109    16710076 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5110    16710076 :         memset(&low_key, 0, sizeof(low_key));
    5111    16710076 :         memset(&high_key, 0xFF, sizeof(high_key));
    5112             : 
    5113    16710076 :         return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
    5114             : }
    5115             : 
    5116             : static int
    5117   129030264 : xfs_btree_count_blocks_helper(
    5118             :         struct xfs_btree_cur    *cur,
    5119             :         int                     level,
    5120             :         void                    *data)
    5121             : {
    5122   129030264 :         xfs_extlen_t            *blocks = data;
    5123   129030264 :         (*blocks)++;
    5124             : 
    5125   129030264 :         return 0;
    5126             : }
    5127             : 
    5128             : /* Count the blocks in a btree and return the result in *blocks. */
    5129             : int
    5130     7407481 : xfs_btree_count_blocks(
    5131             :         struct xfs_btree_cur    *cur,
    5132             :         xfs_extlen_t            *blocks)
    5133             : {
    5134     7407481 :         *blocks = 0;
    5135     7407481 :         return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
    5136             :                         XFS_BTREE_VISIT_ALL, blocks);
    5137             : }
    5138             : 
    5139             : /* Compare two btree pointers. */
    5140             : int64_t
    5141    13858824 : xfs_btree_diff_two_ptrs(
    5142             :         struct xfs_btree_cur            *cur,
    5143             :         const union xfs_btree_ptr       *a,
    5144             :         const union xfs_btree_ptr       *b)
    5145             : {
    5146    13858824 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5147     1763367 :                 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
    5148    12095457 :         return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
    5149             : }
    5150             : 
    5151             : struct xfs_btree_has_records {
    5152             :         /* Keys for the start and end of the range we want to know about. */
    5153             :         union xfs_btree_key             start_key;
    5154             :         union xfs_btree_key             end_key;
    5155             : 
    5156             :         /* Mask for key comparisons, if desired. */
    5157             :         const union xfs_btree_key       *key_mask;
    5158             : 
    5159             :         /* Highest record key we've seen so far. */
    5160             :         union xfs_btree_key             high_key;
    5161             : 
    5162             :         enum xbtree_recpacking          outcome;
    5163             : };
    5164             : 
    5165             : STATIC int
    5166           0 : xfs_btree_has_records_helper(
    5167             :         struct xfs_btree_cur            *cur,
    5168             :         const union xfs_btree_rec       *rec,
    5169             :         void                            *priv)
    5170             : {
    5171           0 :         union xfs_btree_key             rec_key;
    5172           0 :         union xfs_btree_key             rec_high_key;
    5173           0 :         struct xfs_btree_has_records    *info = priv;
    5174           0 :         enum xbtree_key_contig          key_contig;
    5175             : 
    5176           0 :         cur->bc_ops->init_key_from_rec(&rec_key, rec);
    5177             : 
    5178           0 :         if (info->outcome == XBTREE_RECPACKING_EMPTY) {
    5179           0 :                 info->outcome = XBTREE_RECPACKING_SPARSE;
    5180             : 
    5181             :                 /*
    5182             :                  * If the first record we find does not overlap the start key,
    5183             :                  * then there is a hole at the start of the search range.
    5184             :                  * Classify this as sparse and stop immediately.
    5185             :                  */
    5186           0 :                 if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
    5187             :                                         info->key_mask))
    5188             :                         return -ECANCELED;
    5189             :         } else {
    5190             :                 /*
    5191             :                  * If a subsequent record does not overlap with the any record
    5192             :                  * we've seen so far, there is a hole in the middle of the
    5193             :                  * search range.  Classify this as sparse and stop.
    5194             :                  * If the keys overlap and this btree does not allow overlap,
    5195             :                  * signal corruption.
    5196             :                  */
    5197           0 :                 key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
    5198             :                                         &rec_key, info->key_mask);
    5199           0 :                 if (key_contig == XBTREE_KEY_OVERLAP &&
    5200           0 :                                 !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5201             :                         return -EFSCORRUPTED;
    5202           0 :                 if (key_contig == XBTREE_KEY_GAP)
    5203             :                         return -ECANCELED;
    5204             :         }
    5205             : 
    5206             :         /*
    5207             :          * If high_key(rec) is larger than any other high key we've seen,
    5208             :          * remember it for later.
    5209             :          */
    5210           0 :         cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
    5211           0 :         if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
    5212             :                                 info->key_mask))
    5213           0 :                 info->high_key = rec_high_key; /* struct copy */
    5214             : 
    5215             :         return 0;
    5216             : }
    5217             : 
    5218             : /*
    5219             :  * Scan part of the keyspace of a btree and tell us if that keyspace does not
    5220             :  * map to any records; is fully mapped to records; or is partially mapped to
    5221             :  * records.  This is the btree record equivalent to determining if a file is
    5222             :  * sparse.
    5223             :  *
    5224             :  * For most btree types, the record scan should use all available btree key
    5225             :  * fields to compare the keys encountered.  These callers should pass NULL for
    5226             :  * @mask.  However, some callers (e.g.  scanning physical space in the rmapbt)
    5227             :  * want to ignore some part of the btree record keyspace when performing the
    5228             :  * comparison.  These callers should pass in a union xfs_btree_key object with
    5229             :  * the fields that *should* be a part of the comparison set to any nonzero
    5230             :  * value, and the rest zeroed.
    5231             :  */
    5232             : int
    5233  3672659967 : xfs_btree_has_records(
    5234             :         struct xfs_btree_cur            *cur,
    5235             :         const union xfs_btree_irec      *low,
    5236             :         const union xfs_btree_irec      *high,
    5237             :         const union xfs_btree_key       *mask,
    5238             :         enum xbtree_recpacking          *outcome)
    5239             : {
    5240  3672659967 :         struct xfs_btree_has_records    info = {
    5241             :                 .outcome                = XBTREE_RECPACKING_EMPTY,
    5242             :                 .key_mask               = mask,
    5243             :         };
    5244  3672659967 :         int                             error;
    5245             : 
    5246             :         /* Not all btrees support this operation. */
    5247  3672659967 :         if (!cur->bc_ops->keys_contiguous) {
    5248           0 :                 ASSERT(0);
    5249           0 :                 return -EOPNOTSUPP;
    5250             :         }
    5251             : 
    5252  3672659967 :         xfs_btree_key_from_irec(cur, &info.start_key, low);
    5253  3670557825 :         xfs_btree_key_from_irec(cur, &info.end_key, high);
    5254             : 
    5255  3671247765 :         error = xfs_btree_query_range(cur, low, high,
    5256             :                         xfs_btree_has_records_helper, &info);
    5257  3672328770 :         if (error == -ECANCELED)
    5258           0 :                 goto out;
    5259  3672328770 :         if (error)
    5260             :                 return error;
    5261             : 
    5262  3672328770 :         if (info.outcome == XBTREE_RECPACKING_EMPTY)
    5263  3672328770 :                 goto out;
    5264             : 
    5265             :         /*
    5266             :          * If the largest high_key(rec) we saw during the walk is greater than
    5267             :          * the end of the search range, classify this as full.  Otherwise,
    5268             :          * there is a hole at the end of the search range.
    5269             :          */
    5270           0 :         if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
    5271             :                                 mask))
    5272           0 :                 info.outcome = XBTREE_RECPACKING_FULL;
    5273             : 
    5274           0 : out:
    5275  3672328770 :         *outcome = info.outcome;
    5276  3672328770 :         return 0;
    5277             : }
    5278             : 
    5279             : /* Are there more records in this btree? */
    5280             : bool
    5281    72897628 : xfs_btree_has_more_records(
    5282             :         struct xfs_btree_cur    *cur)
    5283             : {
    5284    72897628 :         struct xfs_btree_block  *block;
    5285    72897628 :         struct xfs_buf          *bp;
    5286             : 
    5287    72897628 :         block = xfs_btree_get_block(cur, 0, &bp);
    5288             : 
    5289             :         /* There are still records in this block. */
    5290    72897496 :         if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
    5291             :                 return true;
    5292             : 
    5293             :         /* There are more record blocks. */
    5294      551752 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5295           0 :                 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
    5296             :         else
    5297      551752 :                 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
    5298             : }
    5299             : 
    5300             : /* Set up all the btree cursor caches. */
    5301             : int __init
    5302          50 : xfs_btree_init_cur_caches(void)
    5303             : {
    5304          50 :         int             error;
    5305             : 
    5306          50 :         error = xfs_allocbt_init_cur_cache();
    5307          50 :         if (error)
    5308             :                 return error;
    5309          50 :         error = xfs_inobt_init_cur_cache();
    5310          50 :         if (error)
    5311           0 :                 goto err;
    5312          50 :         error = xfs_bmbt_init_cur_cache();
    5313          50 :         if (error)
    5314           0 :                 goto err;
    5315          50 :         error = xfs_rmapbt_init_cur_cache();
    5316          50 :         if (error)
    5317           0 :                 goto err;
    5318          50 :         error = xfs_refcountbt_init_cur_cache();
    5319          50 :         if (error)
    5320           0 :                 goto err;
    5321             : 
    5322             :         return 0;
    5323           0 : err:
    5324           0 :         xfs_btree_destroy_cur_caches();
    5325           0 :         return error;
    5326             : }
    5327             : 
    5328             : /* Destroy all the btree cursor caches, if they've been allocated. */
    5329             : void
    5330          49 : xfs_btree_destroy_cur_caches(void)
    5331             : {
    5332          49 :         xfs_allocbt_destroy_cur_cache();
    5333          49 :         xfs_inobt_destroy_cur_cache();
    5334          49 :         xfs_bmbt_destroy_cur_cache();
    5335          49 :         xfs_rmapbt_destroy_cur_cache();
    5336          49 :         xfs_refcountbt_destroy_cur_cache();
    5337          49 : }
    5338             : 
    5339             : /* Move the btree cursor before the first record. */
    5340             : int
    5341   132214942 : xfs_btree_goto_left_edge(
    5342             :         struct xfs_btree_cur    *cur)
    5343             : {
    5344   132214942 :         int                     stat = 0;
    5345   132214942 :         int                     error;
    5346             : 
    5347   132214942 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5348   132214942 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    5349   132215081 :         if (error)
    5350             :                 return error;
    5351   132215081 :         if (!stat)
    5352             :                 return 0;
    5353             : 
    5354           0 :         error = xfs_btree_decrement(cur, 0, &stat);
    5355           0 :         if (error)
    5356             :                 return error;
    5357           0 :         if (stat != 0) {
    5358           0 :                 ASSERT(0);
    5359           0 :                 xfs_btree_mark_sick(cur);
    5360           0 :                 return -EFSCORRUPTED;
    5361             :         }
    5362             : 
    5363             :         return 0;
    5364             : }

Generated by: LCOV version 1.14