LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_btree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsx @ Mon Jul 31 20:08:34 PDT 2023 Lines: 2145 2382 90.1 %
Date: 2023-07-31 20:08:34 Functions: 129 129 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_buf_item.h"
      17             : #include "xfs_btree.h"
      18             : #include "xfs_errortag.h"
      19             : #include "xfs_error.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_alloc.h"
      22             : #include "xfs_log.h"
      23             : #include "xfs_btree_staging.h"
      24             : #include "xfs_ag.h"
      25             : #include "xfs_alloc_btree.h"
      26             : #include "xfs_ialloc_btree.h"
      27             : #include "xfs_bmap_btree.h"
      28             : #include "xfs_rmap_btree.h"
      29             : #include "xfs_refcount_btree.h"
      30             : #include "xfs_health.h"
      31             : #include "scrub/xfile.h"
      32             : #include "scrub/xfbtree.h"
      33             : #include "xfs_btree_mem.h"
      34             : #include "xfs_rtgroup.h"
      35             : #include "xfs_rtrmap_btree.h"
      36             : #include "xfs_bmap.h"
      37             : #include "xfs_rmap.h"
      38             : #include "xfs_quota.h"
      39             : #include "xfs_imeta.h"
      40             : #include "xfs_rtrefcount_btree.h"
      41             : 
      42             : /*
      43             :  * Btree magic numbers.
      44             :  */
      45             : uint32_t
      46 >52556*10^7 : xfs_btree_magic(
      47             :         struct xfs_mount                *mp,
      48             :         const struct xfs_btree_ops      *ops)
      49             : {
      50 >52556*10^7 :         int                             idx = xfs_has_crc(mp) ? 1 : 0;
      51 >52556*10^7 :         __be32                          magic = ops->buf_ops->magic[idx];
      52             : 
      53             :         /* Ensure we asked for crc for crc-only magics. */
      54 >52546*10^7 :         ASSERT(magic != 0);
      55 >52546*10^7 :         return be32_to_cpu(magic);
      56             : }
      57             : 
      58             : /*
      59             :  * These sibling pointer checks are optimised for null sibling pointers. This
      60             :  * happens a lot, and we don't need to byte swap at runtime if the sibling
      61             :  * pointer is NULL.
      62             :  *
      63             :  * These are explicitly marked as inline because the cost of calling them as
      64             :  * functions instead of inlining them is about 36 bytes extra code per call site
      65             :  * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
      66             :  * two sibling check functions reduces the compiled code size by over 300
      67             :  * bytes.
      68             :  */
      69             : static inline xfs_failaddr_t
      70 >47619*10^7 : xfs_btree_check_lblock_siblings(
      71             :         struct xfs_mount        *mp,
      72             :         struct xfs_btree_cur    *cur,
      73             :         int                     level,
      74             :         xfs_fsblock_t           fsb,
      75             :         __be64                  dsibling)
      76             : {
      77 >47619*10^7 :         xfs_fsblock_t           sibling;
      78             : 
      79 >47619*10^7 :         if (dsibling == cpu_to_be64(NULLFSBLOCK))
      80             :                 return NULL;
      81             : 
      82 >46408*10^7 :         sibling = be64_to_cpu(dsibling);
      83 >46408*10^7 :         if (sibling == fsb)
      84           0 :                 return __this_address;
      85 >46408*10^7 :         if (level >= 0) {
      86 >46401*10^7 :                 if (!xfs_btree_check_lptr(cur, sibling, level + 1))
      87           0 :                         return __this_address;
      88    73538915 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
      89           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
      90           0 :                         return __this_address;
      91             :         } else {
      92    73538915 :                 if (!xfs_verify_fsbno(mp, sibling))
      93           0 :                         return __this_address;
      94             :         }
      95             : 
      96             :         return NULL;
      97             : }
      98             : 
      99             : static inline xfs_failaddr_t
     100 >55589*10^7 : xfs_btree_check_sblock_siblings(
     101             :         struct xfs_perag        *pag,
     102             :         struct xfs_btree_cur    *cur,
     103             :         int                     level,
     104             :         xfs_agblock_t           agbno,
     105             :         __be32                  dsibling)
     106             : {
     107 >55589*10^7 :         xfs_agblock_t           sibling;
     108             : 
     109 >55589*10^7 :         if (dsibling == cpu_to_be32(NULLAGBLOCK))
     110             :                 return NULL;
     111             : 
     112 >40258*10^7 :         sibling = be32_to_cpu(dsibling);
     113 >40258*10^7 :         if (sibling == agbno)
     114           0 :                 return __this_address;
     115 >40258*10^7 :         if (level >= 0) {
     116 >40251*10^7 :                 if (!xfs_btree_check_sptr(cur, sibling, level + 1))
     117           0 :                         return __this_address;
     118    73122482 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
     119           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
     120           0 :                         return __this_address;
     121             :         } else {
     122    73122482 :                 if (!xfs_verify_agbno(pag, sibling))
     123           0 :                         return __this_address;
     124             :         }
     125             :         return NULL;
     126             : }
     127             : 
     128             : /*
     129             :  * Check a long btree block header.  Return the address of the failing check,
     130             :  * or NULL if everything is ok.
     131             :  */
     132             : xfs_failaddr_t
     133 >24697*10^7 : __xfs_btree_check_lblock(
     134             :         struct xfs_btree_cur    *cur,
     135             :         struct xfs_btree_block  *block,
     136             :         int                     level,
     137             :         struct xfs_buf          *bp)
     138             : {
     139 >24697*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     140 >24697*10^7 :         int                     crc = xfs_has_crc(mp);
     141 >24697*10^7 :         xfs_failaddr_t          fa;
     142 >24697*10^7 :         xfs_fsblock_t           fsb = NULLFSBLOCK;
     143             : 
     144 >24697*10^7 :         if (crc) {
     145 >24727*10^7 :                 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
     146           0 :                         return __this_address;
     147 >49381*10^7 :                 if (block->bb_u.l.bb_blkno !=
     148 >24690*10^7 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     149           0 :                         return __this_address;
     150 >24690*10^7 :                 if (block->bb_u.l.bb_pad != cpu_to_be32(0))
     151           0 :                         return __this_address;
     152             :         }
     153             : 
     154 >24660*10^7 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     155           0 :                 return __this_address;
     156 >24734*10^7 :         if (be16_to_cpu(block->bb_level) != level)
     157           0 :                 return __this_address;
     158 >24745*10^7 :         if (be16_to_cpu(block->bb_numrecs) >
     159 >24734*10^7 :             cur->bc_ops->get_maxrecs(cur, level))
     160           0 :                 return __this_address;
     161             : 
     162 >24745*10^7 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
     163  3231822675 :                 fsb = xfbtree_buf_to_xfoff(cur, bp);
     164 >24422*10^7 :         else if (bp)
     165 >24422*10^7 :                 fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
     166             : 
     167 >24604*10^7 :         fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     168             :                         block->bb_u.l.bb_leftsib);
     169 >24415*10^7 :         if (!fa)
     170 >24400*10^7 :                 fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     171             :                                 block->bb_u.l.bb_rightsib);
     172             :         return fa;
     173             : }
     174             : 
     175             : /* Check a long btree block header. */
     176             : static int
     177 >24765*10^7 : xfs_btree_check_lblock(
     178             :         struct xfs_btree_cur    *cur,
     179             :         struct xfs_btree_block  *block,
     180             :         int                     level,
     181             :         struct xfs_buf          *bp)
     182             : {
     183 >24765*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     184 >24765*10^7 :         xfs_failaddr_t          fa;
     185             : 
     186 >24765*10^7 :         fa = __xfs_btree_check_lblock(cur, block, level, bp);
     187 >49238*10^7 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     188 >24643*10^7 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
     189           0 :                 if (bp)
     190           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     191           0 :                 xfs_btree_mark_sick(cur);
     192           0 :                 return -EFSCORRUPTED;
     193             :         }
     194             :         return 0;
     195             : }
     196             : 
     197             : /*
     198             :  * Check a short btree block header.  Return the address of the failing check,
     199             :  * or NULL if everything is ok.
     200             :  */
     201             : xfs_failaddr_t
     202 >27851*10^7 : __xfs_btree_check_sblock(
     203             :         struct xfs_btree_cur    *cur,
     204             :         struct xfs_btree_block  *block,
     205             :         int                     level,
     206             :         struct xfs_buf          *bp)
     207             : {
     208 >27851*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     209 >27851*10^7 :         struct xfs_perag        *pag = cur->bc_ag.pag;
     210 >27851*10^7 :         int                     crc = xfs_has_crc(mp);
     211 >27851*10^7 :         xfs_failaddr_t          fa;
     212 >27851*10^7 :         xfs_agblock_t           agbno = NULLAGBLOCK;
     213             : 
     214 >27851*10^7 :         if (crc) {
     215 >27841*10^7 :                 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
     216           0 :                         return __this_address;
     217 >55531*10^7 :                 if (block->bb_u.s.bb_blkno !=
     218 >27765*10^7 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     219           0 :                         return __this_address;
     220             :         }
     221             : 
     222 >27775*10^7 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     223           0 :                 return __this_address;
     224 >27870*10^7 :         if (be16_to_cpu(block->bb_level) != level)
     225           0 :                 return __this_address;
     226 >27812*10^7 :         if (be16_to_cpu(block->bb_numrecs) >
     227 >27870*10^7 :             cur->bc_ops->get_maxrecs(cur, level))
     228           0 :                 return __this_address;
     229             : 
     230 >27812*10^7 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
     231  1266691068 :                 pag = NULL;
     232  1266691068 :                 agbno = xfbtree_buf_to_xfoff(cur, bp);
     233 >27685*10^7 :         } else if (bp) {
     234 >27685*10^7 :                 agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
     235             :         }
     236             : 
     237 >27808*10^7 :         fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     238             :                         block->bb_u.s.bb_leftsib);
     239 >27785*10^7 :         if (!fa)
     240 >27795*10^7 :                 fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     241             :                                 block->bb_u.s.bb_rightsib);
     242             :         return fa;
     243             : }
     244             : 
     245             : /* Check a short btree block header. */
     246             : STATIC int
     247 >27798*10^7 : xfs_btree_check_sblock(
     248             :         struct xfs_btree_cur    *cur,
     249             :         struct xfs_btree_block  *block,
     250             :         int                     level,
     251             :         struct xfs_buf          *bp)
     252             : {
     253 >27798*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     254 >27798*10^7 :         xfs_failaddr_t          fa;
     255             : 
     256 >27798*10^7 :         fa = __xfs_btree_check_sblock(cur, block, level, bp);
     257 >55557*10^7 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     258 >27820*10^7 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
     259           0 :                 if (bp)
     260           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     261           0 :                 xfs_btree_mark_sick(cur);
     262           0 :                 return -EFSCORRUPTED;
     263             :         }
     264             :         return 0;
     265             : }
     266             : 
     267             : /*
     268             :  * Debug routine: check that block header is ok.
     269             :  */
     270             : int
     271 >52711*10^7 : xfs_btree_check_block(
     272             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     273             :         struct xfs_btree_block  *block, /* generic btree block pointer */
     274             :         int                     level,  /* level of the btree block */
     275             :         struct xfs_buf          *bp)    /* buffer containing block, if any */
     276             : {
     277             :         /* Don't check the inode-core root. */
     278 >52711*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     279 >24603*10^7 :             level == cur->bc_nlevels - 1)
     280             :                 return 0;
     281             : 
     282 >52619*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
     283 >24827*10^7 :                 return xfs_btree_check_lblock(cur, block, level, bp);
     284             :         else
     285 >27792*10^7 :                 return xfs_btree_check_sblock(cur, block, level, bp);
     286             : }
     287             : 
     288             : /* Check that this long pointer is valid and points within the fs. */
     289             : bool
     290 >47852*10^7 : xfs_btree_check_lptr(
     291             :         struct xfs_btree_cur    *cur,
     292             :         xfs_fsblock_t           fsbno,
     293             :         int                     level)
     294             : {
     295 >47852*10^7 :         if (level <= 0)
     296             :                 return false;
     297 >47852*10^7 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     298  1159411948 :                 return xfbtree_verify_xfileoff(cur, fsbno);
     299 >47736*10^7 :         return xfs_verify_fsbno(cur->bc_mp, fsbno);
     300             : }
     301             : 
     302             : /* Check that this short pointer is valid and points within the AG. */
     303             : bool
     304 >46357*10^7 : xfs_btree_check_sptr(
     305             :         struct xfs_btree_cur    *cur,
     306             :         xfs_agblock_t           agbno,
     307             :         int                     level)
     308             : {
     309 >46357*10^7 :         if (level <= 0)
     310             :                 return false;
     311 >46357*10^7 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     312  2051609154 :                 return xfbtree_verify_xfileoff(cur, agbno);
     313 >46152*10^7 :         return xfs_verify_agbno(cur->bc_ag.pag, agbno);
     314             : }
     315             : 
     316             : /*
     317             :  * Check that a given (indexed) btree pointer at a certain level of a
     318             :  * btree is valid and doesn't point past where it should.
     319             :  */
     320             : static int
     321 80011276302 : xfs_btree_check_ptr(
     322             :         struct xfs_btree_cur            *cur,
     323             :         const union xfs_btree_ptr       *ptr,
     324             :         int                             index,
     325             :         int                             level)
     326             : {
     327 80011276302 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     328  4020278862 :                 return xfbtree_check_ptr(cur, ptr, index, level);
     329             : 
     330 75990997440 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     331 14287545789 :                 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
     332             :                                 level))
     333             :                         return 0;
     334           0 :                 xfs_err(cur->bc_mp,
     335             : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
     336             :                                 cur->bc_ino.ip->i_ino,
     337             :                                 cur->bc_ino.whichfork, cur->bc_btnum,
     338             :                                 level, index);
     339             :         } else {
     340 61703451651 :                 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
     341             :                                 level))
     342             :                         return 0;
     343           0 :                 xfs_err(cur->bc_mp,
     344             : "AG %u: Corrupt btree %d pointer at level %d index %d.",
     345             :                                 cur->bc_ag.pag->pag_agno, cur->bc_btnum,
     346             :                                 level, index);
     347             :         }
     348             : 
     349           0 :         xfs_btree_mark_sick(cur);
     350           0 :         return -EFSCORRUPTED;
     351             : }
     352             : 
     353             : #ifdef DEBUG
     354             : # define xfs_btree_debug_check_ptr      xfs_btree_check_ptr
     355             : #else
     356             : # define xfs_btree_debug_check_ptr(...) (0)
     357             : #endif
     358             : 
     359             : /*
     360             :  * Calculate CRC on the whole btree block and stuff it into the
     361             :  * long-form btree header.
     362             :  *
     363             :  * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
     364             :  * it into the buffer so recovery knows what the last modification was that made
     365             :  * it to disk.
     366             :  */
     367             : void
     368    29995396 : xfs_btree_lblock_calc_crc(
     369             :         struct xfs_buf          *bp)
     370             : {
     371    29995396 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     372    29995396 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     373             : 
     374    29995396 :         if (!xfs_has_crc(bp->b_mount))
     375             :                 return;
     376    29995396 :         if (bip)
     377    29065388 :                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     378    29995396 :         xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     379             : }
     380             : 
     381             : bool
     382     5411621 : xfs_btree_lblock_verify_crc(
     383             :         struct xfs_buf          *bp)
     384             : {
     385     5411621 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     386     5411621 :         struct xfs_mount        *mp = bp->b_mount;
     387             : 
     388     5411621 :         if (xfs_has_crc(mp)) {
     389     5411621 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
     390             :                         return false;
     391     5411621 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     392             :         }
     393             : 
     394             :         return true;
     395             : }
     396             : 
     397             : /*
     398             :  * Calculate CRC on the whole btree block and stuff it into the
     399             :  * short-form btree header.
     400             :  *
     401             :  * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
     402             :  * it into the buffer so recovery knows what the last modification was that made
     403             :  * it to disk.
     404             :  */
     405             : void
     406    25225718 : xfs_btree_sblock_calc_crc(
     407             :         struct xfs_buf          *bp)
     408             : {
     409    25225718 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     410    25225718 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     411             : 
     412    25225718 :         if (!xfs_has_crc(bp->b_mount))
     413             :                 return;
     414    25184724 :         if (bip)
     415    23431613 :                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     416    25184724 :         xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     417             : }
     418             : 
     419             : bool
     420     2940215 : xfs_btree_sblock_verify_crc(
     421             :         struct xfs_buf          *bp)
     422             : {
     423     2940215 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     424     2940215 :         struct xfs_mount        *mp = bp->b_mount;
     425             : 
     426     2940215 :         if (xfs_has_crc(mp)) {
     427     2939377 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
     428             :                         return false;
     429     2939377 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     430             :         }
     431             : 
     432             :         return true;
     433             : }
     434             : 
     435             : static int
     436      663084 : xfs_btree_free_block(
     437             :         struct xfs_btree_cur    *cur,
     438             :         struct xfs_buf          *bp)
     439             : {
     440      663084 :         int                     error;
     441             : 
     442      663084 :         trace_xfs_btree_free_block(cur, bp);
     443             : 
     444      663085 :         error = cur->bc_ops->free_block(cur, bp);
     445      663083 :         if (!error) {
     446      663083 :                 xfs_trans_binval(cur->bc_tp, bp);
     447      663085 :                 XFS_BTREE_STATS_INC(cur, free);
     448             :         }
     449      663084 :         return error;
     450             : }
     451             : 
     452             : /*
     453             :  * Delete the btree cursor.
     454             :  */
     455             : void
     456 12052255438 : xfs_btree_del_cursor(
     457             :         struct xfs_btree_cur    *cur,           /* btree cursor */
     458             :         int                     error)          /* del because of error */
     459             : {
     460 12052255438 :         int                     i;              /* btree level */
     461             : 
     462             :         /*
     463             :          * Clear the buffer pointers and release the buffers. If we're doing
     464             :          * this because of an error, inspect all of the entries in the bc_bufs
     465             :          * array for buffers to be unlocked. This is because some of the btree
     466             :          * code works from level n down to 0, and if we get an error along the
     467             :          * way we won't have initialized all the entries down to 0.
     468             :          */
     469 31513400791 :         for (i = 0; i < cur->bc_nlevels; i++) {
     470 20663227176 :                 if (cur->bc_levels[i].bp)
     471 17618129557 :                         xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
     472  3045097619 :                 else if (!error)
     473             :                         break;
     474             :         }
     475             : 
     476             :         /*
     477             :          * If we are doing a BMBT update, the number of unaccounted blocks
     478             :          * allocated during this cursor life time should be zero. If it's not
     479             :          * zero, then we should be shut down or on our way to shutdown due to
     480             :          * cancelling a dirty transaction on error.
     481             :          */
     482 12059102899 :         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
     483             :                xfs_is_shutdown(cur->bc_mp) || error != 0);
     484 12059102899 :         if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
     485         334 :                 kmem_free(cur->bc_ops);
     486 12059102899 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     487  1574628397 :             !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ino.rtg)
     488  1032997944 :                 xfs_rtgroup_put(cur->bc_ino.rtg);
     489 12061034521 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
     490  9373195011 :             !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
     491  9373195011 :                 xfs_perag_put(cur->bc_ag.pag);
     492 12065620930 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
     493  1111347682 :                 if (cur->bc_mem.pag)
     494   114364366 :                         xfs_perag_put(cur->bc_mem.pag);
     495  1111357183 :                 if (cur->bc_mem.rtg)
     496    77320087 :                         xfs_rtgroup_put(cur->bc_mem.rtg);
     497             :         }
     498 12065638898 :         kmem_cache_free(cur->bc_cache, cur);
     499 12062912725 : }
     500             : 
     501             : /* Return the buffer target for this btree's buffer. */
     502             : static inline struct xfs_buftarg *
     503 21475007524 : xfs_btree_buftarg(
     504             :         struct xfs_btree_cur    *cur)
     505             : {
     506 21475007524 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     507  1591750814 :                 return xfbtree_target(cur->bc_mem.xfbtree);
     508 19883256710 :         return cur->bc_mp->m_ddev_targp;
     509             : }
     510             : 
     511             : /* Return the block size (in units of 512b sectors) for this btree. */
     512             : static inline unsigned int
     513 21480020214 : xfs_btree_bbsize(
     514             :         struct xfs_btree_cur    *cur)
     515             : {
     516 21480020214 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     517  1591882100 :                 return xfbtree_bbsize();
     518 19888138114 :         return cur->bc_mp->m_bsize;
     519             : }
     520             : 
     521             : /*
     522             :  * Duplicate the btree cursor.
     523             :  * Allocate a new one, copy the record, re-get the buffers.
     524             :  */
     525             : int                                     /* error */
     526   403032548 : xfs_btree_dup_cursor(
     527             :         struct xfs_btree_cur *cur,              /* input cursor */
     528             :         struct xfs_btree_cur **ncur)            /* output cursor */
     529             : {
     530   403032548 :         struct xfs_buf  *bp;            /* btree block's buffer pointer */
     531   403032548 :         int             error;          /* error return value */
     532   403032548 :         int             i;              /* level number of btree block */
     533   403032548 :         xfs_mount_t     *mp;            /* mount structure for filesystem */
     534   403032548 :         struct xfs_btree_cur *new;              /* new cursor value */
     535   403032548 :         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
     536             : 
     537   403032548 :         tp = cur->bc_tp;
     538   403032548 :         mp = cur->bc_mp;
     539             : 
     540             :         /*
     541             :          * Allocate a new cursor like the old one.
     542             :          */
     543   403032548 :         new = cur->bc_ops->dup_cursor(cur);
     544             : 
     545             :         /*
     546             :          * Copy the record currently in the cursor.
     547             :          */
     548   403399690 :         new->bc_rec = cur->bc_rec;
     549             : 
     550             :         /*
     551             :          * For each level current, re-get the buffer and copy the ptr value.
     552             :          */
     553  1372369380 :         for (i = 0; i < new->bc_nlevels; i++) {
     554   969870163 :                 new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
     555   969870163 :                 new->bc_levels[i].ra = cur->bc_levels[i].ra;
     556   969870163 :                 bp = cur->bc_levels[i].bp;
     557   969870163 :                 if (bp) {
     558   844960003 :                         error = xfs_trans_read_buf(mp, tp,
     559             :                                         xfs_btree_buftarg(cur),
     560             :                                         xfs_buf_daddr(bp),
     561   845058065 :                                         xfs_btree_bbsize(cur), 0, &bp,
     562   845058065 :                                         cur->bc_ops->buf_ops);
     563   844157603 :                         if (xfs_metadata_is_sick(error))
     564           0 :                                 xfs_btree_mark_sick(new);
     565   844157603 :                         if (error) {
     566          11 :                                 xfs_btree_del_cursor(new, error);
     567          11 :                                 *ncur = NULL;
     568          11 :                                 return error;
     569             :                         }
     570             :                 }
     571   968969690 :                 new->bc_levels[i].bp = bp;
     572             :         }
     573   402499217 :         *ncur = new;
     574   402499217 :         return 0;
     575             : }
     576             : 
     577             : /*
     578             :  * XFS btree block layout and addressing:
     579             :  *
     580             :  * There are two types of blocks in the btree: leaf and non-leaf blocks.
     581             :  *
     582             :  * A leaf block starts with a header followed by records containing
     583             :  * the values.  A non-leaf block also starts with the same header, and
     584             :  * then first contains lookup keys followed by an equal number of pointers
     585             :  * to the btree blocks at the previous level.
     586             :  *
     587             :  *              +--------+-------+-------+-------+-------+-------+-------+
     588             :  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
     589             :  *              +--------+-------+-------+-------+-------+-------+-------+
     590             :  *
     591             :  *              +--------+-------+-------+-------+-------+-------+-------+
     592             :  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
     593             :  *              +--------+-------+-------+-------+-------+-------+-------+
     594             :  *
     595             :  * The header is called struct xfs_btree_block for reasons better left unknown
     596             :  * and comes in different versions for short (32bit) and long (64bit) block
     597             :  * pointers.  The record and key structures are defined by the btree instances
     598             :  * and opaque to the btree core.  The block pointers are simple disk endian
     599             :  * integers, available in a short (32bit) and long (64bit) variant.
     600             :  *
     601             :  * The helpers below calculate the offset of a given record, key or pointer
     602             :  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
     603             :  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
     604             :  * inside the btree block is done using indices starting at one, not zero!
     605             :  *
     606             :  * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
     607             :  * overlapping intervals.  In such a tree, records are still sorted lowest to
     608             :  * highest and indexed by the smallest key value that refers to the record.
     609             :  * However, nodes are different: each pointer has two associated keys -- one
     610             :  * indexing the lowest key available in the block(s) below (the same behavior
     611             :  * as the key in a regular btree) and another indexing the highest key
     612             :  * available in the block(s) below.  Because records are /not/ sorted by the
     613             :  * highest key, all leaf block updates require us to compute the highest key
     614             :  * that matches any record in the leaf and to recursively update the high keys
     615             :  * in the nodes going further up in the tree, if necessary.  Nodes look like
     616             :  * this:
     617             :  *
     618             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     619             :  * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
     620             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     621             :  *
     622             :  * To perform an interval query on an overlapped tree, perform the usual
     623             :  * depth-first search and use the low and high keys to decide if we can skip
     624             :  * that particular node.  If a leaf node is reached, return the records that
     625             :  * intersect the interval.  Note that an interval query may return numerous
     626             :  * entries.  For a non-overlapped tree, simply search for the record associated
     627             :  * with the lowest key and iterate forward until a non-matching record is
     628             :  * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
     629             :  * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discusses this in
     630             :  * more detail.
     631             :  *
     632             :  * Why do we care about overlapping intervals?  Let's say you have a bunch of
     633             :  * reverse mapping records on a reflink filesystem:
     634             :  *
     635             :  * 1: +- file A startblock B offset C length D -----------+
     636             :  * 2:      +- file E startblock F offset G length H --------------+
     637             :  * 3:      +- file I startblock F offset J length K --+
     638             :  * 4:                                                        +- file L... --+
     639             :  *
     640             :  * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
     641             :  * we'd simply increment the length of record 1.  But how do we find the record
     642             :  * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
     643             :  * record 3 because the keys are ordered first by startblock.  An interval
     644             :  * query would return records 1 and 2 because they both overlap (B+D-1), and
     645             :  * from that we can pick out record 1 as the appropriate left neighbor.
     646             :  *
     647             :  * In the non-overlapped case you can do a LE lookup and decrement the cursor
     648             :  * because a record's interval must end before the next record.
     649             :  */
     650             : 
     651             : /*
     652             :  * Return the size of the btree block header for this btree instance.
                     :  *
                     :  * Two cursor flags pick one of four constants: XFS_BTREE_LONG_PTRS selects
                     :  * the long-pointer (LBLOCK) header over the short-pointer (SBLOCK) one, and
                     :  * XFS_BTREE_CRC_BLOCKS selects the _CRC_LEN variant of that header.
     653             :  */
     654 >10621*10^8 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
     655             : {
     656 >10621*10^8 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     657 >34582*10^7 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     658             :                         return XFS_BTREE_LBLOCK_CRC_LEN;
     659           0 :                 return XFS_BTREE_LBLOCK_LEN;
     660             :         }
     661 >71636*10^7 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     662 >71640*10^7 :                 return XFS_BTREE_SBLOCK_CRC_LEN;
     663             :         return XFS_BTREE_SBLOCK_LEN;
     664             : }
     665             : 
     666             : /*
     667             :  * Return size of btree block pointers for this btree instance.
     668             :  */
     669             : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
     670             : {
     671 88846558388 :         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
     672 88846558388 :                 sizeof(__be64) : sizeof(__be32);
     673             : }
     674             : 
     675             : /*
     676             :  * Calculate the byte offset of the n-th record in a btree block.
                     :  *
                     :  * @n is one-based: record 1 sits directly after the block header, and each
                     :  * following record is another cur->bc_ops->rec_len bytes further in.
     677             :  */
     678             : STATIC size_t
     679 >74392*10^7 : xfs_btree_rec_offset(
     680             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     681             :         int                     n)      /* one-based record index */
     682             : {
     683 >74392*10^7 :         return xfs_btree_block_len(cur) +
     684 >74392*10^7 :                 (n - 1) * cur->bc_ops->rec_len;
     685             : }
     686             : 
     687             : /*
     688             :  * Calculate the byte offset of the n-th key in a btree block.
                     :  *
                     :  * @n is one-based: key 1 sits directly after the block header, and each
                     :  * following key is another cur->bc_ops->key_len bytes further in.
     689             :  */
     690             : STATIC size_t
     691 >13279*10^7 : xfs_btree_key_offset(
     692             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     693             :         int                     n)      /* one-based key index */
     694             : {
     695 >13279*10^7 :         return xfs_btree_block_len(cur) +
     696 >13279*10^7 :                 (n - 1) * cur->bc_ops->key_len;
     697             : }
     698             : 
     699             : /*
     700             :  * Calculate the byte offset of the n-th high key in a btree block.
                     :  *
                     :  * The result is the start of the n-th (one-based) key slot plus half of
                     :  * cur->bc_ops->key_len, i.e. the high key occupies the second half of the
                     :  * slot.
     701             :  */
     702             : STATIC size_t
     703 >10071*10^7 : xfs_btree_high_key_offset(
     704             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     705             :         int                     n)      /* one-based key index */
     706             : {
     707 >10071*10^7 :         return xfs_btree_block_len(cur) +
     708 >10071*10^7 :                 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
     709             : }
     710             : 
     711             : /*
     712             :  * Calculate the byte offset of the n-th block pointer in a btree block.
                     :  *
                     :  * The pointer array starts after the block header and after room for the
                     :  * maximum number of keys that ->get_maxrecs() reports for @level; the
                     :  * current record count of the block is not consulted.  @n is one-based
                     :  * and each pointer is xfs_btree_ptr_len() bytes wide.
     713             :  */
     714             : STATIC size_t
     715 88802442329 : xfs_btree_ptr_offset(
     716             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     717             :         int                     n,      /* one-based pointer index */
     718             :         int                     level)  /* level passed to ->get_maxrecs() */
     719             : {
     720 88802442329 :         return xfs_btree_block_len(cur) +
     721 >17762*10^7 :                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
     722 >15644*10^7 :                 (n - 1) * xfs_btree_ptr_len(cur);
     723             : }
     724             : 
     725             : /*
     726             :  * Return a pointer to the n-th record in the btree block.
                     :  *
                     :  * This is the address of @block advanced by xfs_btree_rec_offset(cur, n)
                     :  * bytes; no bounds check against the block contents is made here.
     727             :  */
     728             : union xfs_btree_rec *
     729  6413680615 : xfs_btree_rec_addr(
     730             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     731             :         int                     n,      /* one-based record index */
     732             :         struct xfs_btree_block  *block) /* block holding the record */
     733             : {
     734 >73874*10^7 :         return (union xfs_btree_rec *)
     735 >73874*10^7 :                 ((char *)block + xfs_btree_rec_offset(cur, n));
     736             : }
     737             : 
     738             : /*
     739             :  * Return a pointer to the n-th key in the btree block.
                     :  *
                     :  * This is the address of @block advanced by xfs_btree_key_offset(cur, n)
                     :  * bytes; no bounds check against the block contents is made here.
     740             :  */
     741             : union xfs_btree_key *
     742  4713403652 : xfs_btree_key_addr(
     743             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     744             :         int                     n,      /* one-based key index */
     745             :         struct xfs_btree_block  *block) /* block holding the key */
     746             : {
     747 >13171*10^7 :         return (union xfs_btree_key *)
     748 >13171*10^7 :                 ((char *)block + xfs_btree_key_offset(cur, n));
     749             : }
     750             : 
     751             : /*
     752             :  * Return a pointer to the n-th high key in the btree block.
                     :  *
                     :  * This is the address of @block advanced by
                     :  * xfs_btree_high_key_offset(cur, n) bytes; no bounds check against the
                     :  * block contents is made here.
     753             :  */
     754             : union xfs_btree_key *
     755  3404088964 : xfs_btree_high_key_addr(
     756             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     757             :         int                     n,      /* one-based key index */
     758             :         struct xfs_btree_block  *block) /* block holding the key */
     759             : {
     760 >10083*10^7 :         return (union xfs_btree_key *)
     761 >10083*10^7 :                 ((char *)block + xfs_btree_high_key_offset(cur, n));
     762             : }
     763             : 
     764             : /*
     765             :  * Return a pointer to the n-th block pointer in the btree block.
                     :  *
                     :  * The level needed for the offset calculation is read from the block
                     :  * itself via xfs_btree_get_level().  The block must not be at level zero;
                     :  * this is asserted.
     766             :  */
     767             : union xfs_btree_ptr *
     768 88823855620 : xfs_btree_ptr_addr(
     769             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     770             :         int                     n,      /* one-based pointer index */
     771             :         struct xfs_btree_block  *block) /* block holding the pointer */
     772             : {
     773 88823855620 :         int                     level = xfs_btree_get_level(block);
     774             : 
     775 88823855620 :         ASSERT(block->bb_level != 0);
     776             : 
     777 >17762*10^7 :         return (union xfs_btree_ptr *)
     778 88823855620 :                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
     779             : }
     780             : 
                       /*
                     :  * Return the inode fork associated with this cursor.
                     :  *
                     :  * The cursor must have XFS_BTREE_ROOT_IN_INODE set (asserted).  When
                     :  * XFS_BTREE_STAGING is also set the fork comes from the cursor's
                     :  * bc_ino.ifake; otherwise it is looked up with xfs_ifork_ptr() from
                     :  * bc_ino.ip and bc_ino.whichfork.
                     :  */
     781             : struct xfs_ifork *
     782  8684309717 : xfs_btree_ifork_ptr(
     783             :         struct xfs_btree_cur    *cur)   /* btree cursor */
     784             : {
     785  8684309717 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
     786             : 
     787  8684309717 :         if (cur->bc_flags & XFS_BTREE_STAGING)
     788      325930 :                 return cur->bc_ino.ifake->if_fork;
     789  8683983787 :         return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
     790             : }
     791             : 
     792             : /*
     793             :  * Get the root block which is stored in the inode.
     794             :  *
     795             :  * For now this btree implementation assumes the btree root is always
     796             :  * stored in the if_broot field of an inode fork.
                     :  *
                     :  * The fork is obtained from xfs_btree_ifork_ptr(), and its if_broot member
                     :  * is returned cast to a btree block pointer.
     797             :  */
     798             : STATIC struct xfs_btree_block *
     799  5465441538 : xfs_btree_get_iroot(
     800             :         struct xfs_btree_cur    *cur)   /* btree cursor */
     801             : {
     802  5465441538 :         struct xfs_ifork        *ifp = xfs_btree_ifork_ptr(cur);
     803             : 
     804  5462107573 :         return (struct xfs_btree_block *)ifp->if_broot;
     805             : }
     806             : 
     807             : /*
     808             :  * Retrieve the block pointer from the cursor at the given level.
     809             :  * This may be an inode btree root or from a buffer.
                     :  *
                     :  * For the top level (bc_nlevels - 1) of a cursor with
                     :  * XFS_BTREE_ROOT_IN_INODE set, *bpp is set to NULL and the inode root from
                     :  * xfs_btree_get_iroot() is returned.  In every other case *bpp is set to
                     :  * the buffer the cursor holds for @level and the block is derived from it
                     :  * with XFS_BUF_TO_BLOCK().
     810             :  */
     811             : struct xfs_btree_block *                /* generic btree block pointer */
     812 >77202*10^7 : xfs_btree_get_block(
     813             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     814             :         int                     level,  /* level in btree */
     815             :         struct xfs_buf          **bpp)  /* buffer containing the block */
     816             : {
     817 >77202*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     818 >29903*10^7 :             (level == cur->bc_nlevels - 1)) {
     819  1966332155 :                 *bpp = NULL;
     820  1966332155 :                 return xfs_btree_get_iroot(cur);
     821             :         }
     822             : 
     823 >77005*10^7 :         *bpp = cur->bc_levels[level].bp;
     824 >77005*10^7 :         return XFS_BUF_TO_BLOCK(*bpp);
     825             : }
     826             : 
     827             : /*
     828             :  * Change the cursor to point to the first record at the given level.
     829             :  * Other levels are unaffected.
                     :  *
                     :  * Returns 0 without touching the cursor when xfs_btree_check_block()
                     :  * returns nonzero for the block or when the block has no records;
                     :  * returns 1 after setting the level's ptr to 1.
     830             :  */
     831             : STATIC int                              /* success=1, failure=0 */
     832   157241496 : xfs_btree_firstrec(
     833             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     834             :         int                     level)  /* level to change */
     835             : {
     836   157241496 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     837   157241496 :         struct xfs_buf          *bp;    /* buffer containing block */
     838             : 
     839             :         /*
     840             :          * Get the block pointer for this level.
     841             :          */
     842   157241496 :         block = xfs_btree_get_block(cur, level, &bp);
     843   157241181 :         if (xfs_btree_check_block(cur, block, level, bp))
     844             :                 return 0;
     845             :         /*
     846             :          * It's empty, there is no such record.
     847             :          */
     848   157238254 :         if (!block->bb_numrecs)
     849             :                 return 0;
     850             :         /*
     851             :          * Set the ptr value to 1, that's the first record/key.
     852             :          */
     853   157238254 :         cur->bc_levels[level].ptr = 1;
     854   157238254 :         return 1;
     855             : }
     856             : 
     857             : /*
     858             :  * Change the cursor to point to the last record in the current block
     859             :  * at the given level.  Other levels are unaffected.
                     :  *
                     :  * Returns 0 without touching the cursor when xfs_btree_check_block()
                     :  * returns nonzero for the block or when the block has no records;
                     :  * returns 1 after setting the level's ptr to the block's record count.
     860             :  */
     861             : STATIC int                              /* success=1, failure=0 */
     862   163555311 : xfs_btree_lastrec(
     863             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     864             :         int                     level)  /* level to change */
     865             : {
     866   163555311 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     867   163555311 :         struct xfs_buf          *bp;    /* buffer containing block */
     868             : 
     869             :         /*
     870             :          * Get the block pointer for this level.
     871             :          */
     872   163555311 :         block = xfs_btree_get_block(cur, level, &bp);
     873   163554973 :         if (xfs_btree_check_block(cur, block, level, bp))
     874             :                 return 0;
     875             :         /*
     876             :          * It's empty, there is no such record.
     877             :          */
     878   163557997 :         if (!block->bb_numrecs)
     879             :                 return 0;
     880             :         /*
     881             :          * Set the ptr value to numrecs, that's the last record/key.
                     :          * bb_numrecs is stored big-endian, hence the be16_to_cpu().
     882             :          */
     883   163557997 :         cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
     884   163557997 :         return 1;
     885             : }
     886             : 
     887             : /*
     888             :  * Compute first and last byte offsets for the fields given.
     889             :  * Interprets the offsets table, which contains struct field offsets.
                     :  *
                     :  * *first is offsets[] of the lowest set bit in @fields.  *last is one
                     :  * less than the offsets[] entry following the highest set bit, so the
                     :  * table is read at index @nbits when bit (@nbits - 1) is set and must
                     :  * have an entry there.
                     :  *
                     :  * Neither scan has a bound of its own: each stops only when it meets a
                     :  * set bit.  @fields must therefore be nonzero (asserted), and the
                     :  * downward scan starts at bit (@nbits - 1), so it relies on a set bit
                     :  * existing at or below that position.
     890             :  */
     891             : void
     892  3579010942 : xfs_btree_offsets(
     893             :         uint32_t        fields,         /* bitmask of fields */
     894             :         const short     *offsets,       /* table of field offsets */
     895             :         int             nbits,          /* number of bits to inspect */
     896             :         int             *first,         /* output: first byte offset */
     897             :         int             *last)          /* output: last byte offset */
     898             : {
     899  3579010942 :         int             i;              /* current bit number */
     900  3579010942 :         uint32_t        imask;          /* mask for current bit number */
     901             : 
     902  3579010942 :         ASSERT(fields != 0);
     903             :         /*
     904             :          * Find the lowest bit, so the first byte offset.
     905             :          */
     906 13593899279 :         for (i = 0, imask = 1u; ; i++, imask <<= 1) {
     907 13593899279 :                 if (imask & fields) {
     908  3579010942 :                         *first = offsets[i];
     909  3579010942 :                         break;
     910             :                 }
     911             :         }
     912             :         /*
     913             :          * Find the highest bit, so the last byte offset.
     914             :          */
     915  3579010942 :         for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
     916 24883352593 :                 if (imask & fields) {
     917  3579010942 :                         *last = offsets[i + 1] - 1;
     918  3579010942 :                         break;
     919             :                 }
     920             :         }
     921  3579010942 : }
     922             : 
     923             : /*
     924             :  * Get a buffer for the block, return it read in.
     925             :  * Long-form addressing.
                     :  *
                     :  * Returns -EFSCORRUPTED if xfs_verify_fsbno() rejects @fsbno, or the error
                     :  * from xfs_trans_read_buf() if the read fails; *bpp is left untouched in
                     :  * both cases.  On success returns 0 with the buffer stored in *bpp, after
                     :  * applying @refval to it with xfs_buf_set_ref() when the buffer is
                     :  * non-NULL.
     926             :  */
     927             : int
     928   546958190 : xfs_btree_read_bufl(
     929             :         struct xfs_mount        *mp,            /* file system mount point */
     930             :         struct xfs_trans        *tp,            /* transaction pointer */
     931             :         xfs_fsblock_t           fsbno,          /* file system block number */
     932             :         struct xfs_buf          **bpp,          /* buffer for fsbno */
     933             :         int                     refval,         /* ref count value for buffer */
     934             :         const struct xfs_buf_ops *ops)
     935             : {
     936   546958190 :         struct xfs_buf          *bp;            /* return value */
     937   546958190 :         xfs_daddr_t             d;              /* real disk block address */
     938   546958190 :         int                     error;
     939             : 
     940   546958190 :         if (!xfs_verify_fsbno(mp, fsbno))
     941             :                 return -EFSCORRUPTED;
     942   546958303 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     943   546956777 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
     944             :                                    mp->m_bsize, 0, &bp, ops);
     945   546953620 :         if (error)
     946             :                 return error;
     947   546953563 :         if (bp)
     948   546953563 :                 xfs_buf_set_ref(bp, refval);
     949   546948113 :         *bpp = bp;
     950   546948113 :         return 0;
     951             : }
     952             : 
     953             : /*
     954             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     955             :  * Long-form addressing.
                     :  *
                     :  * @fsbno must not be NULLFSBLOCK (asserted).  The readahead is issued on
                     :  * the mount's m_ddev_targp for mp->m_bsize * @count units starting at the
                     :  * disk address that XFS_FSB_TO_DADDR() yields for @fsbno.
     956             :  */
     957             : /* ARGSUSED */
     958             : void
     959  1294649822 : xfs_btree_reada_bufl(
     960             :         struct xfs_mount        *mp,            /* file system mount point */
     961             :         xfs_fsblock_t           fsbno,          /* file system block number */
     962             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     963             :         const struct xfs_buf_ops *ops)
     964             : {
     965  1294649822 :         xfs_daddr_t             d;
     966             : 
     967  1294649822 :         ASSERT(fsbno != NULLFSBLOCK);
     968  1294649822 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     969  1294338079 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     970  1294948980 : }
     971             : 
     972             : /*
     973             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     974             :  * Short-form addressing.
                     :  *
                     :  * @agno must not be NULLAGNUMBER and @agbno must not be NULLAGBLOCK (both
                     :  * asserted).  The readahead is issued on the mount's m_ddev_targp for
                     :  * mp->m_bsize * @count units starting at the disk address that
                     :  * XFS_AGB_TO_DADDR() yields for the (@agno, @agbno) pair.
     975             :  */
     976             : /* ARGSUSED */
     977             : void
     978  7202655758 : xfs_btree_reada_bufs(
     979             :         struct xfs_mount        *mp,            /* file system mount point */
     980             :         xfs_agnumber_t          agno,           /* allocation group number */
     981             :         xfs_agblock_t           agbno,          /* allocation group block number */
     982             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     983             :         const struct xfs_buf_ops *ops)
     984             : {
     985  7202655758 :         xfs_daddr_t             d;
     986             : 
     987  7202655758 :         ASSERT(agno != NULLAGNUMBER);
     988  7202655758 :         ASSERT(agbno != NULLAGBLOCK);
     989  7202655758 :         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
     990  7202655758 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     991  7203043701 : }
     992             : 
                       /*
                     :  * Issue sibling readahead for a long-pointer btree block.
                     :  *
                     :  * For each of XFS_BTCUR_LEFTRA and XFS_BTCUR_RIGHTRA set in @lr, a
                     :  * one-block readahead is started on the matching sibling unless that
                     :  * sibling pointer is NULLFSBLOCK.  Returns the number of readaheads
                     :  * started (0, 1 or 2).  Cursors flagged XFS_BTREE_IN_XFILE never issue
                     :  * readahead here and always get 0.
                     :  */
     993             : STATIC int
     994  1325850469 : xfs_btree_readahead_lblock(
     995             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     996             :         int                     lr,     /* left/right bits */
     997             :         struct xfs_btree_block  *block) /* block whose siblings are read */
     998             : {
     999  1325850469 :         int                     rval = 0;
    1000  1325850469 :         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
    1001  1325850469 :         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
    1002             : 
    1003  1325850469 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1004             :                 return 0;
    1005             : 
    1006  1294583357 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
    1007   108178207 :                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
    1008   108178207 :                                      cur->bc_ops->buf_ops);
    1009   108178207 :                 rval++;
    1010             :         }
    1011             : 
    1012  1294583907 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
    1013  1186375913 :                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
    1014  1186375913 :                                      cur->bc_ops->buf_ops);
    1015  1186798465 :                 rval++;
    1016             :         }
    1017             : 
    1018             :         return rval;
    1019             : }
    1020             : 
                       /*
                     :  * Issue sibling readahead for a short-pointer btree block.
                     :  *
                     :  * For each of XFS_BTCUR_LEFTRA and XFS_BTCUR_RIGHTRA set in @lr, a
                     :  * one-block readahead is started on the matching sibling unless that
                     :  * sibling pointer is NULLAGBLOCK.  The sibling is addressed within the
                     :  * AG named by cur->bc_ag.pag->pag_agno.  Returns the number of readaheads
                     :  * started (0, 1 or 2).  Cursors flagged XFS_BTREE_IN_XFILE never issue
                     :  * readahead here and always get 0.
                     :  */
    1021             : STATIC int
    1022  3080137321 : xfs_btree_readahead_sblock(
    1023             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1024             :         int                     lr,     /* left/right bits */
    1025             :         struct xfs_btree_block *block)  /* block whose siblings are read */
    1026             : {
    1027  3080137321 :         int                     rval = 0;
    1028  3080137321 :         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
    1029  3080137321 :         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
    1030             : 
    1031  3080137321 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1032             :                 return 0;
    1033             : 
    1034  3031019609 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
    1035   131767102 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1036   131767102 :                                      left, 1, cur->bc_ops->buf_ops);
    1037   131767102 :                 rval++;
    1038             :         }
    1039             : 
    1040  3031021735 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
    1041  2899106066 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1042  2899106066 :                                      right, 1, cur->bc_ops->buf_ops);
    1043  2899937326 :                 rval++;
    1044             :         }
    1045             : 
    1046             :         return rval;
    1047             : }
    1048             : 
    1049             : /*
    1050             :  * Read-ahead btree blocks, at the given level.
    1051             :  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
                     :  *
                     :  * The per-level ra field records which directions have already been
                     :  * requested; if every bit in @lr is already recorded nothing is done.
                     :  * Otherwise the new bits are recorded and the request is handed to the
                     :  * long- or short-pointer helper, whose return value (the number of
                     :  * readaheads started) is passed back.  Returns 0 when nothing was issued.
    1052             :  */
    1053             : STATIC int
    1054 >25336*10^7 : xfs_btree_readahead(
    1055             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    1056             :         int                     lev,            /* level in btree */
    1057             :         int                     lr)             /* left/right bits */
    1058             : {
    1059 >25336*10^7 :         struct xfs_btree_block  *block;
    1060             : 
    1061             :         /*
    1062             :          * No readahead needed if we are at the root level and the
    1063             :          * btree root is stored in the inode.
    1064             :          */
    1065 >25336*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1066 >12014*10^7 :             (lev == cur->bc_nlevels - 1))
    1067             :                 return 0;
    1068             : 
    1069 >25308*10^7 :         if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) /* lr adds no new bits */
    1070             :                 return 0;
    1071             : 
    1072  4406320940 :         cur->bc_levels[lev].ra |= lr;
    1073  4406320940 :         block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
    1074             : 
    1075  4406320940 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1076  1325882841 :                 return xfs_btree_readahead_lblock(cur, lr, block);
    1077  3080438099 :         return xfs_btree_readahead_sblock(cur, lr, block);
    1078             : }
    1079             : 
                       /*
                     :  * Convert a btree block pointer into a disk address.
                     :  *
                     :  * The pointer is first validated with xfs_btree_check_ptr(); a nonzero
                     :  * result is returned as-is and *daddr is not written.  Otherwise *daddr
                     :  * is filled in and 0 is returned:
                     :  *  - XFS_BTREE_IN_XFILE cursors use xfbtree_ptr_to_daddr();
                     :  *  - XFS_BTREE_LONG_PTRS cursors treat the pointer as a big-endian 64-bit
                     :  *    fsblock number;
                     :  *  - all others treat it as a big-endian 32-bit AG block number within
                     :  *    the AG named by cur->bc_ag.pag->pag_agno.
                     :  *
                     :  * NOTE(review): the literal 0 and 1 passed to xfs_btree_check_ptr() are
                     :  * presumably an index and a level -- confirm against its definition.
                     :  */
    1080             : STATIC int
    1081 64279450937 : xfs_btree_ptr_to_daddr(
    1082             :         struct xfs_btree_cur            *cur,
    1083             :         const union xfs_btree_ptr       *ptr,
    1084             :         xfs_daddr_t                     *daddr)
    1085             : {
    1086 64279450937 :         xfs_fsblock_t           fsbno;
    1087 64279450937 :         xfs_agblock_t           agbno;
    1088 64279450937 :         int                     error;
    1089             : 
    1090 64279450937 :         error = xfs_btree_check_ptr(cur, ptr, 0, 1);
    1091 64280914006 :         if (error)
    1092             :                 return error;
    1093             : 
    1094 64280914006 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1095  3442302083 :                 *daddr = xfbtree_ptr_to_daddr(cur, ptr);
    1096  3441986621 :                 return 0;
    1097             :         }
    1098             : 
    1099 60838611923 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1100  9539477543 :                 fsbno = be64_to_cpu(ptr->l);
    1101  9539477543 :                 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
    1102             :         } else {
    1103 51299134380 :                 agbno = be32_to_cpu(ptr->s);
    1104 51299134380 :                 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1105             :                                 agbno);
    1106             :         }
    1107             : 
    1108             :         return 0;
    1109             : }
    1110             : 
    1111             : /*
    1112             :  * Readahead @count btree blocks at the given @ptr location.
    1113             :  *
    1114             :  * We don't need to care about long or short form btrees here as we have a
    1115             :  * method of converting the ptr directly to a daddr available to us.
                     :  *
                     :  * If xfs_btree_ptr_to_daddr() fails, the readahead is silently skipped;
                     :  * no error is reported to the caller.
    1116             :  */
    1117             : STATIC void
    1118    29972218 : xfs_btree_readahead_ptr(
    1119             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1120             :         union xfs_btree_ptr     *ptr,   /* pointer to the first block */
    1121             :         xfs_extlen_t            count)  /* number of blocks to read ahead */
    1122             : {
    1123    29972218 :         xfs_daddr_t             daddr;
    1124             : 
    1125    29972218 :         if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
    1126           0 :                 return;
    1127    59944646 :         xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
    1128    29972315 :                         xfs_btree_bbsize(cur) * count,
    1129    29972331 :                         cur->bc_ops->buf_ops);
    1130             : }
    1131             : 
    1132             : /*
    1133             :  * Set the buffer for level "lev" in the cursor to bp, releasing
    1134             :  * any previous buffer.
    1135             :  */
    1136             : STATIC void
    1137 20242156464 : xfs_btree_setbuf(
    1138             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1139             :         int                     lev,    /* level in btree */
    1140             :         struct xfs_buf          *bp)    /* new buffer to set */
    1141             : {
    1142 20242156464 :         struct xfs_btree_block  *b;     /* btree block */
    1143             : 
    1144 20242156464 :         if (cur->bc_levels[lev].bp)
    1145  3473601980 :                 xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
    1146 20244584276 :         cur->bc_levels[lev].bp = bp;
    1147 20244584276 :         cur->bc_levels[lev].ra = 0;
    1148             : 
    1149 20244584276 :         b = XFS_BUF_TO_BLOCK(bp);
    1150 20244584276 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1151  4842101883 :                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
    1152  1942829152 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1153  4842101883 :                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
    1154  2406589099 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1155             :         } else {
    1156 15402482393 :                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
    1157 10071199649 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1158 15402482393 :                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
    1159 10193316594 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1160             :         }
    1161 20244584276 : }
    1162             : 
    1163             : bool
    1164    55635280 : xfs_btree_ptr_is_null(
    1165             :         struct xfs_btree_cur            *cur,
    1166             :         const union xfs_btree_ptr       *ptr)
    1167             : {
    1168   527401510 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1169  5236406313 :                 return ptr->l == cpu_to_be64(NULLFSBLOCK);
    1170             :         else
    1171  8722242372 :                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
    1172             : }
    1173             : 
    1174             : void
    1175     4507722 : xfs_btree_set_ptr_null(
    1176             :         struct xfs_btree_cur    *cur,
    1177             :         union xfs_btree_ptr     *ptr)
    1178             : {
    1179     4507722 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1180  1548641434 :                 ptr->l = cpu_to_be64(NULLFSBLOCK);
    1181             :         else
    1182  1427583723 :                 ptr->s = cpu_to_be32(NULLAGBLOCK);
    1183     4507722 : }
    1184             : 
    1185             : /*
    1186             :  * Get/set/init sibling pointers
    1187             :  */
    1188             : void
    1189 10294330013 : xfs_btree_get_sibling(
    1190             :         struct xfs_btree_cur    *cur,
    1191             :         struct xfs_btree_block  *block,
    1192             :         union xfs_btree_ptr     *ptr,
    1193             :         int                     lr)
    1194             : {
    1195 10294330013 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1196             : 
    1197 10294330013 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1198  3184292348 :                 if (lr == XFS_BB_RIGHTSIB)
    1199  2625251036 :                         ptr->l = block->bb_u.l.bb_rightsib;
    1200             :                 else
    1201   559041312 :                         ptr->l = block->bb_u.l.bb_leftsib;
    1202             :         } else {
    1203  7110037665 :                 if (lr == XFS_BB_RIGHTSIB)
    1204  6888226939 :                         ptr->s = block->bb_u.s.bb_rightsib;
    1205             :                 else
    1206   221810726 :                         ptr->s = block->bb_u.s.bb_leftsib;
    1207             :         }
    1208 10294330013 : }
    1209             : 
    1210             : void
    1211    17174977 : xfs_btree_set_sibling(
    1212             :         struct xfs_btree_cur            *cur,
    1213             :         struct xfs_btree_block          *block,
    1214             :         const union xfs_btree_ptr       *ptr,
    1215             :         int                             lr)
    1216             : {
    1217    17174977 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1218             : 
    1219    17174977 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1220     8117922 :                 if (lr == XFS_BB_RIGHTSIB)
    1221     4494261 :                         block->bb_u.l.bb_rightsib = ptr->l;
    1222             :                 else
    1223     3623661 :                         block->bb_u.l.bb_leftsib = ptr->l;
    1224             :         } else {
    1225     9057055 :                 if (lr == XFS_BB_RIGHTSIB)
    1226     4399875 :                         block->bb_u.s.bb_rightsib = ptr->s;
    1227             :                 else
    1228     4657180 :                         block->bb_u.s.bb_leftsib = ptr->s;
    1229             :         }
    1230    17174977 : }
    1231             : 
    1232             : static void
    1233    27180198 : __xfs_btree_init_block(
    1234             :         struct xfs_mount        *mp,
    1235             :         struct xfs_btree_block  *buf,
    1236             :         const struct xfs_btree_ops *ops,
    1237             :         xfs_daddr_t             blkno,
    1238             :         __u16                   level,
    1239             :         __u16                   numrecs,
    1240             :         __u64                   owner)
    1241             : {
    1242    27180198 :         int                     crc = xfs_has_crc(mp);
    1243    27180198 :         __u32                   magic = xfs_btree_magic(mp, ops);
    1244             : 
    1245    27180858 :         buf->bb_magic = cpu_to_be32(magic);
    1246    27180858 :         buf->bb_level = cpu_to_be16(level);
    1247    27180858 :         buf->bb_numrecs = cpu_to_be16(numrecs);
    1248             : 
    1249    27180858 :         if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
    1250    23676401 :                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
    1251    23676401 :                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
    1252    23676401 :                 if (crc) {
    1253    23676406 :                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
    1254    23676406 :                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
    1255    23676406 :                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1256    23676322 :                         buf->bb_u.l.bb_pad = 0;
    1257    23676322 :                         buf->bb_u.l.bb_lsn = 0;
    1258             :                 }
    1259             :         } else {
    1260             :                 /* owner is a 32 bit value on short blocks */
    1261     3504457 :                 __u32 __owner = (__u32)owner;
    1262             : 
    1263     3504457 :                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
    1264     3504457 :                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
    1265     3504457 :                 if (crc) {
    1266     3463780 :                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
    1267     3463780 :                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
    1268     3463780 :                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1269     3462849 :                         buf->bb_u.s.bb_lsn = 0;
    1270             :                 }
    1271             :         }
    1272    27179843 : }
    1273             : 
    1274             : void
    1275    17075272 : xfs_btree_init_block(
    1276             :         struct xfs_mount        *mp,
    1277             :         struct xfs_btree_block  *block,
    1278             :         const struct xfs_btree_ops *ops,
    1279             :         __u16                   level,
    1280             :         __u16                   numrecs,
    1281             :         __u64                   owner)
    1282             : {
    1283    17075272 :         __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
    1284             :                         numrecs, owner);
    1285    17075266 : }
    1286             : 
    1287             : void
    1288    10038438 : xfs_btree_init_buf(
    1289             :         struct xfs_mount                *mp,
    1290             :         struct xfs_buf                  *bp,
    1291             :         const struct xfs_btree_ops      *ops,
    1292             :         __u16                           level,
    1293             :         __u16                           numrecs,
    1294             :         __u64                           owner)
    1295             : {
    1296    10038438 :         __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
    1297             :                         xfs_buf_daddr(bp), level, numrecs, owner);
    1298    10038393 :         bp->b_ops = ops->buf_ops;
    1299    10038393 : }
    1300             : 
    1301             : void
    1302     5923295 : xfs_btree_init_block_cur(
    1303             :         struct xfs_btree_cur    *cur,
    1304             :         struct xfs_buf          *bp,
    1305             :         int                     level,
    1306             :         int                     numrecs)
    1307             : {
    1308     5923295 :         __u64                   owner;
    1309             : 
    1310             :         /*
    1311             :          * we can pull the owner from the cursor right now as the different
    1312             :          * owners align directly with the pointer size of the btree. This may
    1313             :          * change in future, but is safe for current users of the generic btree
    1314             :          * code.
    1315             :          */
    1316     5923295 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1317      941248 :                 owner = xfbtree_owner(cur);
    1318     4982047 :         else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1319     2283101 :                 owner = cur->bc_ino.ip->i_ino;
    1320             :         else
    1321     2698946 :                 owner = cur->bc_ag.pag->pag_agno;
    1322             : 
    1323     5923302 :         xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
    1324     5922958 : }
    1325             : 
    1326             : /*
    1327             :  * Return true if ptr is the last record in the btree and
    1328             :  * we need to track updates to this record.  The decision
    1329             :  * will be further refined in the update_lastrec method.
    1330             :  */
    1331             : STATIC int
    1332  3649877492 : xfs_btree_is_lastrec(
    1333             :         struct xfs_btree_cur    *cur,
    1334             :         struct xfs_btree_block  *block,
    1335             :         int                     level)
    1336             : {
    1337  3649877492 :         union xfs_btree_ptr     ptr;
    1338             : 
    1339  3649877492 :         if (level > 0)
    1340             :                 return 0;
    1341  3646030667 :         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
    1342             :                 return 0;
    1343             : 
    1344   336903552 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1345   673750396 :         if (!xfs_btree_ptr_is_null(cur, &ptr))
    1346    57029103 :                 return 0;
    1347             :         return 1;
    1348             : }
    1349             : 
    1350             : STATIC void
    1351   169472700 : xfs_btree_buf_to_ptr(
    1352             :         struct xfs_btree_cur    *cur,
    1353             :         struct xfs_buf          *bp,
    1354             :         union xfs_btree_ptr     *ptr)
    1355             : {
    1356   169472700 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1357      941270 :                 xfbtree_buf_to_ptr(cur, bp, ptr);
    1358      941270 :                 return;
    1359             :         }
    1360             : 
    1361   168531430 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1362    23050631 :                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
    1363             :                                         xfs_buf_daddr(bp)));
    1364             :         else {
    1365   145480799 :                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
    1366             :                                         xfs_buf_daddr(bp)));
    1367             :         }
    1368             : }
    1369             : 
    1370             : static inline void
    1371             : xfs_btree_set_refs(
    1372             :         struct xfs_btree_cur    *cur,
    1373             :         struct xfs_buf          *bp)
    1374             : {
    1375 20611702841 :         xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
    1376             : }
    1377             : 
    1378             : int
    1379     5965489 : xfs_btree_get_buf_block(
    1380             :         struct xfs_btree_cur            *cur,
    1381             :         const union xfs_btree_ptr       *ptr,
    1382             :         struct xfs_btree_block          **block,
    1383             :         struct xfs_buf                  **bpp)
    1384             : {
    1385     5965489 :         xfs_daddr_t                     d;
    1386     5965489 :         int                             error;
    1387             : 
    1388     5965489 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1389     5966153 :         if (error)
    1390             :                 return error;
    1391     5965814 :         error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
    1392     5966221 :                         xfs_btree_bbsize(cur), 0, bpp);
    1393     5966145 :         if (error)
    1394             :                 return error;
    1395             : 
    1396     5966145 :         (*bpp)->b_ops = cur->bc_ops->buf_ops;
    1397     5966145 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1398     5966145 :         return 0;
    1399             : }
    1400             : 
    1401             : /*
    1402             :  * Read in the buffer at the given ptr and return the buffer and
    1403             :  * the block pointer within the buffer.
    1404             :  */
    1405             : int
    1406 20602847496 : xfs_btree_read_buf_block(
    1407             :         struct xfs_btree_cur            *cur,
    1408             :         const union xfs_btree_ptr       *ptr,
    1409             :         int                             flags,
    1410             :         struct xfs_btree_block          **block,
    1411             :         struct xfs_buf                  **bpp)
    1412             : {
    1413 20602847496 :         struct xfs_mount        *mp = cur->bc_mp;
    1414 20602847496 :         xfs_daddr_t             d;
    1415 20602847496 :         int                     error;
    1416             : 
    1417             :         /* need to sort out how callers deal with failures first */
    1418 20602847496 :         ASSERT(!(flags & XBF_TRYLOCK));
    1419             : 
    1420 20602847496 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1421 20606680455 :         if (error)
    1422             :                 return error;
    1423 20594417516 :         error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
    1424 20607228275 :                         xfs_btree_bbsize(cur), flags, bpp,
    1425 20607228275 :                         cur->bc_ops->buf_ops);
    1426 20611090799 :         if (xfs_metadata_is_sick(error))
    1427        1352 :                 xfs_btree_mark_sick(cur);
    1428 20611090799 :         if (error)
    1429             :                 return error;
    1430             : 
    1431 20611702841 :         xfs_btree_set_refs(cur, *bpp);
    1432 20607101649 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1433 20607101649 :         return 0;
    1434             : }
    1435             : 
    1436             : /*
    1437             :  * Copy keys from one btree block to another.
    1438             :  */
    1439             : void
    1440   670547246 : xfs_btree_copy_keys(
    1441             :         struct xfs_btree_cur            *cur,
    1442             :         union xfs_btree_key             *dst_key,
    1443             :         const union xfs_btree_key       *src_key,
    1444             :         int                             numkeys)
    1445             : {
    1446   670547246 :         ASSERT(numkeys >= 0);
    1447  1341094492 :         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
    1448   670547246 : }
    1449             : 
    1450             : /*
    1451             :  * Copy records from one btree block to another.
    1452             :  */
    1453             : STATIC void
    1454  2997999376 : xfs_btree_copy_recs(
    1455             :         struct xfs_btree_cur    *cur,
    1456             :         union xfs_btree_rec     *dst_rec,
    1457             :         union xfs_btree_rec     *src_rec,
    1458             :         int                     numrecs)
    1459             : {
    1460  2997999376 :         ASSERT(numrecs >= 0);
    1461  5995998752 :         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
    1462  2997999376 : }
    1463             : 
    1464             : /*
    1465             :  * Copy block pointers from one btree block to another.
    1466             :  */
    1467             : void
    1468    39767478 : xfs_btree_copy_ptrs(
    1469             :         struct xfs_btree_cur    *cur,
    1470             :         union xfs_btree_ptr     *dst_ptr,
    1471             :         const union xfs_btree_ptr *src_ptr,
    1472             :         int                     numptrs)
    1473             : {
    1474    39767478 :         ASSERT(numptrs >= 0);
    1475    87658747 :         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
    1476    39767478 : }
    1477             : 
    1478             : /*
    1479             :  * Shift keys one index left/right inside a single btree block.
    1480             :  */
    1481             : STATIC void
    1482     4348587 : xfs_btree_shift_keys(
    1483             :         struct xfs_btree_cur    *cur,
    1484             :         union xfs_btree_key     *key,
    1485             :         int                     dir,
    1486             :         int                     numkeys)
    1487             : {
    1488     4348587 :         char                    *dst_key;
    1489             : 
    1490     4348587 :         ASSERT(numkeys >= 0);
    1491     4348587 :         ASSERT(dir == 1 || dir == -1);
    1492             : 
    1493     4348587 :         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
    1494     8697174 :         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
    1495     4348587 : }
    1496             : 
    1497             : /*
    1498             :  * Shift records one index left/right inside a single btree block.
    1499             :  */
    1500             : STATIC void
    1501  2402781324 : xfs_btree_shift_recs(
    1502             :         struct xfs_btree_cur    *cur,
    1503             :         union xfs_btree_rec     *rec,
    1504             :         int                     dir,
    1505             :         int                     numrecs)
    1506             : {
    1507  2402781324 :         char                    *dst_rec;
    1508             : 
    1509  2402781324 :         ASSERT(numrecs >= 0);
    1510  2402781324 :         ASSERT(dir == 1 || dir == -1);
    1511             : 
    1512  2402781324 :         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
    1513  4805562648 :         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
    1514  2402781324 : }
    1515             : 
    1516             : /*
    1517             :  * Shift block pointers one index left/right inside a single btree block.
    1518             :  */
    1519             : STATIC void
    1520     4348581 : xfs_btree_shift_ptrs(
    1521             :         struct xfs_btree_cur    *cur,
    1522             :         union xfs_btree_ptr     *ptr,
    1523             :         int                     dir,
    1524             :         int                     numptrs)
    1525             : {
    1526     4348581 :         char                    *dst_ptr;
    1527             : 
    1528     4348581 :         ASSERT(numptrs >= 0);
    1529     4348581 :         ASSERT(dir == 1 || dir == -1);
    1530             : 
    1531     4348581 :         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
    1532     8697162 :         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
    1533     4348581 : }
    1534             : 
    1535             : /*
    1536             :  * Log key values from the btree block.
    1537             :  */
    1538             : STATIC void
    1539   666283745 : xfs_btree_log_keys(
    1540             :         struct xfs_btree_cur    *cur,
    1541             :         struct xfs_buf          *bp,
    1542             :         int                     first,
    1543             :         int                     last)
    1544             : {
    1545             : 
    1546   666283745 :         if (bp) {
    1547   615079709 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1548   615066321 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1549   615066321 :                                   xfs_btree_key_offset(cur, first),
    1550   615066321 :                                   xfs_btree_key_offset(cur, last + 1) - 1);
    1551             :         } else {
    1552    51204036 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1553    51204036 :                                 xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1554             :         }
    1555   666283802 : }
    1556             : 
    1557             : /*
    1558             :  * Log record values from the btree block.
    1559             :  */
    1560             : void
    1561  3807182821 : xfs_btree_log_recs(
    1562             :         struct xfs_btree_cur    *cur,
    1563             :         struct xfs_buf          *bp,
    1564             :         int                     first,
    1565             :         int                     last)
    1566             : {
    1567  3807182821 :         if (!bp) {
    1568    63436902 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1569    31718451 :                                 xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1570    31718451 :                 return;
    1571             :         }
    1572             : 
    1573  3775464370 :         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1574  3775179272 :         xfs_trans_log_buf(cur->bc_tp, bp,
    1575  3775179272 :                           xfs_btree_rec_offset(cur, first),
    1576  3775179272 :                           xfs_btree_rec_offset(cur, last + 1) - 1);
    1577             : }
    1578             : 
    1579             : /*
    1580             :  * Log block pointer fields from a btree block (nonleaf).
    1581             :  */
    1582             : STATIC void
    1583     4965078 : xfs_btree_log_ptrs(
    1584             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1585             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1586             :         int                     first,  /* index of first pointer to log */
    1587             :         int                     last)   /* index of last pointer to log */
    1588             : {
    1589             : 
    1590     4965078 :         if (bp) {
    1591     4758433 :                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    1592     4758433 :                 int                     level = xfs_btree_get_level(block);
    1593             : 
    1594     4758433 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1595     4758444 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1596     4758441 :                                 xfs_btree_ptr_offset(cur, first, level),
    1597     4758432 :                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
    1598             :         } else {
    1599      206645 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1600      206645 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1601             :         }
    1602             : 
    1603     4965089 : }
    1604             : 
    1605             : /*
    1606             :  * Log fields from a btree block header.
    1607             :  */
    1608             : void
    1609  3121645687 : xfs_btree_log_block(
    1610             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1611             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1612             :         uint32_t                fields) /* mask of fields: XFS_BB_... */
    1613             : {
    1614  3121645687 :         int                     first;  /* first byte offset logged */
    1615  3121645687 :         int                     last;   /* last byte offset logged */
    1616  3121645687 :         static const short      soffsets[] = {  /* table of offsets (short) */
    1617             :                 offsetof(struct xfs_btree_block, bb_magic),
    1618             :                 offsetof(struct xfs_btree_block, bb_level),
    1619             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1620             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
    1621             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
    1622             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
    1623             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
    1624             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
    1625             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
    1626             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
    1627             :                 XFS_BTREE_SBLOCK_CRC_LEN
    1628             :         };
    1629  3121645687 :         static const short      loffsets[] = {  /* table of offsets (long) */
    1630             :                 offsetof(struct xfs_btree_block, bb_magic),
    1631             :                 offsetof(struct xfs_btree_block, bb_level),
    1632             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1633             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
    1634             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
    1635             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
    1636             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
    1637             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
    1638             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
    1639             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
    1640             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
    1641             :                 XFS_BTREE_LBLOCK_CRC_LEN
    1642             :         };
    1643             : 
    1644  3121645687 :         if (bp) {
    1645  3113761311 :                 int nbits;
    1646             : 
    1647  3113761311 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    1648             :                         /*
    1649             :                          * We don't log the CRC when updating a btree
    1650             :                          * block but instead recreate it during log
    1651             :                          * recovery.  As the log buffers have checksums
    1652             :                          * of their own this is safe and avoids logging a crc
    1653             :                          * update in a lot of places.
    1654             :                          */
    1655  3113745812 :                         if (fields == XFS_BB_ALL_BITS)
    1656     7192372 :                                 fields = XFS_BB_ALL_BITS_CRC;
    1657             :                         nbits = XFS_BB_NUM_BITS_CRC;
    1658             :                 } else {
    1659             :                         nbits = XFS_BB_NUM_BITS;
    1660             :                 }
    1661  3113761311 :                 xfs_btree_offsets(fields,
    1662  3113761311 :                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
    1663             :                                         loffsets : soffsets,
    1664             :                                   nbits, &first, &last);
    1665  3113583602 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1666  3113668140 :                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
    1667             :         } else {
    1668     7884376 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1669     7884376 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1670             :         }
    1671  3121765115 : }
    1672             : 
    1673             : /*
    1674             :  * Increment cursor by one record at the level.
    1675             :  * For nonzero levels the leaf-ward information is untouched.
    1676             :  */
    1677             : int                                             /* error */
    1678 >25016*10^7 : xfs_btree_increment(
    1679             :         struct xfs_btree_cur    *cur,
    1680             :         int                     level,
    1681             :         int                     *stat)          /* success/failure */
    1682             : {
    1683 >25016*10^7 :         struct xfs_btree_block  *block;
    1684 >25016*10^7 :         union xfs_btree_ptr     ptr;
    1685 >25016*10^7 :         struct xfs_buf          *bp;
    1686 >25016*10^7 :         int                     error;          /* error return value */
    1687 >25016*10^7 :         int                     lev;
    1688             : 
    1689 >25016*10^7 :         ASSERT(level < cur->bc_nlevels);
    1690             : 
    1691             :         /* Read-ahead to the right at this level. */
    1692 >25016*10^7 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    1693             : 
    1694             :         /* Get a pointer to the btree block. */
    1695 >25046*10^7 :         block = xfs_btree_get_block(cur, level, &bp);
    1696             : 
    1697             : #ifdef DEBUG
    1698 >25037*10^7 :         error = xfs_btree_check_block(cur, block, level, bp);
    1699 >24962*10^7 :         if (error)
    1700           0 :                 goto error0;
    1701             : #endif
    1702             : 
    1703             :         /* We're done if we remain in the block after the increment. */
    1704 >24962*10^7 :         if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
    1705 >24238*10^7 :                 goto out1;
    1706             : 
    1707             :         /* Fail if we just went off the right edge of the tree. */
    1708  7246226259 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1709 14488708568 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1710  5477910187 :                 goto out0;
    1711             : 
    1712  1766444097 :         XFS_BTREE_STATS_INC(cur, increment);
    1713             : 
    1714             :         /*
    1715             :          * March up the tree incrementing pointers.
    1716             :          * Stop when we don't go off the right edge of a block.
    1717             :          */
    1718  1785816045 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1719  1785844058 :                 block = xfs_btree_get_block(cur, lev, &bp);
    1720             : 
    1721             : #ifdef DEBUG
    1722  1785802680 :                 error = xfs_btree_check_block(cur, block, lev, bp);
    1723  1785814045 :                 if (error)
    1724           0 :                         goto error0;
    1725             : #endif
    1726             : 
    1727  1785814045 :                 if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
    1728             :                         break;
    1729             : 
    1730             :                 /* Read-ahead the right block for the next loop. */
    1731    19175559 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
    1732             :         }
    1733             : 
    1734             :         /*
    1735             :          * If we went off the root then we are either seriously
    1736             :          * confused or have the tree root in an inode.
    1737             :          */
    1738  1766610473 :         if (lev == cur->bc_nlevels) {
    1739           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1740           0 :                         goto out0;
    1741           0 :                 ASSERT(0);
    1742           0 :                 xfs_btree_mark_sick(cur);
    1743           0 :                 error = -EFSCORRUPTED;
    1744           0 :                 goto error0;
    1745             :         }
    1746  1766610473 :         ASSERT(lev < cur->bc_nlevels);
    1747             : 
    1748             :         /*
    1749             :          * Now walk back down the tree, fixing up the cursor's buffer
    1750             :          * pointers and key numbers.
    1751             :          */
    1752  3552543537 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1753  1785750844 :                 union xfs_btree_ptr     *ptrp;
    1754             : 
    1755  1785750844 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1756  1785787051 :                 --lev;
    1757  1785787051 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1758  1785702147 :                 if (error)
    1759          23 :                         goto error0;
    1760             : 
    1761  1785702124 :                 xfs_btree_setbuf(cur, lev, bp);
    1762  1785933064 :                 cur->bc_levels[lev].ptr = 1;
    1763             :         }
    1764  1766771162 : out1:
    1765 >24414*10^7 :         *stat = 1;
    1766 >24414*10^7 :         return 0;
    1767             : 
    1768  5477910187 : out0:
    1769  5477910187 :         *stat = 0;
    1770  5477910187 :         return 0;
    1771             : 
    1772             : error0:
    1773             :         return error;
    1774             : }
    1775             : 
    1776             : /*
    1777             :  * Decrement cursor by one record at the level.
    1778             :  * For nonzero levels the leaf-ward information is untouched.
    1779             :  */
    1780             : int                                             /* error */
    1781  2906331846 : xfs_btree_decrement(
    1782             :         struct xfs_btree_cur    *cur,
    1783             :         int                     level,
    1784             :         int                     *stat)          /* success/failure */
    1785             : {
    1786  2906331846 :         struct xfs_btree_block  *block;
    1787  2906331846 :         struct xfs_buf          *bp;
    1788  2906331846 :         int                     error;          /* error return value */
    1789  2906331846 :         int                     lev;
    1790  2906331846 :         union xfs_btree_ptr     ptr;
    1791             : 
    1792  2906331846 :         ASSERT(level < cur->bc_nlevels);
    1793             : 
    1794             :         /* Read-ahead to the left at this level. */
    1795  2906331846 :         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
    1796             : 
    1797             :         /* We're done if we remain in the block after the decrement. */
    1798  2906306442 :         if (--cur->bc_levels[level].ptr > 0)
    1799  2478177049 :                 goto out1;
    1800             : 
    1801             :         /* Get a pointer to the btree block. */
    1802   428129393 :         block = xfs_btree_get_block(cur, level, &bp);
    1803             : 
    1804             : #ifdef DEBUG
    1805   428128679 :         error = xfs_btree_check_block(cur, block, level, bp);
    1806   428132656 :         if (error)
    1807           0 :                 goto error0;
    1808             : #endif
    1809             : 
    1810             :         /* Fail if we just went off the left edge of the tree. */
    1811   428132656 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    1812   856261386 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1813   292896641 :                 goto out0;
    1814             : 
    1815   135234052 :         XFS_BTREE_STATS_INC(cur, decrement);
    1816             : 
    1817             :         /*
    1818             :          * March up the tree decrementing pointers.
    1819             :          * Stop when we don't go off the left edge of a block.
    1820             :          */
    1821   135736164 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1822   135736200 :                 if (--cur->bc_levels[lev].ptr > 0)
    1823             :                         break;
    1824             :                 /* Read-ahead the left block for the next loop. */
    1825      500966 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
    1826             :         }
    1827             : 
    1828             :         /*
    1829             :          * If we went off the root then we are either seriously
    1830             :          * confused or have the tree root in an inode.
    1831             :          */
    1832   135235198 :         if (lev == cur->bc_nlevels) {
    1833           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1834           0 :                         goto out0;
    1835           0 :                 ASSERT(0);
    1836           0 :                 xfs_btree_mark_sick(cur);
    1837           0 :                 error = -EFSCORRUPTED;
    1838           0 :                 goto error0;
    1839             :         }
    1840   135235198 :         ASSERT(lev < cur->bc_nlevels);
    1841             : 
    1842             :         /*
    1843             :          * Now walk back down the tree, fixing up the cursor's buffer
    1844             :          * pointers and key numbers.
    1845             :          */
    1846   270965708 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1847   135735170 :                 union xfs_btree_ptr     *ptrp;
    1848             : 
    1849   135735170 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1850   135733920 :                 --lev;
    1851   135733920 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1852   135736749 :                 if (error)
    1853          10 :                         goto error0;
    1854   135736739 :                 xfs_btree_setbuf(cur, lev, bp);
    1855   135730510 :                 cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
    1856             :         }
    1857   135229553 : out1:
    1858  2613406602 :         *stat = 1;
    1859  2613406602 :         return 0;
    1860             : 
    1861   292896641 : out0:
    1862   292896641 :         *stat = 0;
    1863   292896641 :         return 0;
    1864             : 
    1865             : error0:
    1866             :         return error;
    1867             : }
    1868             : 
    1869             : /*
    1870             :  * Check the btree block owner now that we have the context to know who the
    1871             :  * real owner is.
    1872             :  */
    1873             : static inline xfs_failaddr_t
    1874 18323416887 : xfs_btree_check_block_owner(
    1875             :         struct xfs_btree_cur    *cur,
    1876             :         struct xfs_btree_block  *block)
    1877             : {
    1878 18323416887 :         if (!xfs_has_crc(cur->bc_mp))
    1879             :                 return NULL;
    1880             : 
    1881 18323412772 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1882  1294354826 :                 return xfbtree_check_block_owner(cur, block);
    1883             : 
    1884 17029057946 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
    1885 14373178167 :                 if (be32_to_cpu(block->bb_u.s.bb_owner) !=
    1886 14373178167 :                                                 cur->bc_ag.pag->pag_agno)
    1887           0 :                         return __this_address;
    1888             :                 return NULL;
    1889             :         }
    1890             : 
    1891  2655879779 :         if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
    1892             :                 return NULL;
    1893             : 
    1894  2655834749 :         if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
    1895           0 :                 return __this_address;
    1896             : 
    1897             :         return NULL;
    1898             : }
    1899             : 
    1900             : int
    1901 47157112396 : xfs_btree_lookup_get_block(
    1902             :         struct xfs_btree_cur            *cur,   /* btree cursor */
    1903             :         int                             level,  /* level in the btree */
    1904             :         const union xfs_btree_ptr       *pp,    /* ptr to btree block */
    1905             :         struct xfs_btree_block          **blkp) /* return btree block */
    1906             : {
    1907 47157112396 :         struct xfs_buf          *bp;    /* buffer pointer for btree block */
    1908 47157112396 :         xfs_daddr_t             daddr;
    1909 47157112396 :         int                     error = 0;
    1910             : 
    1911             :         /* special case the root block if in an inode */
    1912 47157112396 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1913  9115702244 :             (level == cur->bc_nlevels - 1)) {
    1914  3495134677 :                 *blkp = xfs_btree_get_iroot(cur);
    1915  3489840477 :                 return 0;
    1916             :         }
    1917             : 
    1918             :         /*
    1919             :          * If the old buffer at this level is for the disk address we are
    1920             :          * looking for, re-use it.
    1921             :          *
    1922             :          * Otherwise throw it away and get a new one.
    1923             :          */
    1924 43661977719 :         bp = cur->bc_levels[level].bp;
    1925 43661977719 :         error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
    1926 43664039966 :         if (error)
    1927             :                 return error;
    1928 43664039966 :         if (bp && xfs_buf_daddr(bp) == daddr) {
    1929 25345801356 :                 *blkp = XFS_BUF_TO_BLOCK(bp);
    1930 25345801356 :                 return 0;
    1931             :         }
    1932             : 
    1933 18318238610 :         error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
    1934 18323580051 :         if (error)
    1935             :                 return error;
    1936             : 
    1937             :         /* Check the block owner since the verifiers don't. */
    1938 18323943314 :         if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
    1939           0 :                 goto out_bad;
    1940             : 
    1941             :         /* Did we get the level we were looking for? */
    1942 18322684589 :         if (be16_to_cpu((*blkp)->bb_level) != level)
    1943           0 :                 goto out_bad;
    1944             : 
    1945             :         /* Check that internal nodes have at least one record. */
    1946 18322684589 :         if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
    1947           0 :                 goto out_bad;
    1948             : 
    1949 18322684589 :         xfs_btree_setbuf(cur, level, bp);
    1950 18322684589 :         return 0;
    1951             : 
    1952           0 : out_bad:
    1953           0 :         *blkp = NULL;
    1954           0 :         xfs_buf_mark_corrupt(bp);
    1955           0 :         xfs_trans_brelse(cur->bc_tp, bp);
    1956           0 :         xfs_btree_mark_sick(cur);
    1957           0 :         return -EFSCORRUPTED;
    1958             : }
    1959             : 
    1960             : /*
    1961             :  * Get current search key.  For level 0 we don't actually have a key
    1962             :  * structure so we make one up from the record.  For all other levels
    1963             :  * we just return the right key.
    1964             :  */
    1965             : STATIC union xfs_btree_key *
    1966 >17565*10^7 : xfs_lookup_get_search_key(
    1967             :         struct xfs_btree_cur    *cur,
    1968             :         int                     level,
    1969             :         int                     keyno,
    1970             :         struct xfs_btree_block  *block,
    1971             :         union xfs_btree_key     *kp)
    1972             : {
    1973 >17565*10^7 :         if (level == 0) {
    1974 >12217*10^7 :                 cur->bc_ops->init_key_from_rec(kp,
    1975             :                                 xfs_btree_rec_addr(cur, keyno, block));
    1976 >12217*10^7 :                 return kp;
    1977             :         }
    1978             : 
    1979 53476925652 :         return xfs_btree_key_addr(cur, keyno, block);
    1980             : }
    1981             : 
    1982             : /*
    1983             :  * Look up the record.  The cursor is made to point to it, based on dir.
    1984             :  * stat is set to 0 if no such record can be found, 1 for success.
    1985             :  */
    1986             : int                                     /* error */
    1987 25710616410 : xfs_btree_lookup(
    1988             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1989             :         xfs_lookup_t            dir,    /* <=, ==, or >= */
    1990             :         int                     *stat)  /* success/failure */
    1991             : {
    1992 25710616410 :         struct xfs_btree_block  *block; /* current btree block */
    1993 25710616410 :         int64_t                 diff;   /* difference for the current key */
    1994 25710616410 :         int                     error;  /* error return value */
    1995 25710616410 :         int                     keyno;  /* current key number */
    1996 25710616410 :         int                     level;  /* level in the btree */
    1997 25710616410 :         union xfs_btree_ptr     *pp;    /* ptr to btree block */
    1998 25710616410 :         union xfs_btree_ptr     ptr;    /* ptr to btree block */
    1999             : 
    2000 25710616410 :         XFS_BTREE_STATS_INC(cur, lookup);
    2001             : 
    2002             :         /* No such thing as a zero-level tree. */
    2003 25692474274 :         if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
    2004           0 :                 xfs_btree_mark_sick(cur);
    2005           0 :                 return -EFSCORRUPTED;
    2006             :         }
    2007             : 
    2008 25692474274 :         block = NULL;
    2009 25692474274 :         keyno = 0;
    2010             : 
    2011             :         /* initialise start pointer from cursor */
    2012 25692474274 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    2013 25700942447 :         pp = &ptr;
    2014             : 
    2015             :         /*
    2016             :          * Iterate over each level in the btree, starting at the root.
    2017             :          * For each level above the leaves, find the key we need, based
    2018             :          * on the lookup record, then follow the corresponding block
    2019             :          * pointer down to the next level.
    2020             :          */
    2021 64326578934 :         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
    2022             :                 /* Get the block we need to do the lookup on. */
    2023 41284776044 :                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
    2024 41291500079 :                 if (error)
    2025        6796 :                         goto error0;
    2026             : 
    2027 41291493283 :                 if (diff == 0) {
    2028             :                         /*
    2029             :                          * If we already had a key match at a higher level, we
    2030             :                          * know we need to use the first entry in this block.
    2031             :                          */
    2032             :                         keyno = 1;
    2033             :                 } else {
    2034             :                         /* Otherwise search this block. Do a binary search. */
    2035             : 
    2036 41238233546 :                         int     high;   /* high entry number */
    2037 41238233546 :                         int     low;    /* low entry number */
    2038             : 
    2039             :                         /* Set low and high entry numbers, 1-based. */
    2040 41238233546 :                         low = 1;
    2041 41238233546 :                         high = xfs_btree_get_numrecs(block);
    2042 41238233546 :                         if (!high) {
    2043             :                                 /* Block is empty, must be an empty leaf. */
    2044  2668507518 :                                 if (level != 0 || cur->bc_nlevels != 1) {
    2045           0 :                                         XFS_CORRUPTION_ERROR(__func__,
    2046             :                                                         XFS_ERRLEVEL_LOW,
    2047             :                                                         cur->bc_mp, block,
    2048             :                                                         sizeof(*block));
    2049           0 :                                         xfs_btree_mark_sick(cur);
    2050           0 :                                         return -EFSCORRUPTED;
    2051             :                                 }
    2052             : 
    2053  2668507518 :                                 cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
    2054  2668507518 :                                 *stat = 0;
    2055  2668507518 :                                 return 0;
    2056             :                         }
    2057             : 
    2058             :                         /* Binary search the block. */
    2059 >21067*10^7 :                         while (low <= high) {
    2060 >17573*10^7 :                                 union xfs_btree_key     key;
    2061 >17573*10^7 :                                 union xfs_btree_key     *kp;
    2062             : 
    2063 >17573*10^7 :                                 XFS_BTREE_STATS_INC(cur, compare);
    2064             : 
    2065             :                                 /* keyno is average of low and high. */
    2066 >17543*10^7 :                                 keyno = (low + high) >> 1;
    2067             : 
    2068             :                                 /* Get current search key */
    2069 >17543*10^7 :                                 kp = xfs_lookup_get_search_key(cur, level,
    2070             :                                                 keyno, block, &key);
    2071             : 
    2072             :                                 /*
    2073             :                                  * Compute difference to get next direction:
    2074             :                                  *  - less than, move right
    2075             :                                  *  - greater than, move left
    2076             :                                  *  - equal, we're done
    2077             :                                  */
    2078 >17535*10^7 :                                 diff = cur->bc_ops->key_diff(cur, kp);
    2079 >17575*10^7 :                                 if (diff < 0)
    2080 99635221023 :                                         low = keyno + 1;
    2081 76115207071 :                                 else if (diff > 0)
    2082 72474200254 :                                         high = keyno - 1;
    2083             :                                 else
    2084             :                                         break;
    2085             :                         }
    2086             :                 }
    2087             : 
    2088             :                 /*
    2089             :                  * If there are more levels, set up for the next level
    2090             :                  * by getting the block number and filling in the cursor.
    2091             :                  */
    2092 38638854681 :                 if (level > 0) {
    2093             :                         /*
    2094             :                          * If we moved left, need the previous key number,
    2095             :                          * unless there isn't one.
    2096             :                          */
    2097 15595449779 :                         if (diff > 0 && --keyno < 1)
    2098             :                                 keyno = 1;
    2099 15595449779 :                         pp = xfs_btree_ptr_addr(cur, keyno, block);
    2100             : 
    2101 15585464082 :                         error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
    2102 15582231585 :                         if (error)
    2103           0 :                                 goto error0;
    2104             : 
    2105 15582231585 :                         cur->bc_levels[level].ptr = keyno;
    2106             :                 }
    2107             :         }
    2108             : 
    2109             :         /* Done with the search. See if we need to adjust the results. */
    2110 23041802890 :         if (dir != XFS_LOOKUP_LE && diff < 0) {
    2111  1238982200 :                 keyno++;
    2112             :                 /*
    2113             :                  * If ge search and we went off the end of the block, but it's
    2114             :                  * not the last block, we're in the wrong block.
    2115             :                  */
    2116  1238982200 :                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    2117  1238966534 :                 if (dir == XFS_LOOKUP_GE &&
    2118   384287945 :                     keyno > xfs_btree_get_numrecs(block) &&
    2119             :                     !xfs_btree_ptr_is_null(cur, &ptr)) {
    2120     2135389 :                         int     i;
    2121             : 
    2122     2135389 :                         cur->bc_levels[0].ptr = keyno;
    2123     2135389 :                         error = xfs_btree_increment(cur, 0, &i);
    2124     2135389 :                         if (error)
    2125           5 :                                 goto error0;
    2126     2135384 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    2127           0 :                                 xfs_btree_mark_sick(cur);
    2128     2135384 :                                 return -EFSCORRUPTED;
    2129             :                         }
    2130     2135384 :                         *stat = 1;
    2131     2135384 :                         return 0;
    2132             :                 }
    2133 21802820690 :         } else if (dir == XFS_LOOKUP_LE && diff > 0)
    2134  9976589165 :                 keyno--;
    2135 23039651835 :         cur->bc_levels[0].ptr = keyno;
    2136             : 
    2137             :         /* Return if we succeeded or not. */
    2138 23039651835 :         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
    2139  4983749680 :                 *stat = 0;
    2140 18055902155 :         else if (dir != XFS_LOOKUP_EQ || diff == 0)
    2141 17170200089 :                 *stat = 1;
    2142             :         else
    2143   885702066 :                 *stat = 0;
    2144             :         return 0;
    2145             : 
    2146             : error0:
    2147             :         return error;
    2148             : }
    2149             : 
    2150             : /* Find the high key storage area from a regular key. */
    2151             : union xfs_btree_key *
    2152  3079569984 : xfs_btree_high_key_from_key(
    2153             :         struct xfs_btree_cur    *cur,
    2154             :         union xfs_btree_key     *key)
    2155             : {
    2156  3079569984 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2157  3079569984 :         return (union xfs_btree_key *)((char *)key +
    2158  3079569984 :                         (cur->bc_ops->key_len / 2));
    2159             : }
    2160             : 
    2161             : /* Determine the low (and high if overlapped) keys of a leaf block */
    2162             : STATIC void
    2163  2061869312 : xfs_btree_get_leaf_keys(
    2164             :         struct xfs_btree_cur    *cur,
    2165             :         struct xfs_btree_block  *block,
    2166             :         union xfs_btree_key     *key)
    2167             : {
    2168  2061869312 :         union xfs_btree_key     max_hkey;
    2169  2061869312 :         union xfs_btree_key     hkey;
    2170  2061869312 :         union xfs_btree_rec     *rec;
    2171  2061869312 :         union xfs_btree_key     *high;
    2172  2061869312 :         int                     n;
    2173             : 
    2174  2061869312 :         rec = xfs_btree_rec_addr(cur, 1, block);
    2175  2061869312 :         cur->bc_ops->init_key_from_rec(key, rec);
    2176             : 
    2177  2061962436 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2178             : 
    2179  1363847542 :                 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
    2180 >18142*10^7 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2181 >17869*10^7 :                         rec = xfs_btree_rec_addr(cur, n, block);
    2182 >17869*10^7 :                         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
    2183 >17854*10^7 :                         if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
    2184 >17500*10^7 :                                 max_hkey = hkey;
    2185             :                 }
    2186             : 
    2187  1363997963 :                 high = xfs_btree_high_key_from_key(cur, key);
    2188  2727956746 :                 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
    2189             :         }
    2190  2062093267 : }
    2191             : 
    2192             : /* Determine the low (and high if overlapped) keys of a node block */
    2193             : STATIC void
    2194   353714661 : xfs_btree_get_node_keys(
    2195             :         struct xfs_btree_cur    *cur,
    2196             :         struct xfs_btree_block  *block,
    2197             :         union xfs_btree_key     *key)
    2198             : {
    2199   353714661 :         union xfs_btree_key     *hkey;
    2200   353714661 :         union xfs_btree_key     *max_hkey;
    2201   353714661 :         union xfs_btree_key     *high;
    2202   353714661 :         int                     n;
    2203             : 
    2204   353714661 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2205   706861038 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2206             :                                 cur->bc_ops->key_len / 2);
    2207             : 
    2208   353430519 :                 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
    2209 24456883863 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2210 24103457119 :                         hkey = xfs_btree_high_key_addr(cur, n, block);
    2211 24103457119 :                         if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
    2212 23789929704 :                                 max_hkey = hkey;
    2213             :                 }
    2214             : 
    2215   353426744 :                 high = xfs_btree_high_key_from_key(cur, key);
    2216   706855430 :                 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
    2217             :         } else {
    2218      568284 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2219             :                                 cur->bc_ops->key_len);
    2220             :         }
    2221   353711857 : }
    2222             : 
    2223             : /* Derive the keys for any btree block: level 0 goes through the leaf helper, every other level through the node helper. */
    2224             : void
    2225  2062458819 : xfs_btree_get_keys(
    2226             :         struct xfs_btree_cur    *cur,   /* btree cursor, handed on to the helper */
    2227             :         struct xfs_btree_block  *block, /* block whose keys are wanted */
    2228             :         union xfs_btree_key     *key)   /* out: filled in by the helper */
    2229             : {
    2230  2062458819 :         if (be16_to_cpu(block->bb_level) == 0) /* bb_level is converted from big-endian before the test */
    2231  2058599219 :                 xfs_btree_get_leaf_keys(cur, block, key);
    2232             :         else
    2233     3859600 :                 xfs_btree_get_node_keys(cur, block, key);
    2234  2062613774 : }
    2235             : 
    2236             : /*
    2237             :  * Decide if we need to update the parent keys of a btree block after the
    2238             :  * entry at index @ptr changed.  For a standard btree this is only necessary
    2239             :  * if we're updating the first record/key (@ptr == 1).  For an overlapping
    2240             :  * btree (XFS_BTREE_OVERLAPPING set in cur->bc_flags) we must always update,
    2241             :  * because the highest key can be in any of the records or keys in the block.
    2242             :  */
    2243             : static inline bool
    2244             : xfs_btree_needs_key_update(
    2245             :         struct xfs_btree_cur    *cur,   /* only bc_flags is consulted */
    2246             :         int                     ptr)    /* index of the entry that changed; 1 is the first entry */
    2247             : {
    2248  2123077465 :         return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
    2249             : }
    2250             : 
    2251             : /*
    2252             :  * Update the low and high parent keys of the given level, progressing
    2253             :  * towards the root.  If force_all is false, stop at the first level whose
    2254             :  * stored keys already match.  Overlapping btrees only (asserted).  Returns 0, or a block-check error in DEBUG builds.
    2255             :  */
    2256             : STATIC int
    2257  1390355410 : __xfs_btree_updkeys(
    2258             :         struct xfs_btree_cur    *cur,
    2259             :         int                     level,  /* level of @block; parents start at level + 1 */
    2260             :         struct xfs_btree_block  *block, /* block whose keys seed the update */
    2261             :         struct xfs_buf          *bp0,   /* buffer of @block; only used for the tracepoint here */
    2262             :         bool                    force_all) /* true: rewrite every level up to the root */
    2263             : {
    2264  1390355410 :         union xfs_btree_key     key;    /* keys from current level */
    2265  1390355410 :         union xfs_btree_key     *lkey;  /* low key of the current level (points at key) */
    2266  1390355410 :         union xfs_btree_key     *hkey;  /* high key of the current level (derived from lkey) */
    2267  1390355410 :         union xfs_btree_key     *nlkey; /* low key slot in the next level up */
    2268  1390355410 :         union xfs_btree_key     *nhkey; /* high key slot in the next level up */
    2269  1390355410 :         struct xfs_buf          *bp;
    2270  1390355410 :         int                     ptr;
    2271             : 
    2272  1390355410 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2273             : 
    2274             :         /* Exit if there aren't any parent levels to update. */
    2275  1390355410 :         if (level + 1 >= cur->bc_nlevels)
    2276             :                 return 0;
    2277             : 
    2278  1336733439 :         trace_xfs_btree_updkeys(cur, level, bp0);
    2279             : 
    2280  1336712029 :         lkey = &key;
    2281  1336712029 :         hkey = xfs_btree_high_key_from_key(cur, lkey);
    2282  1336712820 :         xfs_btree_get_keys(cur, block, lkey); /* seed key with the starting block's keys */
    2283  3023314271 :         for (level++; level < cur->bc_nlevels; level++) { /* walk the parents, one level per pass */
    2284             : #ifdef DEBUG
    2285  1686601451 :                 int             error;
    2286             : #endif
    2287  1686601451 :                 block = xfs_btree_get_block(cur, level, &bp);
    2288  1686600239 :                 trace_xfs_btree_updkeys(cur, level, bp);
    2289             : #ifdef DEBUG
    2290  1686610398 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2291  1686545175 :                 if (error)
    2292           0 :                         return error;
    2293             : #endif
    2294  1686545175 :                 ptr = cur->bc_levels[level].ptr; /* slot in this parent that the cursor path goes through */
    2295  1686545175 :                 nlkey = xfs_btree_key_addr(cur, ptr, block);
    2296  1686545175 :                 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
    2297  3372649007 :                 if (!force_all &&
    2298  1514585413 :                     xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
    2299             :                     xfs_btree_keycmp_eq(cur, nhkey, hkey))
    2300             :                         break; /* parent already holds both keys: nothing further to propagate */
    2301   474355634 :                 xfs_btree_copy_keys(cur, nlkey, lkey, 1); /* NOTE(review): presumably copies low and high halves together - confirm */
    2302   474437473 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2303   474438888 :                 if (level + 1 >= cur->bc_nlevels)
    2304             :                         break; /* that was the top level */
    2305   349830653 :                 xfs_btree_get_node_keys(cur, block, lkey); /* recompute this node's keys for the next level up */
    2306             :         }
    2307             : 
    2308             :         return 0;
    2309             : }
    2310             : 
    2311             : /* Rewrite the parent keys from @level in the cursor all the way back to the root, without the early-exit check (force_all = true). */
    2312             : STATIC int
    2313      238446 : xfs_btree_updkeys_force(
    2314             :         struct xfs_btree_cur    *cur,   /* must be an overlapping btree: __xfs_btree_updkeys asserts it */
    2315             :         int                     level)  /* level whose block seeds the update */
    2316             : {
    2317      238446 :         struct xfs_buf          *bp;
    2318      238446 :         struct xfs_btree_block  *block;
    2319             : 
    2320      238446 :         block = xfs_btree_get_block(cur, level, &bp);
    2321      238446 :         return __xfs_btree_updkeys(cur, level, block, bp, true);
    2322             : }
    2323             : 
    2324             : /*
    2325             :  * Update the parent keys of the given level, progressing towards the root.  Overlapping btrees are handed to __xfs_btree_updkeys().
    2326             :  */
    2327             : STATIC int
    2328  2071937490 : xfs_btree_update_keys(
    2329             :         struct xfs_btree_cur    *cur,
    2330             :         int                     level)  /* level of the block whose keys changed; must be >= 0 */
    2331             : {
    2332  2071937490 :         struct xfs_btree_block  *block;
    2333  2071937490 :         struct xfs_buf          *bp;
    2334  2071937490 :         union xfs_btree_key     *kp;    /* key slot in the parent being rewritten */
    2335  2071937490 :         union xfs_btree_key     key;    /* keys derived from the starting block */
    2336  2071937490 :         int                     ptr;
    2337             : 
    2338  2071937490 :         ASSERT(level >= 0);
    2339             : 
    2340  2071937490 :         block = xfs_btree_get_block(cur, level, &bp);
    2341  2071469584 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
    2342  1390094396 :                 return __xfs_btree_updkeys(cur, level, block, bp, false); /* stops early once a parent already matches */
    2343             : 
    2344             :         /*
    2345             :          * Go up the tree from this level toward the root.
    2346             :          * At each level, overwrite the key in the cursor's slot with the key taken from the starting block.
    2347             :          * Stop when we reach a level where the cursor isn't pointing
    2348             :          * at the first entry in the block.
    2349             :          */
    2350   681375188 :         xfs_btree_get_keys(cur, block, &key);
    2351   868431406 :         for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { /* ptr = 1 lets the first pass run */
    2352             : #ifdef DEBUG
    2353   186842161 :                 int             error;
    2354             : #endif
    2355   186842161 :                 block = xfs_btree_get_block(cur, level, &bp);
    2356             : #ifdef DEBUG
    2357   186882910 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2358   186884953 :                 if (error)
    2359           0 :                         return error;
    2360             : #endif
    2361   186884953 :                 ptr = cur->bc_levels[level].ptr; /* also decides whether the loop continues upward */
    2362   186884953 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    2363   186884953 :                 xfs_btree_copy_keys(cur, kp, &key, 1);
    2364   186882603 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2365             :         }
    2366             : 
    2367             :         return 0;
    2368             : }
    2369             : 
    2370             : /*
    2371             :  * Overwrite the leaf record referred to by cur with the value in the
    2372             :  * given record, log it, and push any key change up to the parents.  Returns 0, or the error from
    2373             :  * the DEBUG block check or the parent key update (the original comment names EFSCORRUPTED).
    2374             :  */
    2375             : int
    2376  1188087642 : xfs_btree_update(
    2377             :         struct xfs_btree_cur    *cur,   /* cursor; bc_levels[0].ptr selects the record */
    2378             :         union xfs_btree_rec     *rec)   /* new record contents to copy in */
    2379             : {
    2380  1188087642 :         struct xfs_btree_block  *block;
    2381  1188087642 :         struct xfs_buf          *bp;
    2382  1188087642 :         int                     error;
    2383  1188087642 :         int                     ptr;
    2384  1188087642 :         union xfs_btree_rec     *rp;    /* address of the record inside the block */
    2385             : 
    2386             :         /* Pick up the current block. */
    2387  1188087642 :         block = xfs_btree_get_block(cur, 0, &bp);
    2388             : 
    2389             : #ifdef DEBUG
    2390  1186891839 :         error = xfs_btree_check_block(cur, block, 0, bp);
    2391  1188186935 :         if (error)
    2392           0 :                 goto error0;
    2393             : #endif
    2394             :         /* Get the address of the rec to be updated. */
    2395  1188186935 :         ptr = cur->bc_levels[0].ptr;
    2396  1188186935 :         rp = xfs_btree_rec_addr(cur, ptr, block);
    2397             : 
    2398             :         /* Fill in the new contents and log them. */
    2399  1188186935 :         xfs_btree_copy_recs(cur, rp, rec, 1);
    2400  1187599447 :         xfs_btree_log_recs(cur, bp, ptr, ptr);
    2401             : 
    2402             :         /*
    2403             :          * If we are tracking the last record in the tree and
    2404             :          * we are at the far right edge of the tree, update it.
    2405             :          */
    2406  1188021615 :         if (xfs_btree_is_lastrec(cur, block, 0)) {
    2407           0 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    2408             :                                             ptr, LASTREC_UPDATE);
    2409             :         }
    2410             : 
    2411             :         /* Pass new key value up to our parent. */
    2412  2199800099 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    2413   374612672 :                 error = xfs_btree_update_keys(cur, 0);
    2414   374677810 :                 if (error)
    2415           0 :                         goto error0;
    2416             :         }
    2417             : 
    2418             :         return 0;
    2419             : 
    2420             : error0: /* nothing to unwind here; just hand the error back */
    2421             :         return error;
    2422             : }
    2423             : 
    2424             : /*
    2425             :  * Move 1 record left from cur/level if possible.  Returns 0 with *stat = 1 if an entry
    2426             :  * moved (cur then reflects the new path), 0 with *stat = 0 if no shift was done, or an error.
    2427             :  */
    2428             : STATIC int                                      /* error */
    2429   213217985 : xfs_btree_lshift(
    2430             :         struct xfs_btree_cur    *cur,
    2431             :         int                     level,
    2432             :         int                     *stat)          /* success/failure */
    2433             : {
    2434   213217985 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2435   213217985 :         struct xfs_btree_block  *left;          /* left btree block */
    2436   213217985 :         int                     lrecs;          /* left record count */
    2437   213217985 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2438   213217985 :         struct xfs_btree_block  *right;         /* right btree block */
    2439   213217985 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2440   213217985 :         int                     rrecs;          /* right record count */
    2441   213217985 :         union xfs_btree_ptr     lptr;           /* left btree pointer */
    2442   213217985 :         union xfs_btree_key     *rkp = NULL;    /* right btree key */
    2443   213217985 :         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
    2444   213217985 :         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
    2445   213217985 :         int                     error;          /* error return value */
    2446   213217985 :         int                     i;
    2447             : 
    2448   213217985 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && /* an inode-rooted top level is never shifted */
    2449   108296317 :             level == cur->bc_nlevels - 1)
    2450           0 :                 goto out0;
    2451             : 
    2452             :         /* Set up variables for this block as "right". */
    2453   213217985 :         right = xfs_btree_get_block(cur, level, &rbp);
    2454             : 
    2455             : #ifdef DEBUG
    2456   213216286 :         error = xfs_btree_check_block(cur, right, level, rbp);
    2457   213218462 :         if (error)
    2458           0 :                 goto error0;
    2459             : #endif
    2460             : 
    2461             :         /* If we've got no left sibling then we can't shift an entry left. */
    2462   213218462 :         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2463   426436026 :         if (xfs_btree_ptr_is_null(cur, &lptr))
    2464      203909 :                 goto out0;
    2465             : 
    2466             :         /*
    2467             :          * If the cursor entry is the one that would be moved, don't
    2468             :          * do it... it's too complicated.
    2469             :          */
    2470   213014104 :         if (cur->bc_levels[level].ptr <= 1)
    2471       53506 :                 goto out0;
    2472             : 
    2473             :         /* Set up the left neighbor as "left". */
    2474   212960598 :         error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    2475   212960664 :         if (error)
    2476          58 :                 goto error0;
    2477             : 
    2478             :         /* If it's full, it can't take another entry. */
    2479   212960606 :         lrecs = xfs_btree_get_numrecs(left);
    2480   212960606 :         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
    2481     3076740 :                 goto out0;
    2482             : 
    2483   209884005 :         rrecs = xfs_btree_get_numrecs(right);
    2484             : 
    2485             :         /*
    2486             :          * We add one entry to the left side and remove one for the right side.
    2487             :          * Account for it here, the changes will be updated on disk and logged
    2488             :          * later.
    2489             :          */
    2490   209884005 :         lrecs++;
    2491   209884005 :         rrecs--;
    2492             : 
    2493   209884005 :         XFS_BTREE_STATS_INC(cur, lshift);
    2494   209884608 :         XFS_BTREE_STATS_ADD(cur, moves, 1);
    2495             : 
    2496             :         /*
    2497             :          * If non-leaf, copy a key and a ptr to the left block.
    2498             :          * Log the changes to the left block.
    2499             :          */
    2500   209885291 :         if (level > 0) {
    2501             :                 /* It's a non-leaf.  Move keys and pointers. */
    2502      514229 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2503      514229 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2504             : 
    2505      514229 :                 lkp = xfs_btree_key_addr(cur, lrecs, left); /* new last slot of left (lrecs already bumped) */
    2506      514229 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2507             : 
    2508      514229 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2509      514229 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2510             : 
    2511      514229 :                 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
    2512      514229 :                 if (error)
    2513           0 :                         goto error0;
    2514             : 
    2515      514229 :                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
    2516      514229 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
    2517             : 
    2518      514229 :                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
    2519      514229 :                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
    2520             : 
    2521      514229 :                 ASSERT(cur->bc_ops->keys_inorder(cur,
    2522             :                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
    2523             :         } else {
    2524             :                 /* It's a leaf.  Move records.  */
    2525   209371062 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2526             : 
    2527   209371062 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2528   209371062 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2529             : 
    2530   209371062 :                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
    2531   209367703 :                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
    2532             : 
    2533   209369812 :                 ASSERT(cur->bc_ops->recs_inorder(cur,
    2534             :                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
    2535             :         }
    2536             : 
    2537   209883154 :         xfs_btree_set_numrecs(left, lrecs);
    2538   209883154 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2539             : 
    2540   209886098 :         xfs_btree_set_numrecs(right, rrecs);
    2541   209886098 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2542             : 
    2543             :         /*
    2544             :          * Slide the contents of right down one entry.
    2545             :          */
    2546   209886583 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
    2547   209886462 :         if (level > 0) {
    2548             :                 /* It's a nonleaf. operate on keys and ptrs */
    2549    77430289 :                 for (i = 0; i < rrecs; i++) {
    2550    76916060 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
    2551    76916060 :                         if (error)
    2552           0 :                                 goto error0;
    2553             :                 }
    2554             : 
    2555      514229 :                 xfs_btree_shift_keys(cur,
    2556             :                                 xfs_btree_key_addr(cur, 2, right),
    2557             :                                 -1, rrecs);
    2558      514229 :                 xfs_btree_shift_ptrs(cur,
    2559             :                                 xfs_btree_ptr_addr(cur, 2, right),
    2560             :                                 -1, rrecs);
    2561             : 
    2562      514228 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2563      514228 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2564             :         } else {
    2565             :                 /* It's a leaf. operate on records */
    2566   209372233 :                 xfs_btree_shift_recs(cur,
    2567             :                         xfs_btree_rec_addr(cur, 2, right),
    2568             :                         -1, rrecs);
    2569   209372225 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2570             :         }
    2571             : 
    2572             :         /*
    2573             :          * Using a temporary cursor, update the parent key values of the
    2574             :          * block on the left.
    2575             :          */
    2576   209886109 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2577    94079013 :                 error = xfs_btree_dup_cursor(cur, &tcur);
    2578    94078025 :                 if (error)
    2579           0 :                         goto error0; /* no tcur was handed back, so nothing to delete */
    2580    94078025 :                 i = xfs_btree_firstrec(tcur, level);
    2581    94074374 :                 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2582           0 :                         xfs_btree_mark_sick(cur);
    2583           0 :                         error = -EFSCORRUPTED;
    2584           0 :                         goto error1; /* tcur is live here: error0 would leak it */
    2585             :                 }
    2586             : 
    2587    94074374 :                 error = xfs_btree_decrement(tcur, level, &i);
    2588    94071759 :                 if (error)
    2589           0 :                         goto error1;
    2590             : 
    2591             :                 /* Update the parent high keys of the left block, if needed. */
    2592    94071759 :                 error = xfs_btree_update_keys(tcur, level);
    2593    94079379 :                 if (error)
    2594           0 :                         goto error1;
    2595             : 
    2596    94079379 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2597             :         }
    2598             : 
    2599             :         /* Update the parent keys of the right block. */
    2600   209886615 :         error = xfs_btree_update_keys(cur, level);
    2601   209886173 :         if (error)
    2602           0 :                 goto error0;
    2603             : 
    2604             :         /* Slide the cursor value left one. */
    2605   209886173 :         cur->bc_levels[level].ptr--;
    2606             : 
    2607   209886173 :         *stat = 1;
    2608   209886173 :         return 0;
    2609             : 
    2610     3334155 : out0: /* shift not possible; not an error */
    2611     3334155 :         *stat = 0;
    2612     3334155 :         return 0;
    2613             : 
    2614             : error0: /* failure with no temporary cursor outstanding */
    2615             :         return error;
    2616             : 
    2617           0 : error1: /* failure while tcur exists: release it before returning */
    2618           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2619           0 :         return error;
    2620             : }
    2621             : 
    2622             : /*
    2623             :  * Move 1 record right from cur/level if possible.  Returns 0 with *stat = 1 if an entry
    2624             :  * moved, 0 with *stat = 0 if no shift was done, or an error.
    2625             :  */
    2626             : STATIC int                                      /* error */
    2627   307292519 : xfs_btree_rshift(
    2628             :         struct xfs_btree_cur    *cur,
    2629             :         int                     level,
    2630             :         int                     *stat)          /* success/failure */
    2631             : {
    2632   307292519 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2633   307292519 :         struct xfs_btree_block  *left;          /* left btree block */
    2634   307292519 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2635   307292519 :         struct xfs_btree_block  *right;         /* right btree block */
    2636   307292519 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2637   307292519 :         union xfs_btree_ptr     rptr;           /* right block pointer */
    2638   307292519 :         union xfs_btree_key     *rkp;           /* right btree key */
    2639   307292519 :         int                     rrecs;          /* right record count */
    2640   307292519 :         int                     lrecs;          /* left record count */
    2641   307292519 :         int                     error;          /* error return value */
    2642   307292519 :         int                     i;              /* loop counter */
    2643             : 
    2644   307292519 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && /* an inode-rooted top level is never shifted */
    2645   146106070 :             (level == cur->bc_nlevels - 1))
    2646           0 :                 goto out0;
    2647             : 
    2648             :         /* Set up variables for this block as "left". */
    2649   307292519 :         left = xfs_btree_get_block(cur, level, &lbp);
    2650             : 
    2651             : #ifdef DEBUG
    2652   307288266 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2653   307291759 :         if (error)
    2654           0 :                 goto error0;
    2655             : #endif
    2656             : 
    2657             :         /* If we've got no right sibling then we can't shift an entry right. */
    2658   307291759 :         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2659   614583414 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    2660   120383781 :                 goto out0;
    2661             : 
    2662             :         /*
    2663             :          * If the cursor entry is the one that would be moved, don't
    2664             :          * do it... it's too complicated.
    2665             :          */
    2666   186907926 :         lrecs = xfs_btree_get_numrecs(left);
    2667   186907926 :         if (cur->bc_levels[level].ptr >= lrecs)
    2668    41599649 :                 goto out0;
    2669             : 
    2670             :         /* Set up the right neighbor as "right". */
    2671   145308277 :         error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    2672   145309219 :         if (error)
    2673         197 :                 goto error0;
    2674             : 
    2675             :         /* If it's full, it can't take another entry. */
    2676   145309022 :         rrecs = xfs_btree_get_numrecs(right);
    2677   145309022 :         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
    2678    32312947 :                 goto out0;
    2679             : 
    2680   112996336 :         XFS_BTREE_STATS_INC(cur, rshift);
    2681   112996478 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2682             : 
    2683             :         /*
    2684             :          * Make a hole at the start of the right neighbor block, then
    2685             :          * copy the last left block entry to the hole.
    2686             :          */
    2687   112996762 :         if (level > 0) {
    2688             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    2689      262152 :                 union xfs_btree_key     *lkp;
    2690      262152 :                 union xfs_btree_ptr     *lpp;
    2691      262152 :                 union xfs_btree_ptr     *rpp;
    2692             : 
    2693      262152 :                 lkp = xfs_btree_key_addr(cur, lrecs, left); /* last entry of left: the one being moved */
    2694      262152 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2695      262152 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2696      262152 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2697             : 
    2698    17094003 :                 for (i = rrecs - 1; i >= 0; i--) {
    2699    16831851 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    2700    16831851 :                         if (error)
    2701           0 :                                 goto error0;
    2702             :                 }
    2703             : 
    2704      262152 :                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
    2705      262152 :                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
    2706             : 
    2707      262152 :                 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
    2708      262152 :                 if (error)
    2709           0 :                         goto error0;
    2710             : 
    2711             :                 /* Now put the new data in, and log it. */
    2712      262152 :                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
    2713      262152 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
    2714             : 
    2715      262152 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
    2716      262152 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
    2717             : 
    2718      262152 :                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
    2719             :                         xfs_btree_key_addr(cur, 2, right)));
    2720             :         } else {
    2721             :                 /* It's a leaf. make a hole in the records */
    2722   112734610 :                 union xfs_btree_rec     *lrp;
    2723   112734610 :                 union xfs_btree_rec     *rrp;
    2724             : 
    2725   112734610 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2726   112734610 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2727             : 
    2728   112734610 :                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
    2729             : 
    2730             :                 /* Now put the new data in, and log it. */
    2731   112733258 :                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
    2732   112733196 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
    2733             :         }
    2734             : 
    2735             :         /*
    2736             :          * Decrement and log left's numrecs, bump and log right's numrecs.
    2737             :          */
    2738   112996272 :         xfs_btree_set_numrecs(left, --lrecs);
    2739   112996272 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2740             : 
    2741   112996909 :         xfs_btree_set_numrecs(right, ++rrecs);
    2742   112996909 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2743             : 
    2744             :         /*
    2745             :          * Using a temporary cursor, update the parent key values of the
    2746             :          * block on the right.
    2747             :          */
    2748   112997021 :         error = xfs_btree_dup_cursor(cur, &tcur);
    2749   112996026 :         if (error)
    2750           3 :                 goto error0; /* no tcur was handed back, so nothing to delete */
    2751   112996023 :         i = xfs_btree_lastrec(tcur, level);
    2752   112996391 :         if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2753           0 :                 xfs_btree_mark_sick(cur);
    2754           0 :                 error = -EFSCORRUPTED;
    2755           0 :                 goto error1; /* tcur is live here: error0 would leak it */
    2756             :         }
    2757             : 
    2758   112996391 :         error = xfs_btree_increment(tcur, level, &i);
    2759   112995363 :         if (error)
    2760           0 :                 goto error1;
    2761             : 
    2762             :         /* Update the parent high keys of the left block, if needed. */
    2763   112995363 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2764    73359401 :                 error = xfs_btree_update_keys(cur, level);
    2765    73360422 :                 if (error)
    2766           0 :                         goto error1;
    2767             :         }
    2768             : 
    2769             :         /* Update the parent keys of the right block. */
    2770   112996384 :         error = xfs_btree_update_keys(tcur, level);
    2771   112996203 :         if (error)
    2772           0 :                 goto error1;
    2773             : 
    2774   112996203 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2775             : 
    2776   112995595 :         *stat = 1;
    2777   112995595 :         return 0;
    2778             : 
    2779   194296377 : out0: /* shift not possible; not an error */
    2780   194296377 :         *stat = 0;
    2781   194296377 :         return 0;
    2782             : 
    2783             : error0: /* failure with no temporary cursor outstanding */
    2784             :         return error;
    2785             : 
    2786           0 : error1: /* failure while tcur exists: release it before returning */
    2787           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2788           0 :         return error;
    2789             : }
    2790             : 
    2791             : static inline int
    2792     3446270 : xfs_btree_alloc_block(
    2793             :         struct xfs_btree_cur            *cur,
    2794             :         const union xfs_btree_ptr       *hint_block,
    2795             :         union xfs_btree_ptr             *new_block,
    2796             :         int                             *stat)
    2797             : {
    2798     3446270 :         int                             error;
    2799             : 
    2800     3446270 :         error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
    2801     3446272 :         trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
    2802     3446264 :         return error;
    2803             : }
    2804             : 
    2805             : /*
    2806             :  * Split cur/level block in half.
    2807             :  * Return new block number and the key to its first
    2808             :  * record (to be inserted into parent).
    2809             :  */
    2810             : STATIC int                                      /* error */
    2811     3334148 : __xfs_btree_split(
    2812             :         struct xfs_btree_cur    *cur,
    2813             :         int                     level,
    2814             :         union xfs_btree_ptr     *ptrp,
    2815             :         union xfs_btree_key     *key,
    2816             :         struct xfs_btree_cur    **curp,
    2817             :         int                     *stat)          /* success/failure */
    2818             : {
    2819     3334148 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    2820     3334148 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2821     3334148 :         struct xfs_btree_block  *left;          /* left btree block */
    2822     3334148 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    2823     3334148 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2824     3334148 :         struct xfs_btree_block  *right;         /* right btree block */
    2825     3334148 :         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
    2826     3334148 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    2827     3334148 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    2828     3334148 :         int                     lrecs;
    2829     3334148 :         int                     rrecs;
    2830     3334148 :         int                     src_index;
    2831     3334148 :         int                     error;          /* error return value */
    2832     3334148 :         int                     i;
    2833             : 
    2834     3334148 :         XFS_BTREE_STATS_INC(cur, split);
    2835             : 
    2836             :         /* Set up left block (current one). */
    2837     3334152 :         left = xfs_btree_get_block(cur, level, &lbp);
    2838             : 
    2839             : #ifdef DEBUG
    2840     3334152 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2841     3334154 :         if (error)
    2842           0 :                 goto error0;
    2843             : #endif
    2844             : 
    2845     3334154 :         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    2846             : 
    2847             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    2848     3334148 :         error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
    2849     3334146 :         if (error)
    2850           4 :                 goto error0;
    2851     3334142 :         if (*stat == 0)
    2852           0 :                 goto out0;
    2853     3334142 :         XFS_BTREE_STATS_INC(cur, alloc);
    2854             : 
    2855             :         /* Set up the new block as "right". */
    2856     3334141 :         error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
    2857     3334135 :         if (error)
    2858           0 :                 goto error0;
    2859             : 
    2860             :         /* Fill in the btree header for the new right block. */
    2861     3334135 :         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
    2862             : 
    2863             :         /*
    2864             :          * Split the entries between the old and the new block evenly.
    2865             :          * Make sure that if there's an odd number of entries now, that
    2866             :          * each new block will have the same number of entries.
    2867             :          */
    2868     3334122 :         lrecs = xfs_btree_get_numrecs(left);
    2869     3334122 :         rrecs = lrecs / 2;
    2870     3334122 :         if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
    2871      348086 :                 rrecs++;
    2872     3334122 :         src_index = (lrecs - rrecs + 1);
    2873             : 
    2874     3334122 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2875             : 
    2876             :         /* Adjust numrecs for the later get_*_keys() calls. */
    2877     3334137 :         lrecs -= rrecs;
    2878     3334137 :         xfs_btree_set_numrecs(left, lrecs);
    2879     3334137 :         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
    2880             : 
    2881             :         /*
    2882             :          * Copy btree block entries from the left block over to the
    2883             :          * new block, the right. Update the right block and log the
    2884             :          * changes.
    2885             :          */
    2886     3334137 :         if (level > 0) {
    2887             :                 /* It's a non-leaf.  Move keys and pointers. */
    2888       24719 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2889       24719 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2890       24719 :                 union xfs_btree_key     *rkp;   /* right btree key */
    2891       24719 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    2892             : 
    2893       24719 :                 lkp = xfs_btree_key_addr(cur, src_index, left);
    2894       24719 :                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
    2895       24719 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2896       24719 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2897             : 
    2898       49438 :                 for (i = src_index; i < rrecs; i++) {
    2899           0 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    2900           0 :                         if (error)
    2901           0 :                                 goto error0;
    2902             :                 }
    2903             : 
    2904             :                 /* Copy the keys & pointers to the new block. */
    2905       24719 :                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
    2906       24719 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
    2907             : 
    2908       24719 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2909       24719 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2910             : 
    2911             :                 /* Stash the keys of the new block for later insertion. */
    2912       24719 :                 xfs_btree_get_node_keys(cur, right, key);
    2913             :         } else {
    2914             :                 /* It's a leaf.  Move records.  */
    2915     3309418 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2916     3309418 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    2917             : 
    2918     3309418 :                 lrp = xfs_btree_rec_addr(cur, src_index, left);
    2919     3309418 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2920             : 
    2921             :                 /* Copy records to the new block. */
    2922     3309418 :                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
    2923     3309420 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2924             : 
    2925             :                 /* Stash the keys of the new block for later insertion. */
    2926     3309419 :                 xfs_btree_get_leaf_keys(cur, right, key);
    2927             :         }
    2928             : 
    2929             :         /*
    2930             :          * Find the left block number by looking in the buffer.
    2931             :          * Adjust sibling pointers.
    2932             :          */
    2933     3334151 :         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
    2934     3334151 :         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
    2935     3334147 :         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2936     3334140 :         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2937             : 
    2938     3334141 :         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
    2939     3334138 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    2940             : 
    2941             :         /*
    2942             :          * If there's a block to the new block's right, make that block
    2943             :          * point back to right instead of to left.
    2944             :          */
    2945     6668306 :         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
    2946     2116527 :                 error = xfs_btree_read_buf_block(cur, &rrptr,
    2947             :                                                         0, &rrblock, &rrbp);
    2948     2116522 :                 if (error)
    2949         214 :                         goto error0;
    2950     2116308 :                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
    2951     2116304 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    2952             :         }
    2953             : 
    2954             :         /* Update the parent high keys of the left block, if needed. */
    2955     3333936 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2956     2379669 :                 error = xfs_btree_update_keys(cur, level);
    2957     2379672 :                 if (error)
    2958           0 :                         goto error0;
    2959             :         }
    2960             : 
    2961             :         /*
    2962             :          * If the cursor is really in the right block, move it there.
    2963             :          * If it's just pointing past the last entry in left, then we'll
    2964             :          * insert there, so don't change anything in that case.
    2965             :          */
    2966     3333939 :         if (cur->bc_levels[level].ptr > lrecs + 1) {
    2967     2506616 :                 xfs_btree_setbuf(cur, level, rbp);
    2968     2506602 :                 cur->bc_levels[level].ptr -= lrecs;
    2969             :         }
    2970             :         /*
    2971             :          * If there are more levels, we'll need another cursor which refers
    2972             :          * the right block, no matter where this cursor was.
    2973             :          */
    2974     3333925 :         if (level + 1 < cur->bc_nlevels) {
    2975     3263536 :                 error = xfs_btree_dup_cursor(cur, curp);
    2976     3263551 :                 if (error)
    2977           7 :                         goto error0;
    2978     3263544 :                 (*curp)->bc_levels[level + 1].ptr++;
    2979             :         }
    2980     3333933 :         *ptrp = rptr;
    2981     3333933 :         *stat = 1;
    2982     3333933 :         return 0;
    2983             : out0:
    2984           0 :         *stat = 0;
    2985           0 :         return 0;
    2986             : 
    2987             : error0:
    2988             :         return error;
    2989             : }
    2990             : 
    2991             : #ifdef __KERNEL__
    2992             : struct xfs_btree_split_args {
    2993             :         struct xfs_btree_cur    *cur;
    2994             :         int                     level;
    2995             :         union xfs_btree_ptr     *ptrp;
    2996             :         union xfs_btree_key     *key;
    2997             :         struct xfs_btree_cur    **curp;
    2998             :         int                     *stat;          /* success/failure */
    2999             :         int                     result;
    3000             :         bool                    kswapd; /* allocation in kswapd context */
    3001             :         struct completion       *done;
    3002             :         struct work_struct      work;
    3003             : };
    3004             : 
    3005             : /*
    3006             :  * Stack switching interfaces for allocation
    3007             :  */
    3008             : static void
    3009       54070 : xfs_btree_split_worker(
    3010             :         struct work_struct      *work)
    3011             : {
    3012       54070 :         struct xfs_btree_split_args     *args = container_of(work,
    3013             :                                                 struct xfs_btree_split_args, work);
    3014       54070 :         unsigned long           pflags;
    3015       54070 :         unsigned long           new_pflags = 0;
    3016             : 
    3017             :         /*
    3018             :          * we are in a transaction context here, but may also be doing work
    3019             :          * in kswapd context, and hence we may need to inherit that state
    3020             :          * temporarily to ensure that we don't block waiting for memory reclaim
    3021             :          * in any way.
    3022             :          */
    3023       54070 :         if (args->kswapd)
    3024           0 :                 new_pflags |= PF_MEMALLOC | PF_KSWAPD;
    3025             : 
    3026       54070 :         current_set_flags_nested(&pflags, new_pflags);
    3027       54070 :         xfs_trans_set_context(args->cur->bc_tp);
    3028             : 
    3029       54069 :         args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
    3030             :                                          args->key, args->curp, args->stat);
    3031             : 
    3032       54070 :         xfs_trans_clear_context(args->cur->bc_tp);
    3033       54069 :         current_restore_flags_nested(&pflags, new_pflags);
    3034             : 
    3035             :         /*
    3036             :          * Do not access args after complete() has run here. We don't own args
    3037             :          * and the owner may run and free args before we return here.
    3038             :          */
    3039       54069 :         complete(args->done);
    3040             : 
    3041       54070 : }
    3042             : 
    3043             : /*
    3044             :  * BMBT split requests often come in with little stack to work on so we push
    3045             :  * them off to a worker thread so there is lots of stack to use. For the other
    3046             :  * btree types, just call directly to avoid the context switch overhead here.
    3047             :  *
    3048             :  * Care must be taken here - the work queue rescuer thread introduces potential
    3049             :  * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
    3050             :  * AGFs to allocate blocks. A task being run by the rescuer could attempt to
    3051             :  * lock an AGF that is already locked by a task queued to run by the rescuer,
    3052             :  * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
    3053             :  * release it until the current thread it is running gains the lock.
    3054             :  *
    3055             :  * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
    3056             :  * already locked to allocate from. The only place that doesn't hold an AGF
    3057             :  * locked is unwritten extent conversion at IO completion, but that has already
    3058             :  * been offloaded to a worker thread and hence has no stack consumption issues
    3059             :  * we have to worry about.
    3060             :  */
    3061             : STATIC int                                      /* error */
    3062     3334157 : xfs_btree_split(
    3063             :         struct xfs_btree_cur    *cur,
    3064             :         int                     level,
    3065             :         union xfs_btree_ptr     *ptrp,
    3066             :         union xfs_btree_key     *key,
    3067             :         struct xfs_btree_cur    **curp,
    3068             :         int                     *stat)          /* success/failure */
    3069             : {
    3070     3334157 :         struct xfs_btree_split_args     args;
    3071     3334157 :         DECLARE_COMPLETION_ONSTACK(done);
    3072             : 
    3073     3334157 :         if (cur->bc_btnum != XFS_BTNUM_BMAP ||
    3074      665188 :             cur->bc_tp->t_highest_agno == NULLAGNUMBER)
    3075     3280087 :                 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
    3076             : 
    3077       54070 :         args.cur = cur;
    3078       54070 :         args.level = level;
    3079       54070 :         args.ptrp = ptrp;
    3080       54070 :         args.key = key;
    3081       54070 :         args.curp = curp;
    3082       54070 :         args.stat = stat;
    3083       54070 :         args.done = &done;
    3084       54070 :         args.kswapd = current_is_kswapd();
    3085       54070 :         INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
    3086       54070 :         queue_work(xfs_alloc_wq, &args.work);
    3087       54069 :         wait_for_completion(&done);
    3088       54070 :         destroy_work_on_stack(&args.work);
    3089       54070 :         return args.result;
    3090             : }
    3091             : #else
    3092             : #define xfs_btree_split __xfs_btree_split
    3093             : #endif /* __KERNEL__ */
    3094             : 
    3095             : static inline void
    3096     7893759 : xfs_btree_iroot_realloc(
    3097             :         struct xfs_btree_cur            *cur,
    3098             :         int                             rec_diff)
    3099             : {
    3100     7893759 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3101             : 
    3102     7893759 :         xfs_iroot_realloc(cur->bc_ino.ip, cur->bc_ino.whichfork,
    3103     7893759 :                         cur->bc_ops->iroot_ops, rec_diff);
    3104     7893758 : }
    3105             : 
    3106             : /*
    3107             :  * Move the records from a root leaf block to a separate block.
    3108             :  *
    3109             :  * Trickery here: The amount of memory that we need per record for the incore
    3110             :  * root block changes when we convert a leaf block to an internal block.
    3111             :  * Therefore, we copy leaf records into the new btree block (cblock) before
    3112             :  * freeing the incore root block and changing the tree height.
    3113             :  *
    3114             :  * Once we've changed the tree height, we allocate a new incore root block
    3115             :  * (which will now be an internal root block) and populate it with a pointer to
    3116             :  * cblock and the relevant keys.
    3117             :  */
    3118             : STATIC void
    3119       35679 : xfs_btree_promote_leaf_iroot(
    3120             :         struct xfs_btree_cur    *cur,
    3121             :         struct xfs_btree_block  *block,
    3122             :         struct xfs_buf          *cbp,
    3123             :         union xfs_btree_ptr     *cptr,
    3124             :         struct xfs_btree_block  *cblock)
    3125             : {
    3126       35679 :         union xfs_btree_rec     *rp;
    3127       35679 :         union xfs_btree_rec     *crp;
    3128       35679 :         union xfs_btree_key     *kp;
    3129       35679 :         union xfs_btree_ptr     *pp;
    3130       35679 :         size_t                  size;
    3131       35679 :         int                     numrecs = xfs_btree_get_numrecs(block);
    3132             : 
    3133             :         /* Copy the records from the leaf root into the new child block. */
    3134       35679 :         rp = xfs_btree_rec_addr(cur, 1, block);
    3135       35679 :         crp = xfs_btree_rec_addr(cur, 1, cblock);
    3136       35679 :         xfs_btree_copy_recs(cur, crp, rp, numrecs);
    3137             : 
    3138             :         /* Zap the old root and change the tree height. */
    3139       35679 :         xfs_iroot_free(cur->bc_ino.ip, cur->bc_ino.whichfork);
    3140       35679 :         cur->bc_nlevels++;
    3141       35679 :         cur->bc_levels[1].ptr = 1;
    3142             : 
    3143             :         /*
    3144             :          * Allocate a new internal root block buffer and reinitialize it to
    3145             :          * point to a single new child.
    3146             :          */
    3147       35679 :         size = cur->bc_ops->iroot_ops->size(cur->bc_mp, cur->bc_nlevels - 1, 1);
    3148       35679 :         xfs_iroot_alloc(cur->bc_ino.ip, cur->bc_ino.whichfork, size);
    3149       35679 :         block = xfs_btree_get_iroot(cur);
    3150       35679 :         xfs_btree_init_block(cur->bc_mp, block, cur->bc_ops,
    3151       35679 :                         cur->bc_nlevels - 1, 1, cur->bc_ino.ip->i_ino);
    3152             : 
    3153       35679 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3154       35679 :         kp = xfs_btree_key_addr(cur, 1, block);
    3155       35679 :         xfs_btree_copy_ptrs(cur, pp, cptr, 1);
    3156       35679 :         xfs_btree_get_keys(cur, cblock, kp);
    3157             : 
    3158             :         /* Attach the new block to the cursor and log it. */
    3159       35679 :         xfs_btree_setbuf(cur, 0, cbp);
    3160       35679 :         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
    3161       35679 :         xfs_btree_log_recs(cur, cbp, 1, numrecs);
    3162       35679 : }
    3163             : 
    3164             : /*
    3165             :  * Move the keys and pointers from a root block to a separate block.
    3166             :  *
    3167             :  * Since the keyptr size does not change, all we have to do is increase the
    3168             :  * tree height, copy the keyptrs to the new internal node (cblock), shrink
    3169             :  * the root, and copy the pointers there.
    3170             :  */
    3171             : STATIC int
    3172        6060 : xfs_btree_promote_node_iroot(
    3173             :         struct xfs_btree_cur    *cur,
    3174             :         struct xfs_btree_block  *block,
    3175             :         int                     level,
    3176             :         struct xfs_buf          *cbp,
    3177             :         union xfs_btree_ptr     *cptr,
    3178             :         struct xfs_btree_block  *cblock)
    3179             : {
    3180        6060 :         union xfs_btree_key     *ckp;
    3181        6060 :         union xfs_btree_key     *kp;
    3182        6060 :         union xfs_btree_ptr     *cpp;
    3183        6060 :         union xfs_btree_ptr     *pp;
    3184        6060 :         int                     i;
    3185        6060 :         int                     error;
    3186        6060 :         int                     numrecs = xfs_btree_get_numrecs(block);
    3187             : 
    3188             :         /*
    3189             :          * Increase tree height, adjusting the root block level to match.
    3190             :          * We cannot change the root btree node size until we've copied the
    3191             :          * block contents to the new child block.
    3192             :          */
    3193        6060 :         be16_add_cpu(&block->bb_level, 1);
    3194        6060 :         cur->bc_nlevels++;
    3195        6060 :         cur->bc_levels[level + 1].ptr = 1;
    3196             : 
    3197             :         /*
    3198             :          * Adjust the root btree record count, then copy the keys from the old
    3199             :          * root to the new child block.
    3200             :          */
    3201        6060 :         xfs_btree_set_numrecs(block, 1);
    3202        6060 :         kp = xfs_btree_key_addr(cur, 1, block);
    3203        6060 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3204        6060 :         xfs_btree_copy_keys(cur, ckp, kp, numrecs);
    3205             : 
    3206             :         /* Check the pointers and copy them to the new child block. */
    3207        6060 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3208        6060 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3209       60315 :         for (i = 0; i < numrecs; i++) {
    3210       48195 :                 error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3211       48195 :                 if (error)
    3212           0 :                         return error;
    3213             :         }
    3214        6060 :         xfs_btree_copy_ptrs(cur, cpp, pp, numrecs);
    3215             : 
    3216             :         /*
    3217             :          * Set the first keyptr to point to the new child block, then shrink
    3218             :          * the memory buffer for the root block.
    3219             :          */
    3220        6060 :         error = xfs_btree_debug_check_ptr(cur, cptr, 0, level);
    3221        6060 :         if (error)
    3222             :                 return error;
    3223        6060 :         xfs_btree_copy_ptrs(cur, pp, cptr, 1);
    3224        6060 :         xfs_btree_get_keys(cur, cblock, kp);
    3225        6060 :         xfs_btree_iroot_realloc(cur, 1 - numrecs);
    3226             : 
    3227             :         /* Attach the new block to the cursor and log it. */
    3228        6060 :         xfs_btree_setbuf(cur, level, cbp);
    3229        6060 :         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
    3230        6060 :         xfs_btree_log_keys(cur, cbp, 1, numrecs);
    3231        6060 :         xfs_btree_log_ptrs(cur, cbp, 1, numrecs);
    3232        6060 :         return 0;
    3233             : }
    3234             : 
    3235             : /*
    3236             :  * Copy the old inode root contents into a real block and make the
    3237             :  * broot point to it.
    3238             :  */
    3239             : int                                             /* error */
    3240       41739 : xfs_btree_new_iroot(
    3241             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3242             :         int                     *logflags,      /* logging flags for inode */
    3243             :         int                     *stat)          /* return status - 0 fail */
    3244             : {
    3245       41739 :         struct xfs_buf          *cbp;           /* buffer for cblock */
    3246       41739 :         struct xfs_btree_block  *block;         /* btree block */
    3247       41739 :         struct xfs_btree_block  *cblock;        /* child btree block */
    3248       41739 :         union xfs_btree_ptr     aptr;
    3249       41739 :         union xfs_btree_ptr     nptr;           /* new block addr */
    3250       41739 :         int                     level;          /* btree level */
    3251       41739 :         int                     error;          /* error return code */
    3252             : 
    3253       41739 :         XFS_BTREE_STATS_INC(cur, newroot);
    3254             : 
    3255       41739 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3256             : 
    3257       41739 :         level = cur->bc_nlevels - 1;
    3258             : 
    3259       41739 :         block = xfs_btree_get_iroot(cur);
    3260       41739 :         ASSERT(level > 0 || (cur->bc_flags & XFS_BTREE_IROOT_RECORDS));
    3261       41739 :         if (level > 0)
    3262        6060 :                 aptr = *xfs_btree_ptr_addr(cur, 1, block);
    3263             :         else
    3264       35679 :                 aptr.l = cpu_to_be64(XFS_INO_TO_FSB(cur->bc_mp,
    3265             :                                 cur->bc_ino.ip->i_ino));
    3266             : 
    3267             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3268       41739 :         error = xfs_btree_alloc_block(cur, &aptr, &nptr, stat);
    3269       41739 :         if (error)
    3270           0 :                 goto error0;
    3271       41739 :         if (*stat == 0)
    3272             :                 return 0;
    3273             : 
    3274       41739 :         XFS_BTREE_STATS_INC(cur, alloc);
    3275             : 
    3276             :         /* Copy the root into a real block. */
    3277       41739 :         error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
    3278       41739 :         if (error)
    3279           0 :                 goto error0;
    3280             : 
    3281             :         /*
    3282             :          * we can't just memcpy() the root in for CRC enabled btree blocks.
    3283             :          * In that case have to also ensure the blkno remains correct
    3284             :          */
    3285       83478 :         memcpy(cblock, block, xfs_btree_block_len(cur));
    3286       41739 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    3287       41739 :                 __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
    3288       41739 :                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    3289       41739 :                         cblock->bb_u.l.bb_blkno = bno;
    3290             :                 else
    3291           0 :                         cblock->bb_u.s.bb_blkno = bno;
    3292             :         }
    3293             : 
    3294       41739 :         if (level > 0) {
    3295        6060 :                 error = xfs_btree_promote_node_iroot(cur, block, level, cbp,
    3296             :                                 &nptr, cblock);
    3297        6060 :                 if (error)
    3298           0 :                         goto error0;
    3299             :         } else {
    3300       35679 :                 xfs_btree_promote_leaf_iroot(cur, block, cbp, &nptr, cblock);
    3301             :         }
    3302             : 
    3303       83478 :         *logflags |=
    3304       41739 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
    3305       41739 :         *stat = 1;
    3306       41739 :         return 0;
    3307             : error0:
    3308             :         return error;
    3309             : }
    3310             : 
    3311             : /*
    3312             :  * Allocate a new root block and fill it in with the two old root-level blocks.
    3313             :  */
    3314             : STATIC int                              /* error */
    3315       70386 : xfs_btree_new_root(
    3316             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3317             :         int                     *stat)  /* out: 1 = done, 0 = no block */
    3318             : {
    3319       70386 :         struct xfs_btree_block  *block; /* one half of the old root block */
    3320       70386 :         struct xfs_buf          *bp;    /* buffer containing block */
    3321       70386 :         int                     error;  /* error return value */
    3322       70386 :         struct xfs_buf          *lbp;   /* left buffer pointer */
    3323       70386 :         struct xfs_btree_block  *left;  /* left btree block */
    3324       70386 :         struct xfs_buf          *nbp;   /* new (root) buffer */
    3325       70386 :         struct xfs_btree_block  *new;   /* new (root) btree block */
    3326       70386 :         int                     nptr;   /* new value for key index, 1 or 2 */
    3327       70386 :         struct xfs_buf          *rbp;   /* right buffer pointer */
    3328       70386 :         struct xfs_btree_block  *right; /* right btree block */
    3329       70386 :         union xfs_btree_ptr     rptr;
    3330       70386 :         union xfs_btree_ptr     lptr;
    3331             : 
    3332       70386 :         XFS_BTREE_STATS_INC(cur, newroot);
    3333             : 
    3334             :         /* initialise our start point from the cursor */
    3335       70385 :         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
    3336             : 
    3337             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3338       70384 :         error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
    3339       70386 :         if (error)
    3340           0 :                 goto error0;
    3341       70386 :         if (*stat == 0)
    3342           0 :                 goto out0;
    3343       70386 :         XFS_BTREE_STATS_INC(cur, alloc);
    3344             : 
    3345             :         /* Set up the new block. */
    3346       70385 :         error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
    3347       70386 :         if (error)
    3348           0 :                 goto error0;
    3349             : 
    3350             :         /* Set the root in the holding structure, increasing the level by 1. */
    3351       70386 :         cur->bc_ops->set_root(cur, &lptr, 1);
    3352             : 
    3353             :         /*
    3354             :          * At the previous root level there are now two blocks: the old root,
    3355             :          * and the new block generated when it was split.  We don't know which
    3356             :          * one the cursor is pointing at, so we set up variables "left" and
    3357             :          * "right" for each case.
    3358             :          */
    3359       70386 :         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
    3360             : 
    3361             : #ifdef DEBUG
    3362       70386 :         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
    3363       70385 :         if (error)
    3364           0 :                 goto error0;
    3365             : #endif
    3366             : 
    3367       70385 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    3368      140770 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    3369             :                 /* Our block is left, pick up the right block. */
    3370       33011 :                 lbp = bp;
    3371       33011 :                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    3372       33011 :                 left = block;
    3373       33011 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    3374       33011 :                 if (error)
    3375           0 :                         goto error0;
    3376       33011 :                 bp = rbp;
    3377       33011 :                 nptr = 1;
    3378             :         } else {
    3379             :                 /* Our block is right, pick up the left block. */
    3380       37374 :                 rbp = bp;
    3381       37374 :                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
    3382       37375 :                 right = block;
    3383       37375 :                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    3384       37375 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    3385       37375 :                 if (error)
    3386           0 :                         goto error0;
    3387       37375 :                 bp = lbp;
    3388       37375 :                 nptr = 2;
    3389             :         }
    3390             : 
    3391             :         /* Fill in the new block's btree header and log it. */
    3392       70386 :         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
    3393       70385 :         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
    3394      211158 :         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
    3395             :                         !xfs_btree_ptr_is_null(cur, &rptr));
    3396             : 
    3397             :         /* Fill in the key data in the new root. */
    3398       70386 :         if (xfs_btree_get_level(left) > 0) {
    3399             :                 /*
    3400             :                  * Get the keys for the left block's keys and put them directly
    3401             :                  * in the parent block.  Do the same for the right block.
    3402             :                  */
    3403        2864 :                 xfs_btree_get_node_keys(cur, left,
    3404             :                                 xfs_btree_key_addr(cur, 1, new));
    3405        2864 :                 xfs_btree_get_node_keys(cur, right,
    3406             :                                 xfs_btree_key_addr(cur, 2, new));
    3407             :         } else {
    3408             :                 /*
    3409             :                  * Get the keys for the left block's records and put them
    3410             :                  * directly in the parent block.  Do the same for the right
    3411             :                  * block.
    3412             :                  */
    3413       67522 :                 xfs_btree_get_leaf_keys(cur, left,
    3414             :                         xfs_btree_key_addr(cur, 1, new));
    3415       67521 :                 xfs_btree_get_leaf_keys(cur, right,
    3416             :                         xfs_btree_key_addr(cur, 2, new));
    3417             :         }
    3418       70386 :         xfs_btree_log_keys(cur, nbp, 1, 2);
    3419             : 
    3420             :         /* Fill in the pointer data in the new root. */
    3421       70386 :         xfs_btree_copy_ptrs(cur,
    3422             :                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
    3423       70385 :         xfs_btree_copy_ptrs(cur,
    3424             :                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
    3425       70385 :         xfs_btree_log_ptrs(cur, nbp, 1, 2);
    3426             : 
    3427             :         /* Fix up the cursor. */
    3428       70385 :         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
    3429       70384 :         cur->bc_levels[cur->bc_nlevels].ptr = nptr;
    3430       70384 :         cur->bc_nlevels++;
    3431       70384 :         ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
    3432       70384 :         *stat = 1;
    3433       70384 :         return 0;
    3434             : error0:
    3435             :         return error;
    3436             : out0:
    3437           0 :         *stat = 0;
    3438           0 :         return 0;
    3439             : }
    3440             : 
    3441             : STATIC int                              /* error */
    3442   283497968 : xfs_btree_make_block_unfull(
    3443             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3444             :         int                     level,  /* btree level */
    3445             :         int                     numrecs,/* # of recs in block */
    3446             :         int                     *oindex,/* old tree index */
    3447             :         int                     *index, /* new tree index */
    3448             :         union xfs_btree_ptr     *nptr,  /* new btree ptr */
    3449             :         struct xfs_btree_cur    **ncur, /* new btree cursor */
    3450             :         union xfs_btree_key     *key,   /* key of new block */
    3451             :         int                     *stat)  /* success/failure */
    3452             : {
    3453   283497968 :         int                     error = 0;
    3454             : 
    3455   283497968 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3456   128290715 :             level == cur->bc_nlevels - 1) {
    3457     4152717 :                 struct xfs_inode *ip = cur->bc_ino.ip;
    3458             : 
    3459     4152717 :                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
    3460             :                         /* A root block that can be made bigger. */
    3461     4110978 :                         xfs_btree_iroot_realloc(cur, 1);
    3462     4110977 :                         *stat = 1;
    3463             :                 } else {
    3464             :                         /* A root block that needs replacing. */
    3465       41739 :                         int     logflags = 0;
    3466             : 
    3467       41739 :                         error = xfs_btree_new_iroot(cur, &logflags, stat);
    3468       41739 :                         if (error || *stat == 0)
    3469           0 :                                 return error;
    3470             : 
    3471       41739 :                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
    3472             :                 }
    3473             : 
    3474     4152716 :                 return 0;
    3475             :         }
    3476             : 
    3477             :         /* First, try shifting an entry to the right neighbor. */
    3478   279345251 :         error = xfs_btree_rshift(cur, level, stat);
    3479   279346012 :         if (error || *stat)
    3480             :                 return error;
    3481             : 
    3482             :         /* Next, try shifting an entry to the left neighbor. */
    3483   194296214 :         error = xfs_btree_lshift(cur, level, stat);
    3484   194297534 :         if (error)
    3485             :                 return error;
    3486             : 
    3487   194297476 :         if (*stat) {
    3488   190963319 :                 *oindex = *index = cur->bc_levels[level].ptr;
    3489   190963319 :                 return 0;
    3490             :         }
    3491             : 
    3492             :         /*
    3493             :          * Next, try splitting the current block in half.
    3494             :          *
    3495             :          * If this works we have to re-set our variables because we
    3496             :          * could be in a different block now.
    3497             :          */
    3498     3334157 :         error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
    3499     3334151 :         if (error || *stat == 0)
    3500             :                 return error;
    3501             : 
    3502             : 
    3503     3333926 :         *index = cur->bc_levels[level].ptr;
    3504     3333926 :         return 0;
    3505             : }
    3506             : 
    3507             : /*
    3508             :  * Insert one record/level.  Return information to the caller
    3509             :  * allowing the next level up to proceed if necessary.
    3510             :  */
    3511             : STATIC int
    3512  1487514824 : xfs_btree_insrec(
    3513             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3514             :         int                     level,  /* level to insert record at */
    3515             :         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
    3516             :         union xfs_btree_rec     *rec,   /* record to insert */
    3517             :         union xfs_btree_key     *key,   /* i/o: block key for ptrp */
    3518             :         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
    3519             :         int                     *stat)  /* success/failure */
    3520             : {
    3521  1487514824 :         struct xfs_btree_block  *block; /* btree block */
    3522  1487514824 :         struct xfs_buf          *bp;    /* buffer for block */
    3523  1487514824 :         union xfs_btree_ptr     nptr;   /* new block ptr */
    3524  1487514824 :         struct xfs_btree_cur    *ncur = NULL;   /* new btree cursor */
    3525  1487514824 :         union xfs_btree_key     nkey;   /* new block key */
    3526  1487514824 :         union xfs_btree_key     *lkey;
    3527  1487514824 :         int                     optr;   /* old key/record index */
    3528  1487514824 :         int                     ptr;    /* key/record index */
    3529  1487514824 :         int                     numrecs;/* number of records */
    3530  1487514824 :         int                     error;  /* error return value */
    3531  1487514824 :         int                     i;
    3532  1487514824 :         xfs_daddr_t             old_bn; /* daddr of block before making room */
    3533             : 
    3534  1487514824 :         ncur = NULL;
    3535  1487514824 :         lkey = &nkey;
    3536             : 
    3537             :         /*
    3538             :          * If we have an external root pointer, and we've made it to the
    3539             :          * root level, allocate a new root block and we're done.
    3540             :          */
    3541  1487514824 :         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3542  1050970204 :             (level >= cur->bc_nlevels)) {
    3543       70385 :                 error = xfs_btree_new_root(cur, stat);
    3544       70384 :                 xfs_btree_set_ptr_null(cur, ptrp);
    3545             : 
    3546       70384 :                 return error;
    3547             :         }
    3548             : 
    3549             :         /* If we're off the left edge, return failure. */
    3550  1487444439 :         ptr = cur->bc_levels[level].ptr;
    3551  1487444439 :         if (ptr == 0) {
    3552           0 :                 *stat = 0;
    3553           0 :                 return 0;
    3554             :         }
    3555             : 
    3556  1487444439 :         optr = ptr;
    3557             : 
    3558  1487444439 :         XFS_BTREE_STATS_INC(cur, insrec);
    3559             : 
    3560             :         /* Get pointers to the btree buffer and block. */
    3561  1487437361 :         block = xfs_btree_get_block(cur, level, &bp);
    3562  1487438784 :         old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
    3563  1487438784 :         numrecs = xfs_btree_get_numrecs(block);
    3564             : 
    3565             : #ifdef DEBUG
    3566  1487438784 :         error = xfs_btree_check_block(cur, block, level, bp);
    3567  1487480883 :         if (error)
    3568           0 :                 goto error0;
    3569             : 
    3570             :         /* Check that the new entry is being inserted in the right place. */
    3571  1487480883 :         if (ptr <= numrecs) {
    3572   959151685 :                 if (level == 0) {
    3573   957101430 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rec,
    3574             :                                 xfs_btree_rec_addr(cur, ptr, block)));
    3575             :                 } else {
    3576     2050255 :                         ASSERT(cur->bc_ops->keys_inorder(cur, key,
    3577             :                                 xfs_btree_key_addr(cur, ptr, block)));
    3578             :                 }
    3579             :         }
    3580             : #endif
    3581             : 
    3582             :         /*
    3583             :          * If the block is full, we can't insert the new entry until we
    3584             :          * make the block un-full.
    3585             :          */
    3586  1487400171 :         xfs_btree_set_ptr_null(cur, &nptr);
    3587  1487400171 :         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
    3588   283496006 :                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
    3589             :                                         &optr, &ptr, &nptr, &ncur, lkey, stat);
    3590   283500537 :                 if (error || *stat == 0)
    3591         483 :                         goto error0;
    3592             :         }
    3593             : 
    3594             :         /*
    3595             :          * The current block may have changed if the block was
    3596             :          * previously full and we have just made space in it.
    3597             :          */
    3598  1487385899 :         block = xfs_btree_get_block(cur, level, &bp);
    3599  1487401389 :         numrecs = xfs_btree_get_numrecs(block);
    3600             : 
    3601             : #ifdef DEBUG
    3602  1487401389 :         error = xfs_btree_check_block(cur, block, level, bp);
    3603  1487468663 :         if (error)
    3604           0 :                 goto error0;
    3605             : #endif
    3606             : 
    3607             :         /*
    3608             :          * At this point we know there's room for our new entry in the block
    3609             :          * we're pointing at.
    3610             :          */
    3611  1487468663 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
    3612             : 
    3613  1487475600 :         if (level > 0) {
    3614             :                 /* It's a nonleaf.  Make a hole in the keys and ptrs. */
    3615     3263543 :                 union xfs_btree_key     *kp;
    3616     3263543 :                 union xfs_btree_ptr     *pp;
    3617             : 
    3618     3263543 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    3619     3263543 :                 pp = xfs_btree_ptr_addr(cur, ptr, block);
    3620             : 
    3621    63771406 :                 for (i = numrecs - ptr; i >= 0; i--) {
    3622    57244324 :                         error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3623    57244325 :                         if (error)
    3624           0 :                                 goto error0;
    3625             :                 }
    3626             : 
    3627     3263539 :                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
    3628     3263539 :                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
    3629             : 
    3630     3263531 :                 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
    3631     3263534 :                 if (error)
    3632           0 :                         goto error0;
    3633             : 
    3634             :                 /* Now put the new data in, bump numrecs and log it. */
    3635     3263534 :                 xfs_btree_copy_keys(cur, kp, key, 1);
    3636     3263533 :                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
    3637     3263528 :                 numrecs++;
    3638     3263528 :                 xfs_btree_set_numrecs(block, numrecs);
    3639     3263528 :                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
    3640     3263538 :                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
    3641             : #ifdef DEBUG
    3642     3263542 :                 if (ptr < numrecs) {
    3643     2049882 :                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
    3644             :                                 xfs_btree_key_addr(cur, ptr + 1, block)));
    3645             :                 }
    3646             : #endif
    3647             :         } else {
    3648             :                 /* It's a leaf.  Make a hole in the records. */
    3649  1484212057 :                 union xfs_btree_rec             *rp;
    3650             : 
    3651  1484212057 :                 rp = xfs_btree_rec_addr(cur, ptr, block);
    3652             : 
    3653  1484212057 :                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
    3654             : 
    3655             :                 /* Now put the new data in, bump numrecs and log it. */
    3656  1484174874 :                 xfs_btree_copy_recs(cur, rp, rec, 1);
    3657  1484130078 :                 xfs_btree_set_numrecs(block, ++numrecs);
    3658  1484130078 :                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
    3659             : #ifdef DEBUG
    3660  1484247323 :                 if (ptr < numrecs) {
    3661   957121105 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
    3662             :                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
    3663             :                 }
    3664             : #endif
    3665             :         }
    3666             : 
    3667             :         /* Log the new number of records in the btree header. */
    3668  1487498056 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    3669             : 
    3670             :         /*
    3671             :          * Update btree keys to reflect the newly added record or keyptr.
    3672             :          * There are three cases here to be aware of.  Normally, all we have to
    3673             :          * do is walk towards the root, updating keys as necessary.
    3674             :          *
    3675             :          * If the caller had us target a full block for the insertion, we dealt
    3676             :          * with that by calling the _make_block_unfull function.  If the
    3677             :          * "make unfull" function splits the block, it'll hand us back the key
    3678             :          * and pointer of the new block.  We haven't yet added the new block to
    3679             :          * the next level up, so if we decide to add the new record to the new
    3680             :          * block (xfs_buf_daddr(bp) != old_bn), we have to update the caller's
    3681             :          * pointer so that the caller adds the new block with the correct key.
    3682             :          *
    3683             :          * However, there is a third possibility-- if the selected block is the
    3684             :          * root block of an inode-rooted btree and cannot be expanded further,
    3685             :          * the "make unfull" function moves the root block contents to a new
    3686             :          * block and updates the root block to point to the new block.  In this
    3687             :          * case, no block pointer is passed back because the block has already
    3688             :          * been added to the btree.  In this case, we need to use the regular
    3689             :          * key update function, just like the first case.  This is critical for
    3690             :          * overlapping btrees, because the high key must be updated to reflect
    3691             :          * the entire tree, not just the subtree accessible through the first
    3692             :          * child of the root (which is now two levels down from the root).
    3693             :          */
    3694  2975060394 :         if (!xfs_btree_ptr_is_null(cur, &nptr) &&
    3695     3333932 :             bp && xfs_buf_daddr(bp) != old_bn) {
    3696     2506616 :                 xfs_btree_get_keys(cur, block, lkey);
    3697  2365601003 :         } else if (xfs_btree_needs_key_update(cur, optr)) {
    3698   916382239 :                 error = xfs_btree_update_keys(cur, level);
    3699   916372347 :                 if (error)
    3700           0 :                         goto error0;
    3701             :         }
    3702             : 
    3703             :         /*
    3704             :          * If we are tracking the last record in the tree and
    3705             :          * we are at the far right edge of the tree, update it.
    3706             :          */
    3707  1487520301 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    3708   148051080 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    3709             :                                             ptr, LASTREC_INSREC);
    3710             :         }
    3711             : 
    3712             :         /*
    3713             :          * Return the new block number, if any.
    3714             :          * If there is one, give back a key value and a cursor too.
    3715             :          */
    3716  1487418623 :         *ptrp = nptr;
    3717  2974837246 :         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
    3718     3333929 :                 xfs_btree_copy_keys(cur, key, lkey, 1);
    3719     3333928 :                 *curp = ncur;
    3720             :         }
    3721             : 
    3722  1487418622 :         *stat = 1;
    3723  1487418622 :         return 0;
    3724             : 
    3725         483 : error0:
    3726         483 :         if (ncur)
    3727           0 :                 xfs_btree_del_cursor(ncur, error);
    3728             :         return error;
    3729             : }
    3730             : 
    3731             : /*
    3732             :  * Insert the record at the point referenced by cur.
    3733             :  *
    3734             :  * A multi-level split of the tree on insert will invalidate the original
    3735             :  * cursor.  All callers of this function should assume that the cursor is
    3736             :  * no longer valid and revalidate it.
    3737             :  */
    3738             : int
    3739  1484246880 : xfs_btree_insert(
    3740             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3741             :         int                     *stat)  /* success/failure */
    3742             : {
    3743  1484246880 :         int                     error;  /* error return value */
    3744  1484246880 :         int                     i;      /* result value, 0 for failure */
    3745  1484246880 :         int                     level;  /* current level number in btree */
    3746  1484246880 :         union xfs_btree_ptr     nptr;   /* new block number (split result) */
    3747  1484246880 :         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
    3748  1484246880 :         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
    3749  1484246880 :         union xfs_btree_key     bkey;   /* key of block to insert */
    3750  1484246880 :         union xfs_btree_key     *key;
    3751  1484246880 :         union xfs_btree_rec     rec;    /* record to insert */
    3752             : 
    3753  1484246880 :         level = 0;
    3754  1484246880 :         ncur = NULL;
    3755  1484246880 :         pcur = cur;
    3756  1484246880 :         key = &bkey;
    3757             : 
    3758  1484246880 :         xfs_btree_set_ptr_null(cur, &nptr);
    3759             : 
    3760             :         /* Make a key out of the record data to be inserted, and save it. */
    3761  1484246880 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    3762  1484175083 :         cur->bc_ops->init_key_from_rec(key, &rec);
    3763             : 
    3764             :         /*
    3765             :          * Loop going up the tree, starting at the leaf level.
    3766             :          * Stop when we don't get a split block, that must mean that
    3767             :          * the insert is finished with this level.
    3768             :          */
    3769  1487522954 :         do {
    3770             :                 /*
    3771             :                  * Insert rec (or key/nptr) into this level of the tree.
    3772             :                  * Note if we fail, nptr will be null.
    3773             :                  */
    3774  1487522954 :                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
    3775             :                                 &ncur, &i);
    3776  1487495814 :                 if (error) {
    3777         483 :                         if (pcur != cur)
    3778           4 :                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
    3779         483 :                         goto error0;
    3780             :                 }
    3781             : 
    3782  1487495331 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    3783           0 :                         xfs_btree_mark_sick(cur);
    3784           0 :                         error = -EFSCORRUPTED;
    3785           0 :                         goto error0;
    3786             :                 }
    3787  1487495331 :                 level++;
    3788             : 
    3789             :                 /*
    3790             :                  * See if the cursor we just used is trash.
    3791             :                  * Can't trash the caller's cursor, but otherwise we should
    3792             :                  * if ncur is a new cursor or we're about to be done.
    3793             :                  */
    3794  1487495331 :                 if (pcur != cur &&
    3795     6510945 :                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
    3796             :                         /* Save the state from the cursor before we trash it */
    3797     3263536 :                         if (cur->bc_ops->update_cursor)
    3798      665183 :                                 cur->bc_ops->update_cursor(pcur, cur);
    3799     3263536 :                         cur->bc_nlevels = pcur->bc_nlevels;
    3800     3263536 :                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
    3801             :                 }
    3802             :                 /* If we got a new cursor, switch to it. */
    3803  1487463556 :                 if (ncur) {
    3804     3262936 :                         pcur = ncur;
    3805     3262936 :                         ncur = NULL;
    3806             :                 }
    3807  2974927112 :         } while (!xfs_btree_ptr_is_null(cur, &nptr));
    3808             : 
    3809  1484117225 :         *stat = i;
    3810  1484117225 :         return 0;
    3811             : error0:
    3812             :         return error;
    3813             : }
    3814             : 
    3815             : /*
    3816             :  * Move the records from a child leaf block to the root block.
    3817             :  *
    3818             :  * Trickery here: The amount of memory we need per record for the incore root
    3819             :  * block changes when we convert it from a node block to a leaf.  Therefore,
    3820             :  * we free the incore root block, change the tree height, allocate a new incore
    3821             :  * root, and copy the records from the doomed block into the new root.
    3822             :  */
    3823             : STATIC void
    3824       31162 : xfs_btree_demote_leaf_child(
    3825             :         struct xfs_btree_cur    *cur,
    3826             :         struct xfs_btree_block  *cblock,
    3827             :         int                     numrecs)
    3828             : {
    3829       31162 :         union xfs_btree_rec     *rp;
    3830       31162 :         union xfs_btree_rec     *crp;
    3831       31162 :         struct xfs_btree_block  *block;
    3832       31162 :         size_t                  size;
    3833             : 
    3834             :         /* Zap the old root and change the tree height. */
    3835       31162 :         xfs_iroot_free(cur->bc_ino.ip, cur->bc_ino.whichfork);
    3836       31162 :         cur->bc_levels[0].bp = NULL;
    3837       31162 :         cur->bc_nlevels--;
    3838             : 
    3839             :         /*
    3840             :          * Allocate a new incore root block buffer and reinitialize it with
    3841             :          * the leaf records in the child.
    3842             :          */
    3843       31162 :         size = cur->bc_ops->iroot_ops->size(cur->bc_mp, 0, numrecs);
    3844       31162 :         xfs_iroot_alloc(cur->bc_ino.ip, cur->bc_ino.whichfork, size);
    3845       31162 :         block = xfs_btree_get_iroot(cur);
    3846       31162 :         xfs_btree_init_block(cur->bc_mp, block, cur->bc_ops, 0, numrecs,
    3847       31162 :                         cur->bc_ino.ip->i_ino);
    3848             : 
    3849       31162 :         rp = xfs_btree_rec_addr(cur, 1, block);
    3850       31162 :         crp = xfs_btree_rec_addr(cur, 1, cblock);
    3851       31162 :         xfs_btree_copy_recs(cur, rp, crp, numrecs);
    3852       31162 : }
    3853             : 
    3854             : /*
    3855             :  * Move the keyptrs from a child node block (cblock) up into the root block.
    3856             :  * Returns 0, or the error reported by the debug check of a child pointer.
    3857             :  * Since the keyptr size does not change, all we have to do is resize the
    3858             :  * root to hold numrecs keyptrs, copy the keys and pointers from the doomed
    3859             :  * child (cblock) into the root, and decrease the tree height.
    3860             :  */
    3861             : STATIC int
    3862        3324 : xfs_btree_demote_node_child(
    3863             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3864             :         struct xfs_btree_block  *cblock,        /* doomed child node block */
    3865             :         int                     level,          /* level of the root block */
    3866             :         int                     numrecs)        /* keyptrs in the child */
    3867             : {
    3868        3324 :         struct xfs_btree_block  *block;
    3869        3324 :         union xfs_btree_key     *ckp;
    3870        3324 :         union xfs_btree_key     *kp;
    3871        3324 :         union xfs_btree_ptr     *cpp;
    3872        3324 :         union xfs_btree_ptr     *pp;
    3873        3324 :         int                     i;
    3874        3324 :         int                     error;
    3875        3324 :         int                     diff;
    3876             : 
    3877             :         /*
    3878             :          * Adjust the root btree node size and the record count to match the
    3879             :          * doomed child so that we can copy the keyptrs ahead of changing the
    3880             :          * tree shape.  The root is re-fetched after the realloc call.
    3881             :          */
    3882        3324 :         diff = numrecs - cur->bc_ops->get_maxrecs(cur, level);
    3883        3324 :         xfs_btree_iroot_realloc(cur, diff);
    3884        3324 :         block = xfs_btree_get_iroot(cur);
    3885             : 
    3886        3324 :         xfs_btree_set_numrecs(block, numrecs);
    3887        3324 :         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
    3888             : 
    3889             :         /* Copy keys from the doomed block. */
    3890        3324 :         kp = xfs_btree_key_addr(cur, 1, block);
    3891        3324 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3892        3324 :         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
    3893             : 
    3894             :         /* Copy pointers from the doomed block, checking each one before the copy. */
    3895        3324 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3896        3324 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3897       38754 :         for (i = 0; i < numrecs; i++) {
    3898       32106 :                 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
    3899       32106 :                 if (error)
    3900           0 :                         return error;
    3901             :         }
    3902        3324 :         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
    3903             : 
    3904             :         /* Decrease tree height, adjusting the root block level to match. */
    3905        3324 :         cur->bc_levels[level - 1].bp = NULL;
    3906        3324 :         be16_add_cpu(&block->bb_level, -1);
    3907        3324 :         cur->bc_nlevels--;
    3908        3324 :         return 0;
    3909             : }
    3910             : 
    3911             : /*
    3912             :  * Try to merge the root's only child block back into the inode root.
    3913             :  * Returns 0 if the merge was done or was not possible, else a helper's error.
    3914             :  * Note: the killroot name comes from the fact that we're effectively
    3915             :  * killing the old root block.  But because we can't just delete the
    3916             :  * inode we have to copy the single block it was pointing to into the
    3917             :  * inode.
    3918             :  */
    3919             : STATIC int
    3920    44897821 : xfs_btree_kill_iroot(
    3921             :         struct xfs_btree_cur    *cur)
    3922             : {
    3923    44897821 :         struct xfs_inode        *ip = cur->bc_ino.ip;
    3924    44897821 :         struct xfs_btree_block  *block;
    3925    44897821 :         struct xfs_btree_block  *cblock;
    3926    44897821 :         struct xfs_buf          *cbp;
    3927    44897821 :         int                     level;
    3928    44897821 :         int                     numrecs;
    3929    44897821 :         int                     error;
    3930             : #ifdef DEBUG
    3931    44897821 :         union xfs_btree_ptr     ptr;
    3932             : #endif
    3933             : 
    3934    44897821 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3935    44897821 :         ASSERT((cur->bc_flags & XFS_BTREE_IROOT_RECORDS) ||
    3936             :                cur->bc_nlevels > 1);
    3937             : 
    3938             :         /*
    3939             :          * Unless XFS_BTREE_IROOT_RECORDS is set, don't deal with the case where
    3940             :          * the root block would have to become a leaf.  We're just going to turn the thing back into extents anyway.
    3941             :          */
    3942    44897821 :         level = cur->bc_nlevels - 1;
    3943    44897821 :         if (level == 1 && !(cur->bc_flags & XFS_BTREE_IROOT_RECORDS))
    3944    34547633 :                 goto out0;
    3945             : 
    3946             :         /* If we're already a leaf, jump out. */
    3947    10350188 :         if (level == 0)
    3948     3733457 :                 goto out0;
    3949             : 
    3950             :         /*
    3951             :          * Give up if the root has multiple children.
    3952             :          */
    3953     6616731 :         block = xfs_btree_get_iroot(cur);
    3954     6616730 :         if (xfs_btree_get_numrecs(block) != 1)
    3955        2010 :                 goto out0;
    3956             : 
    3957     6614720 :         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
    3958     6614721 :         numrecs = xfs_btree_get_numrecs(cblock);
    3959             : 
    3960             :         /*
    3961             :          * Only do this if the child's entries will fit in the root, i.e.
    3962             :          * numrecs does not exceed get_dmaxrecs().  The data is then copied up
    3963             :          * to the inode, and the child block is freed instead of the root.
    3964             :          */
    3965     6614721 :         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
    3966     6580234 :                 goto out0;
    3967             : 
    3968       34486 :         XFS_BTREE_STATS_INC(cur, killroot);
    3969             : 
    3970             : #ifdef DEBUG
    3971       34486 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    3972       68972 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3973       34486 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    3974       68972 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3975             : #endif
    3976             : 
    3977       34486 :         if (level > 1) {                /* child is a node block */
    3978        3324 :                 error = xfs_btree_demote_node_child(cur, cblock, level,
    3979             :                                 numrecs);
    3980        3324 :                 if (error)
    3981             :                         return error;
    3982             :         } else                          /* level == 1: child is a leaf block */
    3983       31162 :                 xfs_btree_demote_leaf_child(cur, cblock, numrecs);
    3984             : 
    3985       34486 :         error = xfs_btree_free_block(cur, cbp);
    3986       34486 :         if (error)
    3987             :                 return error;
    3988             : 
    3989       68972 :         xfs_trans_log_inode(cur->bc_tp, ip,
    3990       34486 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
    3991             : out0:
    3992             :         return 0;
    3993             : }
    3994             : 
    3995             : /*
    3996             :  * Kill the current root node, and replace it with its only child node.
    3997             :  */
    3998             : STATIC int
    3999       45316 : xfs_btree_kill_root(
    4000             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    4001             :         struct xfs_buf          *bp,            /* buffer of the old root */
    4002             :         int                     level,          /* level of the old root */
    4003             :         union xfs_btree_ptr     *newroot)       /* ptr to the new root block */
    4004             : {
    4005       45316 :         int                     error;
    4006             : 
    4007       45316 :         XFS_BTREE_STATS_INC(cur, killroot);
    4008             : 
    4009             :         /*
    4010             :          * Update the root pointer, decreasing the level by 1 and then
    4011             :          * free the old root.  A failure to free is returned to the caller.
    4012             :          */
    4013       45316 :         cur->bc_ops->set_root(cur, newroot, -1);
    4014             : 
    4015       45316 :         error = xfs_btree_free_block(cur, bp);
    4016       45316 :         if (error)
    4017             :                 return error;
    4018             : 
    4019       45316 :         cur->bc_levels[level].bp = NULL;
    4020       45316 :         cur->bc_levels[level].ra = 0;
    4021       45316 :         cur->bc_nlevels--;
    4022             : 
    4023       45316 :         return 0;
    4024             : }
    4025             : 
    4026             : STATIC int                                      /* error */
    4027   478843097 : xfs_btree_dec_cursor(
    4028             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    4029             :         int                     level,          /* level to decrement at */
    4030             :         int                     *stat)          /* set to 1 unless an error is returned */
    4031             : {
    4032   478843097 :         int                     error;
    4033   478843097 :         int                     i;              /* decrement status, not examined */
    4034             : 
    4035   478843097 :         if (level > 0) {                /* level 0 cursors are left untouched */
    4036      529754 :                 error = xfs_btree_decrement(cur, level, &i);
    4037      529754 :                 if (error)
    4038             :                         return error;
    4039             :         }
    4040             : 
    4041   478843097 :         *stat = 1;
    4042   478843097 :         return 0;
    4043             : }
    4044             : 
    4045             : /*
    4046             :  * Single level of the btree record deletion routine.
    4047             :  * Delete record pointed to by cur/level.
    4048             :  * Remove the record from its block then rebalance the tree.
    4049             :  * Set *stat to 0 for failure, 1 for done, 2 to go on to the next level.
    4050             :  */
    4051             : STATIC int                                      /* error */
    4052   975013509 : xfs_btree_delrec(
    4053             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    4054             :         int                     level,          /* level removing record from */
    4055             :         int                     *stat)          /* fail/done/go-on */
    4056             : {
    4057   975013509 :         struct xfs_btree_block  *block;         /* btree block */
    4058   975013509 :         union xfs_btree_ptr     cptr;           /* current block ptr */
    4059   975013509 :         struct xfs_buf          *bp;            /* buffer for block */
    4060   975013509 :         int                     error;          /* error return value */
    4061   975013509 :         int                     i;              /* loop counter */
    4062   975013509 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    4063   975013509 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    4064   975013509 :         struct xfs_btree_block  *left;          /* left btree block */
    4065   975013509 :         int                     lrecs = 0;      /* left record count */
    4066   975013509 :         int                     ptr;            /* key/record index */
    4067   975013509 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    4068   975013509 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    4069   975013509 :         struct xfs_btree_block  *right;         /* right btree block */
    4070   975013509 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    4071   975013509 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    4072   975013509 :         int                     rrecs = 0;      /* right record count */
    4073   975013509 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    4074   975013509 :         int                     numrecs;        /* temporary numrec count */
    4075             : 
    4076   975013509 :         tcur = NULL;
    4077             : 
    4078             :         /* Get the index of the entry being deleted, check for nothing there. */
    4079   975013509 :         ptr = cur->bc_levels[level].ptr;
    4080   975013509 :         if (ptr == 0) {
    4081           0 :                 *stat = 0;
    4082           0 :                 return 0;
    4083             :         }
    4084             : 
    4085             :         /* Get the buffer & block containing the record or key/ptr. */
    4086   975013509 :         block = xfs_btree_get_block(cur, level, &bp);
    4087   974948014 :         numrecs = xfs_btree_get_numrecs(block);
    4088             : 
    4089             : #ifdef DEBUG
    4090   974948014 :         error = xfs_btree_check_block(cur, block, level, bp);
    4091   974996654 :         if (error)
    4092           0 :                 goto error0;
    4093             : #endif
    4094             : 
    4095             :         /* Fail if we're off the end of the block. */
    4096   974996654 :         if (ptr > numrecs) {
    4097           0 :                 *stat = 0;
    4098           0 :                 return 0;
    4099             :         }
    4100             : 
    4101   974996654 :         XFS_BTREE_STATS_INC(cur, delrec);
    4102   974986031 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
    4103             : 
    4104             :         /* Excise the entries being deleted. */
    4105   975020189 :         if (level > 0) {
    4106             :                 /* It's a nonleaf. operate on keys and ptrs */
    4107      583283 :                 union xfs_btree_key     *lkp;
    4108      583283 :                 union xfs_btree_ptr     *lpp;
    4109             : 
    4110      583283 :                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
    4111      583283 :                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
    4112             : 
    4113    17111115 :                 for (i = 0; i < numrecs - ptr; i++) {
    4114    16527832 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    4115    16527832 :                         if (error)
    4116           0 :                                 goto error0;
    4117             :                 }
    4118             : 
    4119      583283 :                 if (ptr < numrecs) {
    4120      308668 :                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
    4121      308668 :                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
    4122      308668 :                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
    4123      308668 :                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
    4124             :                 }
    4125             :         } else {
    4126             :                 /* It's a leaf. operate on records */
    4127   974436906 :                 if (ptr < numrecs) {
    4128   596561674 :                         xfs_btree_shift_recs(cur,
    4129             :                                 xfs_btree_rec_addr(cur, ptr + 1, block),
    4130             :                                 -1, numrecs - ptr);
    4131   596574804 :                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
    4132             :                 }
    4133             :         }
    4134             : 
    4135             :         /*
    4136             :          * Decrement and log the number of entries in the block.
    4137             :          */
    4138   975007057 :         xfs_btree_set_numrecs(block, --numrecs);
    4139   975007057 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    4140             : 
    4141             :         /*
    4142             :          * If we are tracking the last record in the tree and
    4143             :          * we are at the far right edge of the tree, update it.
    4144             :          */
    4145   975027665 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    4146   131804230 :                 cur->bc_ops->update_lastrec(cur, block, NULL,
    4147             :                                             ptr, LASTREC_DELREC);
    4148             :         }
    4149             : 
    4150             :         /*
    4151             :          * We're at the root level.  First, shrink the root block in-memory.
    4152             :          * Try to get rid of the next level down.  If we can't then there's
    4153             :          * nothing left to do.
    4154             :          */
    4155   974960260 :         if (level == cur->bc_nlevels - 1) {
    4156   471506466 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    4157     3773397 :                         xfs_btree_iroot_realloc(cur, -1);
    4158             : 
    4159     3773397 :                         error = xfs_btree_kill_iroot(cur);
    4160     3773397 :                         if (error)
    4161           0 :                                 goto error0;
    4162             : 
    4163     3773397 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    4164     3773397 :                         if (error)
    4165           0 :                                 goto error0;
    4166     3773397 :                         *stat = 1;
    4167     3773397 :                         return 0;
    4168             :                 }
    4169             : 
    4170             :                 /*
    4171             :                  * If this is the root level, and there's only one entry left,
    4172             :                  * and it's NOT the leaf level, then we can get rid of this
    4173             :                  * level.
    4174             :                  */
    4175   467733069 :                 if (numrecs == 1 && level > 0) {
    4176       45316 :                         union xfs_btree_ptr     *pp;
    4177             :                         /*
    4178             :                          * pp is still set to the first pointer in the block.
    4179             :                          * Make it the new root of the btree.
    4180             :                          */
    4181       45316 :                         pp = xfs_btree_ptr_addr(cur, 1, block);
    4182       45316 :                         error = xfs_btree_kill_root(cur, bp, level, pp);
    4183       45316 :                         if (error)
    4184           0 :                                 goto error0;
    4185   467687753 :                 } else if (level > 0) {
    4186      143497 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    4187      143497 :                         if (error)
    4188           0 :                                 goto error0;
    4189             :                 }
    4190   467733069 :                 *stat = 1;
    4191   467733069 :                 return 0;
    4192             :         }
    4193             : 
    4194             :         /*
    4195             :          * If we deleted the leftmost entry in the block, update the
    4196             :          * key values above us in the tree.
    4197             :          */
    4198   733749224 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    4199   288211726 :                 error = xfs_btree_update_keys(cur, level);
    4200   288224376 :                 if (error)
    4201           0 :                         goto error0;
    4202             :         }
    4203             : 
    4204             :         /*
    4205             :          * If the number of records remaining in the block is at least
    4206             :          * the minimum, we're done.
    4207             :          */
    4208   503466444 :         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
    4209   414878227 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    4210   414874693 :                 if (error)
    4211           0 :                         goto error0;
    4212             :                 return 0;
    4213             :         }
    4214             : 
    4215             :         /*
    4216             :          * Otherwise, we have to move some records around to keep the
    4217             :          * tree balanced.  Look at the left and right sibling blocks to
    4218             :          * see if we can re-balance by moving only one record.
    4219             :          */
    4220    88576600 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4221    88576238 :         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
    4222             : 
    4223    88576250 :         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    4224             :                 /*
    4225             :                  * One child of root, need to get a chance to copy its contents
    4226             :                  * into the root and delete it. Can't go up to next level,
    4227             :                  * there's nothing to delete there.
    4228             :                  */
    4229   196409474 :                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
    4230    41124450 :                     xfs_btree_ptr_is_null(cur, &lptr) &&
    4231    41124450 :                     level == cur->bc_nlevels - 2) {
    4232    41124484 :                         error = xfs_btree_kill_iroot(cur);
    4233    41124379 :                         if (!error)
    4234    41124396 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4235    41124372 :                         if (error)
    4236           0 :                                 goto error0;
    4237             :                         return 0;
    4238             :                 }
    4239             :         }
    4240             : 
    4241   117075032 :         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
    4242             :                !xfs_btree_ptr_is_null(cur, &lptr));
    4243             : 
    4244             :         /*
    4245             :          * Duplicate the cursor so our btree manipulations here won't
    4246             :          * disrupt the next level up.
    4247             :          */
    4248    47451766 :         error = xfs_btree_dup_cursor(cur, &tcur);
    4249    47452264 :         if (error)
    4250           0 :                 goto error0;
    4251             : 
    4252             :         /*
    4253             :          * If there's a right sibling, see if it's ok to shift an entry
    4254             :          * out of it.
    4255             :          */
    4256    94904528 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    4257             :                 /*
    4258             :                  * Move the temp cursor to the last entry in the next block.
    4259             :                  * Actually any entry but the first would suffice.
    4260             :                  */
    4261    25280815 :                 i = xfs_btree_lastrec(tcur, level);
    4262    25280993 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4263           0 :                         xfs_btree_mark_sick(cur);
    4264           0 :                         error = -EFSCORRUPTED;
    4265           0 :                         goto error0;
    4266             :                 }
    4267             : 
    4268    25280993 :                 error = xfs_btree_increment(tcur, level, &i);
    4269    25281187 :                 if (error)
    4270          11 :                         goto error0;
    4271    25281176 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4272           0 :                         xfs_btree_mark_sick(cur);
    4273           0 :                         error = -EFSCORRUPTED;
    4274           0 :                         goto error0;
    4275             :                 }
    4276             : 
    4277    25281176 :                 i = xfs_btree_lastrec(tcur, level);
    4278    25281198 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4279           0 :                         xfs_btree_mark_sick(cur);
    4280           0 :                         error = -EFSCORRUPTED;
    4281           0 :                         goto error0;
    4282             :                 }
    4283             : 
    4284             :                 /* Grab a pointer to the block. */
    4285    25281198 :                 right = xfs_btree_get_block(tcur, level, &rbp);
    4286             : #ifdef DEBUG
    4287    25281238 :                 error = xfs_btree_check_block(tcur, right, level, rbp);
    4288    25281338 :                 if (error)
    4289           0 :                         goto error0;
    4290             : #endif
    4291             :                 /* Grab the current block number, for future use. */
    4292    25281338 :                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
    4293             : 
    4294             :                 /*
    4295             :                  * If right block is full enough so that removing one entry
    4296             :                  * won't make it too empty, and left-shifting an entry out
    4297             :                  * of right to us works, we're done.
    4298             :                  */
    4299    25281010 :                 if (xfs_btree_get_numrecs(right) - 1 >=
    4300    25281010 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4301    18922436 :                         error = xfs_btree_lshift(tcur, level, &i);
    4302    18922941 :                         if (error)
    4303           0 :                                 goto error0;
    4304    18922941 :                         if (i) {
    4305    18922792 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4306             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4307             : 
    4308    18922636 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4309    18923205 :                                 tcur = NULL;
    4310             : 
    4311    18923205 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4312    18923051 :                                 if (error)
    4313           0 :                                         goto error0;
    4314             :                                 return 0;
    4315             :                         }
    4316             :                 }
    4317             : 
    4318             :                 /*
    4319             :                  * Otherwise, grab the number of records in right for
    4320             :                  * future reference, and fix up the temp cursor to point
    4321             :                  * to our block again (last record).
    4322             :                  */
    4323     6358431 :                 rrecs = xfs_btree_get_numrecs(right);
    4324    12716862 :                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4325     6273963 :                         i = xfs_btree_firstrec(tcur, level);
    4326     6273994 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4327           0 :                                 xfs_btree_mark_sick(cur);
    4328           0 :                                 error = -EFSCORRUPTED;
    4329           0 :                                 goto error0;
    4330             :                         }
    4331             : 
    4332     6273994 :                         error = xfs_btree_decrement(tcur, level, &i);
    4333     6273999 :                         if (error)
    4334           0 :                                 goto error0;
    4335     6273999 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4336           0 :                                 xfs_btree_mark_sick(cur);
    4337           0 :                                 error = -EFSCORRUPTED;
    4338           0 :                                 goto error0;
    4339             :                         }
    4340             :                 }
    4341             :         }
    4342             : 
    4343             :         /*
    4344             :          * If there's a left sibling, see if it's ok to shift an entry
    4345             :          * out of it.
    4346             :          */
    4347    57059832 :         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4348             :                 /*
    4349             :                  * Move the temp cursor to the first entry in the
    4350             :                  * previous block.
    4351             :                  */
    4352    28445403 :                 i = xfs_btree_firstrec(tcur, level);
    4353    28445458 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4354           0 :                         xfs_btree_mark_sick(cur);
    4355           0 :                         error = -EFSCORRUPTED;
    4356           0 :                         goto error0;
    4357             :                 }
    4358             : 
    4359    28445458 :                 error = xfs_btree_decrement(tcur, level, &i);
    4360    28445495 :                 if (error)
    4361           2 :                         goto error0;
    4362    28445493 :                 i = xfs_btree_firstrec(tcur, level);
    4363    28445499 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4364           0 :                         xfs_btree_mark_sick(cur);
    4365           0 :                         error = -EFSCORRUPTED;
    4366           0 :                         goto error0;
    4367             :                 }
    4368             : 
    4369             :                 /* Grab a pointer to the block. */
    4370    28445499 :                 left = xfs_btree_get_block(tcur, level, &lbp);
    4371             : #ifdef DEBUG
    4372    28445493 :                 error = xfs_btree_check_block(cur, left, level, lbp);
    4373    28445515 :                 if (error)
    4374           0 :                         goto error0;
    4375             : #endif
    4376             :                 /* Grab the current block number, for future use. */
    4377    28445515 :                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
    4378             : 
    4379             :                 /*
    4380             :                  * If left block is full enough so that removing one entry
    4381             :                  * won't make it too empty, and right-shifting an entry out
    4382             :                  * of left to us works, we're done.
    4383             :                  */
    4384    28445491 :                 if (xfs_btree_get_numrecs(left) - 1 >=
    4385    28445491 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4386    27946647 :                         error = xfs_btree_rshift(tcur, level, &i);
    4387    27946700 :                         if (error)
    4388           0 :                                 goto error0;
    4389    27946700 :                         if (i) {
    4390    27946694 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4391             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4392    27946683 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4393    27946730 :                                 tcur = NULL;
    4394    27946730 :                                 if (level == 0)
    4395    27939628 :                                         cur->bc_levels[0].ptr++;
    4396             : 
    4397    27946730 :                                 *stat = 1;
    4398    27946730 :                                 return 0;
    4399             :                         }
    4400             :                 }
    4401             : 
    4402             :                 /*
    4403             :                  * Otherwise, grab the number of records in left for
    4404             :                  * future reference.
    4405             :                  */
    4406      498818 :                 lrecs = xfs_btree_get_numrecs(left);
    4407             :         }
    4408             : 
    4409             :         /* Delete the temp cursor, we're done with it. */
    4410      583331 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4411      583283 :         tcur = NULL;
    4412             : 
    4413             :         /* If here, we need to do a join to keep the tree balanced. */
    4414     1166566 :         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
    4415             : 
    4416     1665384 :         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
    4417      498818 :             lrecs + xfs_btree_get_numrecs(block) <=
    4418      498818 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4419             :                 /*
    4420             :                  * Set "right" to be the starting block,
    4421             :                  * "left" to be the left neighbor.
    4422             :                  */
    4423      498818 :                 rptr = cptr;
    4424      498818 :                 right = block;
    4425      498818 :                 rbp = bp;
    4426      498818 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    4427      498818 :                 if (error)
    4428           0 :                         goto error0;
    4429             : 
    4430             :         /*
    4431             :          * If that won't work, see if we can join with the right neighbor block.
    4432             :          */
    4433      253395 :         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
    4434       84465 :                    rrecs + xfs_btree_get_numrecs(block) <=
    4435       84465 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4436             :                 /*
    4437             :                  * Set "left" to be the starting block,
    4438             :                  * "right" to be the right neighbor.
    4439             :                  */
    4440       84465 :                 lptr = cptr;
    4441       84465 :                 left = block;
    4442       84465 :                 lbp = bp;
    4443       84465 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    4444       84465 :                 if (error)
    4445           0 :                         goto error0;
    4446             : 
    4447             :         /*
    4448             :          * Otherwise, we can't fix the imbalance.
    4449             :          * Just return.  This is probably a logic error, but it's not fatal.
    4450             :          */
    4451             :         } else {
    4452           0 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    4453           0 :                 if (error)
    4454           0 :                         goto error0;
    4455             :                 return 0;
    4456             :         }
    4457             : 
    4458      583283 :         rrecs = xfs_btree_get_numrecs(right);
    4459      583283 :         lrecs = xfs_btree_get_numrecs(left);
    4460             : 
    4461             :         /*
    4462             :          * We're now going to join "left" and "right" by moving all the stuff
    4463             :          * in "right" to "left" and deleting "right".
    4464             :          */
    4465      583283 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    4466      583282 :         if (level > 0) {
    4467             :                 /* It's a non-leaf.  Move keys and pointers. */
    4468        1110 :                 union xfs_btree_key     *lkp;   /* left btree key */
    4469        1110 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    4470        1110 :                 union xfs_btree_key     *rkp;   /* right btree key */
    4471        1110 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    4472             : 
    4473        1110 :                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
    4474        1110 :                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
    4475        1110 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    4476        1110 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    4477             : 
    4478       62057 :                 for (i = 1; i < rrecs; i++) {
    4479       60947 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    4480       60947 :                         if (error)
    4481           0 :                                 goto error0;
    4482             :                 }
    4483             : 
    4484        1110 :                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
    4485        1110 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
    4486             : 
    4487        1110 :                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
    4488        1110 :                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4489             :         } else {
    4490             :                 /* It's a leaf.  Move records.  */
    4491      582172 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    4492      582172 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    4493             : 
    4494      582172 :                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
    4495      582172 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    4496             : 
    4497      582172 :                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
    4498      582172 :                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4499             :         }
    4500             : 
    4501      583283 :         XFS_BTREE_STATS_INC(cur, join);
    4502             : 
    4503             :         /*
    4504             :          * Fix up the number of records and right block pointer in the
    4505             :          * surviving block, and log it.
    4506             :          */
    4507      583283 :         xfs_btree_set_numrecs(left, lrecs + rrecs);
    4508      583283 :         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
    4509      583282 :         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4510      583282 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    4511             : 
    4512             :         /* If there is a right sibling, point it to the remaining block. */
    4513      583283 :         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4514     1166564 :         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
    4515      311904 :                 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
    4516      311904 :                 if (error)
    4517           0 :                         goto error0;
    4518      311904 :                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
    4519      311904 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    4520             :         }
    4521             : 
    4522             :         /* Free the deleted block. */
    4523      583282 :         error = xfs_btree_free_block(cur, rbp);
    4524      583283 :         if (error)
    4525           0 :                 goto error0;
    4526             : 
    4527             :         /*
    4528             :          * If we joined with the left neighbor, set the buffer in the
    4529             :          * cursor to the left block, and fix up the index.
    4530             :          */
    4531      583283 :         if (bp != lbp) {
    4532      498818 :                 cur->bc_levels[level].bp = lbp;
    4533      498818 :                 cur->bc_levels[level].ptr += lrecs;
    4534      498818 :                 cur->bc_levels[level].ra = 0;
    4535             :         }
    4536             :         /*
    4537             :          * If we joined with the right neighbor and there's a level above
    4538             :          * us, increment the cursor at that level.
    4539             :          */
    4540       84465 :         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
    4541       68104 :                    (level + 1 < cur->bc_nlevels)) {
    4542       84465 :                 error = xfs_btree_increment(cur, level + 1, &i);
    4543       84465 :                 if (error)
    4544           0 :                         goto error0;
    4545             :         }
    4546             : 
    4547             :         /*
    4548             :          * Readjust the ptr at this level if it's not a leaf, since it's
    4549             :          * still pointing at the deletion point, which makes the cursor
    4550             :          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
    4551             :          * We can't use decrement because it would change the next level up.
    4552             :          */
    4553      583283 :         if (level > 0)
    4554        1110 :                 cur->bc_levels[level].ptr--;
    4555             : 
    4556             :         /*
    4557             :          * We combined blocks, so we have to update the parent keys if the
    4558             :          * btree supports overlapped intervals.  However,
    4559             :          * bc_levels[level + 1].ptr points to the old block so that the caller
    4560             :          * knows which record to delete.  Therefore, the caller must be savvy
    4561             :          * enough to call updkeys for us if we return stat == 2.  The other
    4562             :          * exit points from this function don't require deletions further up
    4563             :          * the tree, so they can call updkeys directly.
    4564             :          */
    4565             : 
    4566             :         /* Return value means the next level up has something to do. */
    4567      583283 :         *stat = 2;
    4568      583283 :         return 0;
    4569             : 
    4570          13 : error0:
    4571          13 :         if (tcur)
    4572          13 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    4573             :         return error;
    4574             : }
    4575             : 
    4576             : /*
    4577             :  * Delete the record pointed to by cur.
    4578             :  * The cursor refers to the place where the record was (could be inserted)
    4579             :  * when the operation returns.
    4580             :  */
    4581             : int                                     /* error */
    4582   974405854 : xfs_btree_delete(
    4583             :         struct xfs_btree_cur    *cur,
    4584             :         int                     *stat)  /* success/failure */
    4585             : {
    4586   974405854 :         int                     error;  /* error return value */
    4587   974405854 :         int                     level;
    4588   974405854 :         int                     i;
    4589   974405854 :         bool                    joined = false;
    4590             : 
    4591             :         /*
    4592             :          * Go up the tree, starting at leaf level.
    4593             :          *
    4594             :          * If 2 is returned then a join was done; go to the next level.
    4595             :          * Otherwise we are done.
    4596             :          */
    4597  1949371679 :         for (level = 0, i = 2; i == 2; level++) {
    4598   974989185 :                 error = xfs_btree_delrec(cur, level, &i);
    4599   974965838 :                 if (error)
    4600          13 :                         goto error0;
    4601   974965825 :                 if (i == 2)
    4602      583283 :                         joined = true;
    4603             :         }
    4604             : 
    4605             :         /*
    4606             :          * If we combined blocks as part of deleting the record, delrec won't
    4607             :          * have updated the parent high keys so we have to do that here.
    4608             :          */
    4609   974382494 :         if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
    4610      238445 :                 error = xfs_btree_updkeys_force(cur, 0);
    4611      238446 :                 if (error)
    4612           0 :                         goto error0;
    4613             :         }
    4614             : 
    4615   974382495 :         if (i == 0) {
    4616           0 :                 for (level = 1; level < cur->bc_nlevels; level++) {
    4617           0 :                         if (cur->bc_levels[level].ptr == 0) {
    4618           0 :                                 error = xfs_btree_decrement(cur, level, &i);
    4619           0 :                                 if (error)
    4620           0 :                                         goto error0;
    4621             :                                 break;
    4622             :                         }
    4623             :                 }
    4624             :         }
    4625             : 
    4626   974382495 :         *stat = i;
    4627   974382495 :         return 0;
    4628             : error0:
    4629             :         return error;
    4630             : }
    4631             : 
    4632             : /*
    4633             :  * Get the data from the pointed-to record.
    4634             :  */
    4635             : int                                     /* error */
    4636 >26286*10^7 : xfs_btree_get_rec(
    4637             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    4638             :         union xfs_btree_rec     **recp, /* output: btree record */
    4639             :         int                     *stat)  /* output: success/failure */
    4640             : {
    4641 >26286*10^7 :         struct xfs_btree_block  *block; /* btree block */
    4642 >26286*10^7 :         struct xfs_buf          *bp;    /* buffer pointer */
    4643 >26286*10^7 :         int                     ptr;    /* record number */
    4644             : #ifdef DEBUG
    4645 >26286*10^7 :         int                     error;  /* error return value */
    4646             : #endif
    4647             : 
    4648 >26286*10^7 :         ptr = cur->bc_levels[0].ptr;
    4649 >26286*10^7 :         block = xfs_btree_get_block(cur, 0, &bp);
    4650             : 
    4651             : #ifdef DEBUG
    4652 >26180*10^7 :         error = xfs_btree_check_block(cur, block, 0, bp);
    4653 >26165*10^7 :         if (error)
    4654             :                 return error;
    4655             : #endif
    4656             : 
    4657             :         /*
    4658             :          * Off the right end or left end, return failure.
    4659             :          */
    4660 >26165*10^7 :         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
    4661    18665101 :                 *stat = 0;
    4662    18665101 :                 return 0;
    4663             :         }
    4664             : 
    4665             :         /*
    4666             :          * Point to the record and extract its data.
    4667             :          */
    4668 >26164*10^7 :         *recp = xfs_btree_rec_addr(cur, ptr, block);
    4669 >26164*10^7 :         *stat = 1;
    4670 >26164*10^7 :         return 0;
    4671             : }
    4672             : 
    4673             : /* Visit a block in a btree. */
    4674             : STATIC int
    4675   236599218 : xfs_btree_visit_block(
    4676             :         struct xfs_btree_cur            *cur,
    4677             :         int                             level,
    4678             :         xfs_btree_visit_blocks_fn       fn,
    4679             :         void                            *data)
    4680             : {
    4681   236599218 :         struct xfs_btree_block          *block;
    4682   236599218 :         struct xfs_buf                  *bp;
    4683   236599218 :         union xfs_btree_ptr             rptr, bufptr;
    4684   236599218 :         int                             error;
    4685             : 
    4686             :         /* do right sibling readahead */
    4687   236599218 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    4688   236595685 :         block = xfs_btree_get_block(cur, level, &bp);
    4689             : 
    4690             :         /* process the block */
    4691   236592438 :         error = fn(cur, level, data);
    4692   236590340 :         if (error)
    4693             :                 return error;
    4694             : 
    4695             :         /* now read rh sibling block for next iteration */
    4696   236587610 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4697   473171016 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    4698             :                 return -ENOENT;
    4699             : 
    4700             :         /*
    4701             :          * We only visit blocks once in this walk, so we have to avoid the
    4702             :          * internal xfs_btree_lookup_get_block() optimisation where it will
    4703             :          * return the same block without checking if the right sibling points
    4704             :          * back to us and creates a cyclic reference in the btree.
    4705             :          */
    4706   166066819 :         xfs_btree_buf_to_ptr(cur, bp, &bufptr);
    4707   166068526 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4708    21697263 :                 if (rptr.l == bufptr.l) {
    4709           0 :                         xfs_btree_mark_sick(cur);
    4710           0 :                         return -EFSCORRUPTED;
    4711             :                 }
    4712             :         } else {
    4713   144371263 :                 if (rptr.s == bufptr.s) {
    4714           0 :                         xfs_btree_mark_sick(cur);
    4715           0 :                         return -EFSCORRUPTED;
    4716             :                 }
    4717             :         }
    4718   166068526 :         return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
    4719             : }
    4720             : 
    4721             : 
    4722             : /* Visit every block in a btree. */
    4723             : int
    4724    40796848 : xfs_btree_visit_blocks(
    4725             :         struct xfs_btree_cur            *cur,
    4726             :         xfs_btree_visit_blocks_fn       fn,
    4727             :         unsigned int                    flags,
    4728             :         void                            *data)
    4729             : {
    4730    40796848 :         union xfs_btree_ptr             lptr;
    4731    40796848 :         int                             level;
    4732    40796848 :         struct xfs_btree_block          *block = NULL;
    4733    40796848 :         int                             error = 0;
    4734             : 
    4735    40796848 :         cur->bc_ops->init_ptr_from_cur(cur, &lptr);
    4736             : 
    4737             :         /* for each level */
    4738   111567999 :         for (level = cur->bc_nlevels - 1; level >= 0; level--) {
    4739             :                 /* grab the left hand block */
    4740    70770287 :                 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
    4741    70779309 :                 if (error)
    4742         233 :                         return error;
    4743             : 
    4744             :                 /* readahead the left most block for the next level down */
    4745    70779076 :                 if (level > 0) {
    4746    29972538 :                         union xfs_btree_ptr     *ptr;
    4747             : 
    4748    29972538 :                         ptr = xfs_btree_ptr_addr(cur, 1, block);
    4749    29972342 :                         xfs_btree_readahead_ptr(cur, ptr, 1);
    4750             : 
    4751             :                         /* save for the next iteration of the loop */
    4752    29972992 :                         xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
    4753             : 
    4754    29972600 :                         if (!(flags & XFS_BTREE_VISIT_LEAVES))
    4755      250542 :                                 continue;
    4756    40806538 :                 } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
    4757           0 :                         continue;
    4758             :                 }
    4759             : 
    4760             :                 /* for each buffer in the level */
    4761   236599429 :                 do {
    4762   236599429 :                         error = xfs_btree_visit_block(cur, level, fn, data);
    4763   236592006 :                 } while (!error);
    4764             : 
    4765    70521173 :                 if (error != -ENOENT)
    4766        1549 :                         return error;
    4767             :         }
    4768             : 
    4769             :         return 0;
    4770             : }
    4771             : 
    4772             : /*
    4773             :  * Change the owner of a btree.
    4774             :  *
    4775             :  * The mechanism we use here is ordered buffer logging. Because we don't know
    4776             :  * how many buffers we are going to need to modify, we don't really want to
    4777             :  * have to make transaction reservations for the worst case of every buffer in a
    4778             :  * full size btree as that may be more space than we can fit in the log....
    4779             :  *
    4780             :  * We do the btree walk in the most optimal manner possible - we have sibling
    4781             :  * pointers so we can just walk all the blocks on each level from left to right
    4782             :  * in a single pass, and then move to the next level and do the same. We can
    4783             :  * also do readahead on the sibling pointers to get IO moving more quickly,
    4784             :  * though for slow disks this is unlikely to make much difference to performance
    4785             :  * as the amount of CPU work we have to do before moving to the next block is
    4786             :  * relatively small.
    4787             :  *
    4788             :  * For each btree block that we load, modify the owner appropriately, set the
    4789             :  * buffer as an ordered buffer and log it appropriately. We need to ensure that
    4790             :  * we mark the region we change dirty so that if the buffer is relogged in
    4791             :  * a subsequent transaction the changes we make here as an ordered buffer are
    4792             :  * correctly relogged in that transaction.  If we are in recovery context, then
    4793             :  * just queue the modified buffer as a delayed write buffer so the transaction
    4794             :  * recovery completion writes the changes to disk.
    4795             :  */
    4796             : struct xfs_btree_block_change_owner_info {
    4797             :         uint64_t                new_owner;
    4798             :         struct list_head        *buffer_list;
    4799             : };
    4800             : 
    4801             : static int
    4802       47737 : xfs_btree_block_change_owner(
    4803             :         struct xfs_btree_cur    *cur,
    4804             :         int                     level,
    4805             :         void                    *data)
    4806             : {
    4807       47737 :         struct xfs_btree_block_change_owner_info        *bbcoi = data;
    4808       47737 :         struct xfs_btree_block  *block;
    4809       47737 :         struct xfs_buf          *bp;
    4810             : 
    4811             :         /* modify the owner */
    4812       47737 :         block = xfs_btree_get_block(cur, level, &bp);
    4813       47737 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4814       47737 :                 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
    4815             :                         return 0;
    4816        2912 :                 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
    4817             :         } else {
    4818           0 :                 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
    4819             :                         return 0;
    4820           0 :                 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
    4821             :         }
    4822             : 
    4823             :         /*
    4824             :          * If the block is a root block hosted in an inode, we might not have a
    4825             :          * buffer pointer here and we shouldn't attempt to log the change as the
    4826             :          * information is already held in the inode and discarded when the root
    4827             :          * block is formatted into the on-disk inode fork. We still change it,
    4828             :          * though, so everything is consistent in memory.
    4829             :          */
    4830        2912 :         if (!bp) {
    4831        1159 :                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    4832        1159 :                 ASSERT(level == cur->bc_nlevels - 1);
    4833        1159 :                 return 0;
    4834             :         }
    4835             : 
    4836        1753 :         if (cur->bc_tp) {
    4837        1753 :                 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
    4838        1548 :                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
    4839        1548 :                         return -EAGAIN;
    4840             :                 }
    4841             :         } else {
    4842           0 :                 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
    4843             :         }
    4844             : 
    4845             :         return 0;
    4846             : }
    4847             : 
    4848             : int
    4849        2707 : xfs_btree_change_owner(
    4850             :         struct xfs_btree_cur    *cur,
    4851             :         uint64_t                new_owner,
    4852             :         struct list_head        *buffer_list)
    4853             : {
    4854        2707 :         struct xfs_btree_block_change_owner_info        bbcoi;
    4855             : 
    4856        2707 :         bbcoi.new_owner = new_owner;
    4857        2707 :         bbcoi.buffer_list = buffer_list;
    4858             : 
    4859        2707 :         return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
    4860             :                         XFS_BTREE_VISIT_ALL, &bbcoi);
    4861             : }
    4862             : 
    4863             : /* Verify the v5 fields of a long-format btree block. */
    4864             : xfs_failaddr_t
    4865   810891614 : xfs_btree_lblock_v5hdr_verify(
    4866             :         struct xfs_buf          *bp,
    4867             :         uint64_t                owner)
    4868             : {
    4869   810891614 :         struct xfs_mount        *mp = bp->b_mount;
    4870   810891614 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4871             : 
    4872   810891614 :         if (!xfs_has_crc(mp))
    4873           0 :                 return __this_address;
    4874   810891614 :         if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4875           0 :                 return __this_address;
    4876   810889099 :         if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4877           0 :                 return __this_address;
    4878   810889099 :         if (owner != XFS_RMAP_OWN_UNKNOWN &&
    4879           0 :             be64_to_cpu(block->bb_u.l.bb_owner) != owner)
    4880           0 :                 return __this_address;
    4881             :         return NULL;
    4882             : }
    4883             : 
    4884             : /* Verify a long-format btree block. */
    4885             : xfs_failaddr_t
    4886    62977264 : xfs_btree_lblock_verify(
    4887             :         struct xfs_buf          *bp,
    4888             :         unsigned int            max_recs)
    4889             : {
    4890    62977264 :         struct xfs_mount        *mp = bp->b_mount;
    4891    62977264 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4892    62977264 :         xfs_fsblock_t           fsb;
    4893    62977264 :         xfs_failaddr_t          fa;
    4894             : 
    4895    62977264 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4896             : 
    4897             :         /* numrecs verification */
    4898    62977264 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4899           0 :                 return __this_address;
    4900             : 
    4901             :         /* sibling pointer verification */
    4902    62977264 :         fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
    4903    62977367 :         fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4904             :                         block->bb_u.l.bb_leftsib);
    4905    62977112 :         if (!fa)
    4906    62977588 :                 fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4907             :                                 block->bb_u.l.bb_rightsib);
    4908             :         return fa;
    4909             : }
    4910             : 
    4911             : /**
    4912             :  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
    4913             :  *                                    btree block
    4914             :  *
    4915             :  * @bp: buffer containing the btree block
    4916             :  */
    4917             : xfs_failaddr_t
    4918   208249006 : xfs_btree_sblock_v5hdr_verify(
    4919             :         struct xfs_buf          *bp)
    4920             : {
    4921   208249006 :         struct xfs_mount        *mp = bp->b_mount;
    4922   208249006 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4923   208249006 :         struct xfs_perag        *pag = bp->b_pag;
    4924             : 
    4925   208249006 :         if (!xfs_has_crc(mp))
    4926           0 :                 return __this_address;
    4927   208249006 :         if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4928           0 :                 return __this_address;
    4929   208248412 :         if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4930           0 :                 return __this_address;
    4931   208248412 :         if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
    4932           0 :                 return __this_address;
    4933             :         return NULL;
    4934             : }
    4935             : 
    4936             : /**
    4937             :  * xfs_btree_sblock_verify() -- verify a short-format btree block
    4938             :  *
    4939             :  * @bp: buffer containing the btree block
    4940             :  * @max_recs: maximum records allowed in this btree node
    4941             :  */
    4942             : xfs_failaddr_t
    4943    55858023 : xfs_btree_sblock_verify(
    4944             :         struct xfs_buf          *bp,
    4945             :         unsigned int            max_recs)
    4946             : {
    4947    55858023 :         struct xfs_mount        *mp = bp->b_mount;
    4948    55858023 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4949    55858023 :         xfs_agblock_t           agbno;
    4950    55858023 :         xfs_failaddr_t          fa;
    4951             : 
    4952    55858023 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4953             : 
    4954             :         /* numrecs verification */
    4955    55858023 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4956           0 :                 return __this_address;
    4957             : 
    4958             :         /* sibling pointer verification */
    4959    55858023 :         agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
    4960    55858670 :         fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4961             :                         block->bb_u.s.bb_leftsib);
    4962    55857268 :         if (!fa)
    4963    55859405 :                 fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4964             :                                 block->bb_u.s.bb_rightsib);
    4965             :         return fa;
    4966             : }
    4967             : 
    4968             : /*
    4969             :  * For the given limits on leaf and keyptr records per block, calculate the
    4970             :  * height of the tree needed to index the number of leaf records.
    4971             :  */
    4972             : unsigned int
    4973      614538 : xfs_btree_compute_maxlevels(
    4974             :         const unsigned int      *limits,
    4975             :         unsigned long long      records)
    4976             : {
    4977      614538 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4978      614538 :         unsigned int            height = 1;
    4979             : 
    4980     3965636 :         while (level_blocks > 1) {
    4981     3351098 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    4982     3351098 :                 height++;
    4983             :         }
    4984             : 
    4985      614538 :         return height;
    4986             : }
    4987             : 
    4988             : /*
    4989             :  * For the given limits on leaf and keyptr records per block, calculate the
    4990             :  * number of blocks needed to index the given number of leaf records.
    4991             :  */
    4992             : unsigned long long
    4993    16439736 : xfs_btree_calc_size(
    4994             :         const unsigned int      *limits,
    4995             :         unsigned long long      records)
    4996             : {
    4997    16439736 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4998    16439736 :         unsigned long long      blocks = level_blocks;
    4999             : 
    5000    45885311 :         while (level_blocks > 1) {
    5001    29442780 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    5002    29445575 :                 blocks += level_blocks;
    5003             :         }
    5004             : 
    5005    16440970 :         return blocks;
    5006             : }
    5007             : 
    5008             : /*
    5009             :  * Given a number of available blocks for the btree to consume with records and
    5010             :  * pointers, calculate the height of the tree needed to index all the records
    5011             :  * that space can hold based on the number of pointers each interior node
    5012             :  * holds.
    5013             :  *
    5014             :  * We start by assuming a single level tree consumes a single block, then track
    5015             :  * the number of blocks each node level consumes until we no longer have space
    5016             :  * to store the next node level. At this point, we are indexing all the leaf
    5017             :  * blocks in the space, and there's no more free space to split the tree any
    5018             :  * further. That's our maximum btree height.
    5019             :  */
    5020             : unsigned int
    5021  1553298249 : xfs_btree_space_to_height(
    5022             :         const unsigned int      *limits,
    5023             :         unsigned long long      leaf_blocks)
    5024             : {
    5025             :         /*
    5026             :          * The root btree block can have fewer than minrecs pointers in it
    5027             :          * because the tree might not be big enough to require that amount of
    5028             :          * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
    5029             :          */
    5030  1553298249 :         unsigned long long      node_blocks = 2;
    5031  1553298249 :         unsigned long long      blocks_left = leaf_blocks - 1;
    5032  1553298249 :         unsigned int            height = 1;
    5033             : 
    5034  1553298249 :         if (leaf_blocks < 1)
    5035             :                 return 0;
    5036             : 
    5037 17085513749 :         while (node_blocks < blocks_left) {
    5038 15532215500 :                 blocks_left -= node_blocks;
    5039 15532215500 :                 node_blocks *= limits[1];
    5040 15532215500 :                 height++;
    5041             :         }
    5042             : 
    5043             :         return height;
    5044             : }
    5045             : 
    5046             : /*
    5047             :  * Query a regular btree for all records overlapping a given interval.
    5048             :  * Start with a LE lookup of the low record set in the cursor and return
    5049             :  * all records until we find one whose low key is greater than high_key.
    5050             :  */
    5051             : STATIC int
    5052  5236785266 : xfs_btree_simple_query_range(
    5053             :         struct xfs_btree_cur            *cur,
    5054             :         const union xfs_btree_key       *low_key,
    5055             :         const union xfs_btree_key       *high_key,
    5056             :         xfs_btree_query_range_fn        fn,
    5057             :         void                            *priv)
    5058             : {
    5059  5236785266 :         union xfs_btree_rec             *recp;
    5060  5236785266 :         union xfs_btree_key             rec_key;
    5061  5236785266 :         int                             stat;
    5062  5236785266 :         bool                            firstrec = true;
    5063  5236785266 :         int                             error;
    5064             : 
    5065  5236785266 :         ASSERT(cur->bc_ops->init_high_key_from_rec);
    5066  5236785266 :         ASSERT(cur->bc_ops->diff_two_keys);
    5067             : 
    5068             :         /*
    5069             :          * Find the leftmost record.  The btree cursor must be set
    5070             :          * to the low record used to generate low_key.
    5071             :          */
    5072  5236785266 :         stat = 0;
    5073  5236785266 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    5074  5239254651 :         if (error)
    5075          20 :                 goto out;
    5076             : 
    5077             :         /* Nothing?  See if there's anything to the right. */
    5078  5239254631 :         if (!stat) {
    5079  2166201320 :                 error = xfs_btree_increment(cur, 0, &stat);
    5080  2165801970 :                 if (error)
    5081           0 :                         goto out;
    5082             :         }
    5083             : 
    5084 >21665*10^7 :         while (stat) {
    5085             :                 /* Find the record. */
    5086 >21502*10^7 :                 error = xfs_btree_get_rec(cur, &recp, &stat);
    5087 >21472*10^7 :                 if (error || !stat)
    5088             :                         break;
    5089             : 
    5090             :                 /* Skip if low_key > high_key(rec). */
    5091 >21472*10^7 :                 if (firstrec) {
    5092  3957260949 :                         cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
    5093  3956210057 :                         firstrec = false;
    5094  3956210057 :                         if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
    5095  3071278594 :                                 goto advloop;
    5096             :                 }
    5097             : 
    5098             :                 /* Stop if low_key(rec) > high_key. */
    5099 >21164*10^7 :                 cur->bc_ops->init_key_from_rec(&rec_key, recp);
    5100 >21197*10^7 :                 if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
    5101             :                         break;
    5102             : 
    5103             :                 /* Callback */
    5104 >20771*10^7 :                 error = fn(cur, recp, priv);
    5105 >20865*10^7 :                 if (error)
    5106             :                         break;
    5107             : 
    5108 >20865*10^7 : advloop:
    5109             :                 /* Move on to the next record. */
    5110 >21172*10^7 :                 error = xfs_btree_increment(cur, 0, &stat);
    5111 >21141*10^7 :                 if (error)
    5112             :                         break;
    5113             :         }
    5114             : 
    5115  5237247863 : out:
    5116  5237247883 :         return error;
    5117             : }
    5118             : 
    5119             : /*
    5120             :  * Query an overlapped interval btree for all records overlapping a given
    5121             :  * interval.  This function roughly follows the algorithm given in
    5122             :  * "Interval Trees" of _Introduction to Algorithms_, which is section
    5123             :  * 14.3 in the 2nd and 3rd editions.
    5124             :  *
    5125             :  * The caller supplies keys for the low and high ends of the query.
    5126             :  *
    5127             :  * For any leaf node, generate the high and low keys for the record.
    5128             :  * If the record keys overlap with the query low/high keys, pass the
    5129             :  * record to the function iterator.
    5130             :  *
    5131             :  * For any internal node, compare the low and high keys of each
    5132             :  * pointer against the query low/high keys.  If there's an overlap,
    5133             :  * follow the pointer.
    5134             :  *
    5135             :  * As an optimization, we stop scanning a block when we find a low key
    5136             :  * that is greater than the query's high key.
    5137             :  */
    5138             : STATIC int
    5139  2011913811 : xfs_btree_overlapped_query_range(
    5140             :         struct xfs_btree_cur            *cur,
    5141             :         const union xfs_btree_key       *low_key,
    5142             :         const union xfs_btree_key       *high_key,
    5143             :         xfs_btree_query_range_fn        fn,
    5144             :         void                            *priv)
    5145             : {
    5146  2011913811 :         union xfs_btree_ptr             ptr;
    5147  2011913811 :         union xfs_btree_ptr             *pp;
    5148  2011913811 :         union xfs_btree_key             rec_key;
    5149  2011913811 :         union xfs_btree_key             rec_hkey;
    5150  2011913811 :         union xfs_btree_key             *lkp;
    5151  2011913811 :         union xfs_btree_key             *hkp;
    5152  2011913811 :         union xfs_btree_rec             *recp;
    5153  2011913811 :         struct xfs_btree_block          *block;
    5154  2011913811 :         int                             level;
    5155  2011913811 :         struct xfs_buf                  *bp;
    5156  2011913811 :         int                             i;
    5157  2011913811 :         int                             error;
    5158             : 
    5159             :         /* Load the root of the btree. */
    5160  2011913811 :         level = cur->bc_nlevels - 1;
    5161  2011913811 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    5162  2012246956 :         error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
    5163  2013043376 :         if (error)
    5164             :                 return error;
    5165  2013055147 :         xfs_btree_get_block(cur, level, &bp);
    5166  2012908080 :         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    5167             : #ifdef DEBUG
    5168  2012723543 :         error = xfs_btree_check_block(cur, block, level, bp);
    5169  2012282101 :         if (error)
    5170           0 :                 goto out;
    5171             : #endif
    5172  2012282101 :         cur->bc_levels[level].ptr = 1;
    5173             : 
    5174 >23601*10^7 :         while (level < cur->bc_nlevels) {
    5175 >23400*10^7 :                 block = xfs_btree_get_block(cur, level, &bp);
    5176             : 
    5177             :                 /* End of node, pop back towards the root. */
    5178 >23954*10^7 :                 if (cur->bc_levels[level].ptr >
    5179 >23394*10^7 :                                         be16_to_cpu(block->bb_numrecs)) {
    5180   837984764 : pop_up:
    5181  5598005471 :                         if (level < cur->bc_nlevels - 1)
    5182  3590693860 :                                 cur->bc_levels[level + 1].ptr++;
    5183  5598005471 :                         level++;
    5184  5598005471 :                         continue;
    5185             :                 }
    5186             : 
    5187 >23311*10^7 :                 if (level == 0) {
    5188             :                         /* Handle a leaf node. */
    5189 >16182*10^7 :                         recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
    5190             :                                         block);
    5191             : 
    5192 >16182*10^7 :                         cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
    5193 >16174*10^7 :                         cur->bc_ops->init_key_from_rec(&rec_key, recp);
    5194             : 
    5195             :                         /*
    5196             :                          * If (query's high key < record's low key), then there
    5197             :                          * are no more interesting records in this block.  Pop
    5198             :                          * up to the parent level to find more record blocks.
    5199             :                          *
    5200             :                          * If (record's high key >= query's low key) and
    5201             :                          *    (query's high key >= record's low key), then
    5202             :                          * this record overlaps the query range; callback.
    5203             :                          */
    5204 >16171*10^7 :                         if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
    5205  1962013345 :                                 goto pop_up;
    5206 >15978*10^7 :                         if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
    5207 17784986031 :                                 error = fn(cur, recp, priv);
    5208 17794538449 :                                 if (error)
    5209             :                                         break;
    5210             :                         }
    5211 >15990*10^7 :                         cur->bc_levels[level].ptr++;
    5212 >15990*10^7 :                         continue;
    5213             :                 }
    5214             : 
    5215             :                 /* Handle an internal node. */
    5216 71282512049 :                 lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
    5217 71282512049 :                 hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
    5218             :                                 block);
    5219 71282512049 :                 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
    5220             : 
    5221             :                 /*
    5222             :                  * If (query's high key < pointer's low key), then there are no
    5223             :                  * more interesting keys in this block.  Pop up one level
    5224             :                  * to continue looking for records.
    5225             :                  *
    5226             :                  * If (pointer's high key >= query's low key) and
    5227             :                  *    (query's high key >= pointer's low key), then
    5228             :                  * this subtree overlaps the query range; follow pointer.
    5229             :                  */
    5230 71255649049 :                 if (xfs_btree_keycmp_lt(cur, high_key, lkp))
    5231  2798007362 :                         goto pop_up;
    5232 68463277907 :                 if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
    5233  3600125120 :                         level--;
    5234  3600125120 :                         error = xfs_btree_lookup_get_block(cur, level, pp,
    5235             :                                         &block);
    5236  3600677968 :                         if (error)
    5237           0 :                                 goto out;
    5238  3600677968 :                         xfs_btree_get_block(cur, level, &bp);
    5239  3600518302 :                         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    5240             : #ifdef DEBUG
    5241  3600479088 :                         error = xfs_btree_check_block(cur, block, level, bp);
    5242  3600427409 :                         if (error)
    5243           0 :                                 goto out;
    5244             : #endif
    5245  3600427409 :                         cur->bc_levels[level].ptr = 1;
    5246  3600427409 :                         continue;
    5247             :                 }
    5248 64904116819 :                 cur->bc_levels[level].ptr++;
    5249             :         }
    5250             : 
    5251  2013144968 : out:
    5252             :         /*
    5253             :          * If we don't end this function with the cursor pointing at a record
    5254             :          * block, a subsequent non-error cursor deletion will not release
    5255             :          * node-level buffers, causing a buffer leak.  This is quite possible
    5256             :          * with a zero-results range query, so release the buffers if we
    5257             :          * failed to return any results.
    5258             :          */
    5259  2013144968 :         if (cur->bc_levels[0].bp == NULL) {
    5260     1437866 :                 for (i = 0; i < cur->bc_nlevels; i++) {
    5261      764177 :                         if (cur->bc_levels[i].bp) {
    5262       54487 :                                 xfs_trans_brelse(cur->bc_tp,
    5263             :                                                 cur->bc_levels[i].bp);
    5264       54500 :                                 cur->bc_levels[i].bp = NULL;
    5265       54500 :                                 cur->bc_levels[i].ptr = 0;
    5266       54500 :                                 cur->bc_levels[i].ra = 0;
    5267             :                         }
    5268             :                 }
    5269             :         }
    5270             : 
    5271             :         return error;
    5272             : }
    5273             : 
    5274             : static inline void
    5275 25777585033 : xfs_btree_key_from_irec(
    5276             :         struct xfs_btree_cur            *cur,
    5277             :         union xfs_btree_key             *key,
    5278             :         const union xfs_btree_irec      *irec)
    5279             : {
    5280 25777585033 :         union xfs_btree_rec             rec;
    5281             : 
    5282 25777585033 :         cur->bc_rec = *irec;
    5283 25777585033 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    5284 25774717043 :         cur->bc_ops->init_key_from_rec(key, &rec);
    5285 25775942800 : }
    5286             : 
    5287             : /*
    5288             :  * Query a btree for all records overlapping a given interval of keys.  The
    5289             :  * supplied function will be called with each record found; return one of the
    5290             :  * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
    5291             :  * code.  This function returns -ECANCELED, zero, or a negative error code.
    5292             :  */
    5293             : int
    5294  7214021856 : xfs_btree_query_range(
    5295             :         struct xfs_btree_cur            *cur,
    5296             :         const union xfs_btree_irec      *low_rec,
    5297             :         const union xfs_btree_irec      *high_rec,
    5298             :         xfs_btree_query_range_fn        fn,
    5299             :         void                            *priv)
    5300             : {
    5301  7214021856 :         union xfs_btree_key             low_key;
    5302  7214021856 :         union xfs_btree_key             high_key;
    5303             : 
    5304             :         /* Find the keys of both ends of the interval. */
    5305  7214021856 :         xfs_btree_key_from_irec(cur, &high_key, high_rec);
    5306  7212930431 :         xfs_btree_key_from_irec(cur, &low_key, low_rec);
    5307             : 
    5308             :         /* Enforce low key <= high key. */
    5309  7213517560 :         if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
    5310             :                 return -EINVAL;
    5311             : 
    5312  7213306736 :         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5313  5201547110 :                 return xfs_btree_simple_query_range(cur, &low_key,
    5314             :                                 &high_key, fn, priv);
    5315  2011759626 :         return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
    5316             :                         fn, priv);
    5317             : }
    5318             : 
    5319             : /* Query a btree for all records. */
    5320             : int
    5321    34784374 : xfs_btree_query_all(
    5322             :         struct xfs_btree_cur            *cur,
    5323             :         xfs_btree_query_range_fn        fn,
    5324             :         void                            *priv)
    5325             : {
    5326    34784374 :         union xfs_btree_key             low_key;
    5327    34784374 :         union xfs_btree_key             high_key;
    5328             : 
    5329    34784374 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5330    34784374 :         memset(&low_key, 0, sizeof(low_key));
    5331    34784374 :         memset(&high_key, 0xFF, sizeof(high_key));
    5332             : 
    5333    34784374 :         return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
    5334             : }
    5335             : 
    5336             : static int
    5337   161052818 : xfs_btree_count_blocks_helper(
    5338             :         struct xfs_btree_cur    *cur,
    5339             :         int                     level,
    5340             :         void                    *data)
    5341             : {
    5342   161052818 :         xfs_extlen_t            *blocks = data;
    5343   161052818 :         (*blocks)++;
    5344             : 
    5345   161052818 :         return 0;
    5346             : }
    5347             : 
    5348             : /* Count the blocks in a btree and return the result in *blocks. */
    5349             : int
    5350    11646831 : xfs_btree_count_blocks(
    5351             :         struct xfs_btree_cur    *cur,
    5352             :         xfs_extlen_t            *blocks)
    5353             : {
    5354    11646831 :         *blocks = 0;
    5355    11646831 :         return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
    5356             :                         XFS_BTREE_VISIT_ALL, blocks);
    5357             : }
    5358             : 
    5359             : /* Compare two btree pointers. */
    5360             : int64_t
    5361    33888024 : xfs_btree_diff_two_ptrs(
    5362             :         struct xfs_btree_cur            *cur,
    5363             :         const union xfs_btree_ptr       *a,
    5364             :         const union xfs_btree_ptr       *b)
    5365             : {
    5366    33888024 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5367    17525802 :                 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
    5368    16362222 :         return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
    5369             : }
    5370             : 
    5371             : struct xfs_btree_has_records {
    5372             :         /* Keys for the start and end of the range we want to know about. */
    5373             :         union xfs_btree_key             start_key;
    5374             :         union xfs_btree_key             end_key;
    5375             : 
    5376             :         /* Mask for key comparisons, if desired. */
    5377             :         const union xfs_btree_key       *key_mask;
    5378             : 
    5379             :         /* Highest record key we've seen so far. */
    5380             :         union xfs_btree_key             high_key;
    5381             : 
    5382             :         enum xbtree_recpacking          outcome;
    5383             : };
    5384             : 
    5385             : STATIC int
    5386  6842036614 : xfs_btree_has_records_helper(
    5387             :         struct xfs_btree_cur            *cur,
    5388             :         const union xfs_btree_rec       *rec,
    5389             :         void                            *priv)
    5390             : {
    5391  6842036614 :         union xfs_btree_key             rec_key;
    5392  6842036614 :         union xfs_btree_key             rec_high_key;
    5393  6842036614 :         struct xfs_btree_has_records    *info = priv;
    5394  6842036614 :         enum xbtree_key_contig          key_contig;
    5395             : 
    5396  6842036614 :         cur->bc_ops->init_key_from_rec(&rec_key, rec);
    5397             : 
    5398  6841029406 :         if (info->outcome == XBTREE_RECPACKING_EMPTY) {
    5399    24872887 :                 info->outcome = XBTREE_RECPACKING_SPARSE;
    5400             : 
    5401             :                 /*
    5402             :                  * If the first record we find does not overlap the start key,
    5403             :                  * then there is a hole at the start of the search range.
    5404             :                  * Classify this as sparse and stop immediately.
    5405             :                  */
    5406    24872887 :                 if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
    5407             :                                         info->key_mask))
    5408             :                         return -ECANCELED;
    5409             :         } else {
    5410             :                 /*
    5411             :                  * If a subsequent record does not overlap with any record
    5412             :                  * we've seen so far, there is a hole in the middle of the
    5413             :                  * search range.  Classify this as sparse and stop.
    5414             :                  * If the keys overlap and this btree does not allow overlap,
    5415             :                  * signal corruption.
    5416             :                  */
    5417  6816156519 :                 key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
    5418             :                                         &rec_key, info->key_mask);
    5419  6816759317 :                 if (key_contig == XBTREE_KEY_OVERLAP &&
    5420  1043626414 :                                 !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5421             :                         return -EFSCORRUPTED;
    5422  6816759317 :                 if (key_contig == XBTREE_KEY_GAP)
    5423             :                         return -ECANCELED;
    5424             :         }
    5425             : 
    5426             :         /*
    5427             :          * If high_key(rec) is larger than any other high key we've seen,
    5428             :          * remember it for later.
    5429             :          */
    5430  6841632452 :         cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
    5431  6842071473 :         if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
    5432             :                                 info->key_mask))
    5433  5814606784 :                 info->high_key = rec_high_key; /* struct copy */
    5434             : 
    5435             :         return 0;
    5436             : }
    5437             : 
    5438             : /*
    5439             :  * Scan part of the keyspace of a btree and tell us if that keyspace does not
    5440             :  * map to any records; is fully mapped to records; or is partially mapped to
    5441             :  * records.  This is the btree record equivalent to determining if a file is
    5442             :  * sparse.
    5443             :  *
    5444             :  * For most btree types, the record scan should use all available btree key
    5445             :  * fields to compare the keys encountered.  These callers should pass NULL for
    5446             :  * @mask.  However, some callers (e.g.  scanning physical space in the rmapbt)
    5447             :  * want to ignore some part of the btree record keyspace when performing the
    5448             :  * comparison.  These callers should pass in a union xfs_btree_key object with
    5449             :  * the fields that *should* be a part of the comparison set to any nonzero
    5450             :  * value, and the rest zeroed.
                     :  *
                     :  * @cur:     cursor of the btree to scan; cur->bc_ops->keys_contiguous must
                     :  *           be implemented.
                     :  * @low:     record describing the start of the search range.
                     :  * @high:    record describing the end of the search range.
                     :  * @mask:    optional key mask as described above, or NULL.
                     :  * @outcome: receives the record packing classification (for example
                     :  *           XBTREE_RECPACKING_EMPTY or XBTREE_RECPACKING_FULL).
                     :  *
                     :  * Returns 0 with *outcome set, -EOPNOTSUPP if the btree type has no
                     :  * keys_contiguous method, or any other error reported by
                     :  * xfs_btree_query_range().  *outcome is not written on an error return.
    5451             :  */
    5452             : int
    5453  5686371612 : xfs_btree_has_records(
    5454             :         struct xfs_btree_cur            *cur,
    5455             :         const union xfs_btree_irec      *low,
    5456             :         const union xfs_btree_irec      *high,
    5457             :         const union xfs_btree_key       *mask,
    5458             :         enum xbtree_recpacking          *outcome)
    5459             : {
    5460  5686371612 :         struct xfs_btree_has_records    info = {
    5461             :                 .outcome                = XBTREE_RECPACKING_EMPTY,
    5462             :                 .key_mask               = mask,
    5463             :         };
    5464  5686371612 :         int                             error;
    5465             : 
    5466             :         /* Not all btrees support this operation. */
    5467  5686371612 :         if (!cur->bc_ops->keys_contiguous) {
    5468           0 :                 ASSERT(0);
    5469           0 :                 return -EOPNOTSUPP;
    5470             :         }
    5471             : 
    5472  5686371612 :         xfs_btree_key_from_irec(cur, &info.start_key, low);
    5473  5681575349 :         xfs_btree_key_from_irec(cur, &info.end_key, high);
    5474             :         /* -ECANCELED from the walk is the helper stopping early, not a failure. */
    5475  5682685378 :         error = xfs_btree_query_range(cur, low, high,
    5476             :                         xfs_btree_has_records_helper, &info);
    5477  5684582499 :         if (error == -ECANCELED)
    5478           0 :                 goto out;
    5479  5684582499 :         if (error)
    5480             :                 return error;
    5481             :         /* Outcome is still the initial EMPTY value after the walk; report that. */
    5482  5684582499 :         if (info.outcome == XBTREE_RECPACKING_EMPTY)
    5483  5659709323 :                 goto out;
    5484             : 
    5485             :         /*
    5486             :          * If the largest high_key(rec) we saw during the walk is greater than
    5487             :          * the end of the search range, classify this as full.  Otherwise,
    5488             :          * there is a hole at the end of the search range.
                     :          * The comparison below is a masked >=, so a high key equal to
                     :          * the end key also counts as full.  When it fails, info.outcome
                     :          * is left as it was after the walk.
    5489             :          */
    5490    24873176 :         if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
    5491             :                                 mask))
    5492    24873141 :                 info.outcome = XBTREE_RECPACKING_FULL;
    5493             : 
    5494           0 : out:
    5495  5684582464 :         *outcome = info.outcome;
    5496  5684582464 :         return 0;
    5497             : }
    5498             : 
    5499             : /*
                     :  * Are there more records in this btree?
                     :  *
                     :  * Looks at the level 0 block the cursor currently points into.  Returns true
                     :  * if the cursor's level 0 position is below the number of records in that
                     :  * block.  Otherwise returns whether the block's right sibling pointer differs
                     :  * from the null block value: NULLFSBLOCK when XFS_BTREE_LONG_PTRS is set in
                     :  * cur->bc_flags, NULLAGBLOCK when it is not.
                     :  */
    5500             : bool
    5501   134413464 : xfs_btree_has_more_records(
    5502             :         struct xfs_btree_cur    *cur)
    5503             : {
    5504   134413464 :         struct xfs_btree_block  *block;
    5505   134413464 :         struct xfs_buf          *bp;
    5506             : 
    5507   134413464 :         block = xfs_btree_get_block(cur, 0, &bp);
    5508             :         /* bp is only an output slot for the call above; it is not used afterwards. */
    5509             :         /* There are still records in this block. */
    5510   134413357 :         if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
    5511             :                 return true;
    5512             : 
    5513             :         /* There are more record blocks. */
    5514     1022271 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5515      300961 :                 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
    5516             :         else
    5517      721310 :                 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
    5518             : }
    5519             : 
    5520             : /*
                     :  * Set up all the btree cursor caches.
                     :  *
                     :  * Calls the cursor cache init routine of each btree type in turn (allocbt,
                     :  * inobt, bmbt, rmapbt, refcountbt, rtrmapbt, rtrefcountbt) and stops at the
                     :  * first one that fails.  A failure of the first call is returned directly;
                     :  * a failure of any later call runs xfs_btree_destroy_cur_caches() before
                     :  * returning.
                     :  *
                     :  * Returns 0 if every init call succeeded, otherwise the error from the call
                     :  * that failed.
                     :  */
    5521             : int __init
    5522          59 : xfs_btree_init_cur_caches(void)
    5523             : {
    5524          59 :         int             error;
    5525             : 
    5526          59 :         error = xfs_allocbt_init_cur_cache();
    5527          59 :         if (error)
    5528             :                 return error;
    5529          59 :         error = xfs_inobt_init_cur_cache();
    5530          59 :         if (error)
    5531           0 :                 goto err;
    5532          59 :         error = xfs_bmbt_init_cur_cache();
    5533          59 :         if (error)
    5534           0 :                 goto err;
    5535          59 :         error = xfs_rmapbt_init_cur_cache();
    5536          59 :         if (error)
    5537           0 :                 goto err;
    5538          59 :         error = xfs_refcountbt_init_cur_cache();
    5539          59 :         if (error)
    5540           0 :                 goto err;
    5541          59 :         error = xfs_rtrmapbt_init_cur_cache();
    5542          59 :         if (error)
    5543           0 :                 goto err;
    5544          59 :         error = xfs_rtrefcountbt_init_cur_cache();
    5545          59 :         if (error)
    5546           0 :                 goto err;
    5547             : 
    5548             :         return 0;
    5549           0 : err:
    5550           0 :         xfs_btree_destroy_cur_caches();
    5551           0 :         return error;
    5552             : }
    5553             : 
    5554             : /*
                     :  * Destroy all the btree cursor caches, if they've been allocated.
                     :  *
                     :  * Calls the cursor cache destroy routine of every btree type, in the same
                     :  * order in which xfs_btree_init_cur_caches() sets them up.  Takes no
                     :  * arguments and returns nothing.
                     :  */
    5555             : void
    5556          58 : xfs_btree_destroy_cur_caches(void)
    5557             : {
    5558          58 :         xfs_allocbt_destroy_cur_cache();
    5559          58 :         xfs_inobt_destroy_cur_cache();
    5560          58 :         xfs_bmbt_destroy_cur_cache();
    5561          58 :         xfs_rmapbt_destroy_cur_cache();
    5562          58 :         xfs_refcountbt_destroy_cur_cache();
    5563          58 :         xfs_rtrmapbt_destroy_cur_cache();
    5564          58 :         xfs_rtrefcountbt_destroy_cur_cache();
    5565          58 : }
    5566             : 
    5567             : /*
                     :  * Move the btree cursor before the first record.
                     :  *
                     :  * Zeroes cur->bc_rec and runs an XFS_LOOKUP_LE lookup with it.  If the
                     :  * lookup leaves stat at zero, nothing more is done.  Otherwise the cursor is
                     :  * decremented once at level 0, and that decrement is expected to report
                     :  * stat == 0.
                     :  * NOTE(review): stat presumably reports whether a record was found at the
                     :  * resulting cursor position; confirm against xfs_btree_lookup() and
                     :  * xfs_btree_decrement().
                     :  *
                     :  * Returns 0 on success, the error from the lookup or the decrement, or
                     :  * -EFSCORRUPTED (after marking the btree sick) if the decrement reports a
                     :  * nonzero stat.
                     :  */
    5568             : int
    5569   283631420 : xfs_btree_goto_left_edge(
    5570             :         struct xfs_btree_cur    *cur)
    5571             : {
    5572   283631420 :         int                     stat = 0;
    5573   283631420 :         int                     error;
    5574             : 
    5575   283631420 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5576   283631420 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    5577   283625854 :         if (error)
    5578             :                 return error;
    5579   283625854 :         if (!stat)
    5580             :                 return 0;
    5581             :         /*
                     :          * The lookup set stat, so move the cursor back once at level 0.
                     :          * A nonzero stat from that step is treated as corruption below.
                     :          */
    5582           0 :         error = xfs_btree_decrement(cur, 0, &stat);
    5583           0 :         if (error)
    5584             :                 return error;
    5585           0 :         if (stat != 0) {
    5586           0 :                 ASSERT(0);
    5587           0 :                 xfs_btree_mark_sick(cur);
    5588           0 :                 return -EFSCORRUPTED;
    5589             :         }
    5590             : 
    5591             :         return 0;
    5592             : }
    5593             : 
    5594             : /*
                     :  * Allocate a block for an inode-rooted metadata btree.
                     :  *
                     :  * Requests a single block (minlen == maxlen == 1) from the
                     :  * XFS_AG_RESV_IMETA reservation, owned by the cursor's inode and fork, by
                     :  * passing the inode's filesystem block to xfs_alloc_vextent_start_ag().
                     :  *
                     :  * @cur:   cursor of the btree; cur->bc_ino.ip is asserted to be a metadir
                     :  *         inode for which XFS_IS_DQDETACHED holds.
                     :  * @start: not used by this function.
                     :  * @new:   on success with *stat == 1, receives the new block number as a
                     :  *         big-endian 64-bit value in new->l; otherwise left untouched.
                     :  * @stat:  set to 1 if a block was allocated, 0 if the allocator returned
                     :  *         NULLFSBLOCK; not written when an error is returned.
                     :  *
                     :  * Returns 0 in both *stat cases, or the error from the allocator.
                     :  */
    5595             : int
    5596      726296 : xfs_btree_alloc_imeta_block(
    5597             :         struct xfs_btree_cur            *cur,
    5598             :         const union xfs_btree_ptr       *start,
    5599             :         union xfs_btree_ptr             *new,
    5600             :         int                             *stat)
    5601             : {
    5602      726296 :         struct xfs_alloc_arg            args = {
    5603      726296 :                 .mp                     = cur->bc_mp,
    5604      726296 :                 .tp                     = cur->bc_tp,
    5605             :                 .resv                   = XFS_AG_RESV_IMETA,
    5606             :                 .minlen                 = 1,
    5607             :                 .maxlen                 = 1,
    5608             :                 .prod                   = 1,
    5609             :         };
    5610      726296 :         struct xfs_inode                *ip = cur->bc_ino.ip;
    5611      726296 :         int                             error;
    5612             : 
    5613      726296 :         ASSERT(xfs_is_metadir_inode(ip));
    5614      726296 :         ASSERT(XFS_IS_DQDETACHED(cur->bc_mp, ip));
    5615             :         /* Fill in the owner info from the inode number and fork before allocating. */
    5616      726296 :         xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, cur->bc_ino.whichfork);
    5617     3631480 :         error = xfs_alloc_vextent_start_ag(&args,
    5618      726296 :                         XFS_INO_TO_FSB(cur->bc_mp, ip->i_ino));
    5619      726296 :         if (error)
    5620             :                 return error;
    5621      726296 :         if (args.fsbno == NULLFSBLOCK) {
    5622           0 :                 *stat = 0;
    5623           0 :                 return 0;
    5624             :         }
    5625      726296 :         ASSERT(args.len == 1);
    5626             :         /* Account for the new block in the imeta reservation and in the cursor. */
    5627      726296 :         xfs_imeta_resv_alloc_extent(ip, &args);
    5628      726295 :         cur->bc_ino.allocated++;
    5629             : 
    5630      726295 :         new->l = cpu_to_be64(args.fsbno);
    5631      726295 :         *stat = 1;
    5632      726295 :         return 0;
    5633             : }
    5634             : 
    5635             : /*
                     :  * Free a block from an inode-rooted metadata btree.
                     :  *
                     :  * Converts the buffer's disk address to a filesystem block number and hands
                     :  * that single block to xfs_free_extent_later() with owner info built from
                     :  * the cursor's inode and fork and the XFS_AG_RESV_IMETA reservation type.
                     :  * If that succeeds, xfs_imeta_resv_free_extent() is called for one block.
                     :  *
                     :  * @cur: cursor of the btree; cur->bc_ino.ip is asserted to be a metadir
                     :  *       inode for which XFS_IS_DQDETACHED holds.
                     :  * @bp:  buffer of the block being freed.
                     :  *
                     :  * Returns 0 on success or the error from xfs_free_extent_later(), in which
                     :  * case xfs_imeta_resv_free_extent() is not called.
                     :  */
    5636             : int
    5637       88579 : xfs_btree_free_imeta_block(
    5638             :         struct xfs_btree_cur    *cur,
    5639             :         struct xfs_buf          *bp)
    5640             : {
    5641       88579 :         struct xfs_owner_info   oinfo;
    5642       88579 :         struct xfs_mount        *mp = cur->bc_mp;
    5643       88579 :         struct xfs_inode        *ip = cur->bc_ino.ip;
    5644       88579 :         struct xfs_trans        *tp = cur->bc_tp;
    5645       88579 :         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
    5646       88579 :         int                     error;
    5647             : 
    5648       88579 :         ASSERT(xfs_is_metadir_inode(ip));
    5649       88579 :         ASSERT(XFS_IS_DQDETACHED(cur->bc_mp, ip));
    5650             : 
    5651       88579 :         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
    5652       88579 :         error = xfs_free_extent_later(tp, fsbno, 1, &oinfo, XFS_AG_RESV_IMETA,
    5653             :                         0);
    5654       88579 :         if (error)
    5655             :                 return error;
    5656             : 
    5657       88579 :         xfs_imeta_resv_free_extent(ip, tp, 1);
    5658       88579 :         return 0;
    5659             : }

Generated by: LCOV version 1.14