LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_btree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 2130 2382 89.4 %
Date: 2023-07-31 20:08:27 Functions: 127 129 98.4 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_buf_item.h"
      17             : #include "xfs_btree.h"
      18             : #include "xfs_errortag.h"
      19             : #include "xfs_error.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_alloc.h"
      22             : #include "xfs_log.h"
      23             : #include "xfs_btree_staging.h"
      24             : #include "xfs_ag.h"
      25             : #include "xfs_alloc_btree.h"
      26             : #include "xfs_ialloc_btree.h"
      27             : #include "xfs_bmap_btree.h"
      28             : #include "xfs_rmap_btree.h"
      29             : #include "xfs_refcount_btree.h"
      30             : #include "xfs_health.h"
      31             : #include "scrub/xfile.h"
      32             : #include "scrub/xfbtree.h"
      33             : #include "xfs_btree_mem.h"
      34             : #include "xfs_rtgroup.h"
      35             : #include "xfs_rtrmap_btree.h"
      36             : #include "xfs_bmap.h"
      37             : #include "xfs_rmap.h"
      38             : #include "xfs_quota.h"
      39             : #include "xfs_imeta.h"
      40             : #include "xfs_rtrefcount_btree.h"
      41             : 
      42             : /*
      43             :  * Btree magic numbers.
      44             :  */
      45             : uint32_t
      46 >25978*10^7 : xfs_btree_magic(
      47             :         struct xfs_mount                *mp,
      48             :         const struct xfs_btree_ops      *ops)
      49             : {
      50 >25978*10^7 :         int                             idx = xfs_has_crc(mp) ? 1 : 0;
      51 >25978*10^7 :         __be32                          magic = ops->buf_ops->magic[idx];
      52             : 
      53             :         /* Ensure we asked for crc for crc-only magics. */
      54 >25978*10^7 :         ASSERT(magic != 0);
      55 >25978*10^7 :         return be32_to_cpu(magic);
      56             : }
      57             : 
      58             : /*
      59             :  * These sibling pointer checks are optimised for null sibling pointers. This
      60             :  * happens a lot, and we don't need to byte swap at runtime if the sibling
      61             :  * pointer is NULL.
      62             :  *
      63             :  * These are explicitly marked at inline because the cost of calling them as
      64             :  * functions instead of inlining them is about 36 bytes extra code per call site
      65             :  * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
      66             :  * two sibling check functions reduces the compiled code size by over 300
      67             :  * bytes.
      68             :  */
      69             : static inline xfs_failaddr_t
      70 >16389*10^7 : xfs_btree_check_lblock_siblings(
      71             :         struct xfs_mount        *mp,
      72             :         struct xfs_btree_cur    *cur,
      73             :         int                     level,
      74             :         xfs_fsblock_t           fsb,
      75             :         __be64                  dsibling)
      76             : {
      77 >16389*10^7 :         xfs_fsblock_t           sibling;
      78             : 
      79 >16389*10^7 :         if (dsibling == cpu_to_be64(NULLFSBLOCK))
      80             :                 return NULL;
      81             : 
      82 >15561*10^7 :         sibling = be64_to_cpu(dsibling);
      83 >15561*10^7 :         if (sibling == fsb)
      84           0 :                 return __this_address;
      85 >15561*10^7 :         if (level >= 0) {
      86 >15555*10^7 :                 if (!xfs_btree_check_lptr(cur, sibling, level + 1))
      87           0 :                         return __this_address;
      88    59223344 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
      89           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
      90           0 :                         return __this_address;
      91             :         } else {
      92    59223344 :                 if (!xfs_verify_fsbno(mp, sibling))
      93           0 :                         return __this_address;
      94             :         }
      95             : 
      96             :         return NULL;
      97             : }
      98             : 
      99             : static inline xfs_failaddr_t
     100 >35254*10^7 : xfs_btree_check_sblock_siblings(
     101             :         struct xfs_perag        *pag,
     102             :         struct xfs_btree_cur    *cur,
     103             :         int                     level,
     104             :         xfs_agblock_t           agbno,
     105             :         __be32                  dsibling)
     106             : {
     107 >35254*10^7 :         xfs_agblock_t           sibling;
     108             : 
     109 >35254*10^7 :         if (dsibling == cpu_to_be32(NULLAGBLOCK))
     110             :                 return NULL;
     111             : 
     112 >24946*10^7 :         sibling = be32_to_cpu(dsibling);
     113 >24946*10^7 :         if (sibling == agbno)
     114           0 :                 return __this_address;
     115 >24946*10^7 :         if (level >= 0) {
     116 >24939*10^7 :                 if (!xfs_btree_check_sptr(cur, sibling, level + 1))
     117           0 :                         return __this_address;
     118    62765666 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
     119           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
     120           0 :                         return __this_address;
     121             :         } else {
     122    62765666 :                 if (!xfs_verify_agbno(pag, sibling))
     123           0 :                         return __this_address;
     124             :         }
     125             :         return NULL;
     126             : }
     127             : 
     128             : /*
     129             :  * Check a long btree block header.  Return the address of the failing check,
     130             :  * or NULL if everything is ok.
     131             :  */
     132             : xfs_failaddr_t
     133 84052940461 : __xfs_btree_check_lblock(
     134             :         struct xfs_btree_cur    *cur,
     135             :         struct xfs_btree_block  *block,
     136             :         int                     level,
     137             :         struct xfs_buf          *bp)
     138             : {
     139 84052940461 :         struct xfs_mount        *mp = cur->bc_mp;
     140 84052940461 :         int                     crc = xfs_has_crc(mp);
     141 84052940461 :         xfs_failaddr_t          fa;
     142 84052940461 :         xfs_fsblock_t           fsb = NULLFSBLOCK;
     143             : 
     144 84052940461 :         if (crc) {
     145 83813819480 :                 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
     146           0 :                         return __this_address;
     147 >16828*10^7 :                 if (block->bb_u.l.bb_blkno !=
     148 84144626366 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     149           0 :                         return __this_address;
     150 84144626366 :                 if (block->bb_u.l.bb_pad != cpu_to_be32(0))
     151           0 :                         return __this_address;
     152             :         }
     153             : 
     154 >16876*10^7 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     155           0 :                 return __this_address;
     156 85101425262 :         if (be16_to_cpu(block->bb_level) != level)
     157  1129707217 :                 return __this_address;
     158 >16887*10^7 :         if (be16_to_cpu(block->bb_numrecs) >
     159 83971718045 :             cur->bc_ops->get_maxrecs(cur, level))
     160           0 :                 return __this_address;
     161             : 
     162 84905649263 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
     163  2829173329 :                 fsb = xfbtree_buf_to_xfoff(cur, bp);
     164 82076475934 :         else if (bp)
     165 82074930219 :                 fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
     166             : 
     167 84905649457 :         fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     168             :                         block->bb_u.l.bb_leftsib);
     169 83216878162 :         if (!fa)
     170 83402614938 :                 fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     171             :                                 block->bb_u.l.bb_rightsib);
     172             :         return fa;
     173             : }
     174             : 
     175             : /* Check a long btree block header. */
     176             : static int
     177 84763596356 : xfs_btree_check_lblock(
     178             :         struct xfs_btree_cur    *cur,
     179             :         struct xfs_btree_block  *block,
     180             :         int                     level,
     181             :         struct xfs_buf          *bp)
     182             : {
     183 84763596356 :         struct xfs_mount        *mp = cur->bc_mp;
     184 84763596356 :         xfs_failaddr_t          fa;
     185             : 
     186 84763596356 :         fa = __xfs_btree_check_lblock(cur, block, level, bp);
     187 >17034*10^7 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     188 84845310054 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
     189           0 :                 if (bp)
     190           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     191           0 :                 xfs_btree_mark_sick(cur);
     192           0 :                 return -EFSCORRUPTED;
     193             :         }
     194             :         return 0;
     195             : }
     196             : 
     197             : /*
     198             :  * Check a short btree block header.  Return the address of the failing check,
     199             :  * or NULL if everything is ok.
     200             :  */
     201             : xfs_failaddr_t
     202 >17585*10^7 : __xfs_btree_check_sblock(
     203             :         struct xfs_btree_cur    *cur,
     204             :         struct xfs_btree_block  *block,
     205             :         int                     level,
     206             :         struct xfs_buf          *bp)
     207             : {
     208 >17585*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     209 >17585*10^7 :         struct xfs_perag        *pag = cur->bc_ag.pag;
     210 >17585*10^7 :         int                     crc = xfs_has_crc(mp);
     211 >17585*10^7 :         xfs_failaddr_t          fa;
     212 >17585*10^7 :         xfs_agblock_t           agbno = NULLAGBLOCK;
     213             : 
     214 >17585*10^7 :         if (crc) {
     215 >17593*10^7 :                 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
     216           0 :                         return __this_address;
     217 >35267*10^7 :                 if (block->bb_u.s.bb_blkno !=
     218 >17633*10^7 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     219           0 :                         return __this_address;
     220             :         }
     221             : 
     222 >35251*10^7 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     223           0 :                 return __this_address;
     224 >17686*10^7 :         if (be16_to_cpu(block->bb_level) != level)
     225   455417601 :                 return __this_address;
     226 >35246*10^7 :         if (be16_to_cpu(block->bb_numrecs) >
     227 >17640*10^7 :             cur->bc_ops->get_maxrecs(cur, level))
     228           0 :                 return __this_address;
     229             : 
     230 >17605*10^7 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
     231   264462941 :                 pag = NULL;
     232   264462941 :                 agbno = xfbtree_buf_to_xfoff(cur, bp);
     233 >17579*10^7 :         } else if (bp) {
     234 >17579*10^7 :                 agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
     235             :         }
     236             : 
     237 >17605*10^7 :         fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     238             :                         block->bb_u.s.bb_leftsib);
     239 >17635*10^7 :         if (!fa)
     240 >17645*10^7 :                 fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     241             :                                 block->bb_u.s.bb_rightsib);
     242             :         return fa;
     243             : }
     244             : 
     245             : /* Check a short btree block header. */
     246             : STATIC int
     247 >17605*10^7 : xfs_btree_check_sblock(
     248             :         struct xfs_btree_cur    *cur,
     249             :         struct xfs_btree_block  *block,
     250             :         int                     level,
     251             :         struct xfs_buf          *bp)
     252             : {
     253 >17605*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     254 >17605*10^7 :         xfs_failaddr_t          fa;
     255             : 
     256 >17605*10^7 :         fa = __xfs_btree_check_sblock(cur, block, level, bp);
     257 >35317*10^7 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     258 >17645*10^7 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
     259           0 :                 if (bp)
     260           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     261           0 :                 xfs_btree_mark_sick(cur);
     262           0 :                 return -EFSCORRUPTED;
     263             :         }
     264             :         return 0;
     265             : }
     266             : 
     267             : /*
     268             :  * Debug routine: check that block header is ok.
     269             :  */
     270             : int
     271 >26125*10^7 : xfs_btree_check_block(
     272             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     273             :         struct xfs_btree_block  *block, /* generic btree block pointer */
     274             :         int                     level,  /* level of the btree block */
     275             :         struct xfs_buf          *bp)    /* buffer containing block, if any */
     276             : {
     277             :         /* Don't check the inode-core root. */
     278 >26125*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     279 81821259326 :             level == cur->bc_nlevels - 1)
     280             :                 return 0;
     281             : 
     282 >26108*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
     283 84751174806 :                 return xfs_btree_check_lblock(cur, block, level, bp);
     284             :         else
     285 >17633*10^7 :                 return xfs_btree_check_sblock(cur, block, level, bp);
     286             : }
     287             : 
     288             : /* Check that this long pointer is valid and points within the fs. */
     289             : bool
     290 >16135*10^7 : xfs_btree_check_lptr(
     291             :         struct xfs_btree_cur    *cur,
     292             :         xfs_fsblock_t           fsbno,
     293             :         int                     level)
     294             : {
     295 >16135*10^7 :         if (level <= 0)
     296             :                 return false;
     297 >16135*10^7 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     298   103347200 :                 return xfbtree_verify_xfileoff(cur, fsbno);
     299 >16124*10^7 :         return xfs_verify_fsbno(cur->bc_mp, fsbno);
     300             : }
     301             : 
     302             : /* Check that this short pointer is valid and points within the AG. */
     303             : bool
     304 >29321*10^7 : xfs_btree_check_sptr(
     305             :         struct xfs_btree_cur    *cur,
     306             :         xfs_agblock_t           agbno,
     307             :         int                     level)
     308             : {
     309 >29321*10^7 :         if (level <= 0)
     310             :                 return false;
     311 >29321*10^7 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     312   310973624 :                 return xfbtree_verify_xfileoff(cur, agbno);
     313 >29290*10^7 :         return xfs_verify_agbno(cur->bc_ag.pag, agbno);
     314             : }
     315             : 
     316             : /*
     317             :  * Check that a given (indexed) btree pointer at a certain level of a
     318             :  * btree is valid and doesn't point past where it should.
     319             :  */
     320             : static int
     321 52257991255 : xfs_btree_check_ptr(
     322             :         struct xfs_btree_cur            *cur,
     323             :         const union xfs_btree_ptr       *ptr,
     324             :         int                             index,
     325             :         int                             level)
     326             : {
     327 52257991255 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     328  2303417308 :                 return xfbtree_check_ptr(cur, ptr, index, level);
     329             : 
     330 49954573947 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     331  5275962526 :                 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
     332             :                                 level))
     333             :                         return 0;
     334           0 :                 xfs_err(cur->bc_mp,
     335             : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
     336             :                                 cur->bc_ino.ip->i_ino,
     337             :                                 cur->bc_ino.whichfork, cur->bc_btnum,
     338             :                                 level, index);
     339             :         } else {
     340 89357222842 :                 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
     341             :                                 level))
     342             :                         return 0;
     343           0 :                 xfs_err(cur->bc_mp,
     344             : "AG %u: Corrupt btree %d pointer at level %d index %d.",
     345             :                                 cur->bc_ag.pag->pag_agno, cur->bc_btnum,
     346             :                                 level, index);
     347             :         }
     348             : 
     349           0 :         xfs_btree_mark_sick(cur);
     350           0 :         return -EFSCORRUPTED;
     351             : }
     352             : 
     353             : #ifdef DEBUG
     354             : # define xfs_btree_debug_check_ptr      xfs_btree_check_ptr
     355             : #else
     356             : # define xfs_btree_debug_check_ptr(...) (0)
     357             : #endif
     358             : 
     359             : /*
     360             :  * Calculate CRC on the whole btree block and stuff it into the
     361             :  * long-form btree header.
     362             :  *
     363             :  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
     364             :  * it into the buffer so recovery knows what the last modification was that made
     365             :  * it to disk.
     366             :  */
     367             : void
     368    22654441 : xfs_btree_lblock_calc_crc(
     369             :         struct xfs_buf          *bp)
     370             : {
     371    22654441 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     372    22654441 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     373             : 
     374    22654441 :         if (!xfs_has_crc(bp->b_mount))
     375             :                 return;
     376    22654441 :         if (bip)
     377    22240936 :                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     378    22654441 :         xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     379             : }
     380             : 
     381             : bool
     382     8184072 : xfs_btree_lblock_verify_crc(
     383             :         struct xfs_buf          *bp)
     384             : {
     385     8184072 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     386     8184072 :         struct xfs_mount        *mp = bp->b_mount;
     387             : 
     388     8184072 :         if (xfs_has_crc(mp)) {
     389     8184072 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
     390             :                         return false;
     391     8184072 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     392             :         }
     393             : 
     394             :         return true;
     395             : }
     396             : 
     397             : /*
     398             :  * Calculate CRC on the whole btree block and stuff it into the
     399             :  * short-form btree header.
     400             :  *
     401             :  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
     402             :  * it into the buffer so recovery knows what the last modification was that made
     403             :  * it to disk.
     404             :  */
     405             : void
     406    18821081 : xfs_btree_sblock_calc_crc(
     407             :         struct xfs_buf          *bp)
     408             : {
     409    18821081 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     410    18821081 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     411             : 
     412    18821081 :         if (!xfs_has_crc(bp->b_mount))
     413             :                 return;
     414    18814197 :         if (bip)
     415    18007757 :                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     416    18814197 :         xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     417             : }
     418             : 
     419             : bool
     420     2395322 : xfs_btree_sblock_verify_crc(
     421             :         struct xfs_buf          *bp)
     422             : {
     423     2395322 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     424     2395322 :         struct xfs_mount        *mp = bp->b_mount;
     425             : 
     426     2395322 :         if (xfs_has_crc(mp)) {
     427     2395170 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
     428             :                         return false;
     429     2395170 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     430             :         }
     431             : 
     432             :         return true;
     433             : }
     434             : 
     435             : static int
     436      449577 : xfs_btree_free_block(
     437             :         struct xfs_btree_cur    *cur,
     438             :         struct xfs_buf          *bp)
     439             : {
     440      449577 :         int                     error;
     441             : 
     442      449577 :         trace_xfs_btree_free_block(cur, bp);
     443             : 
     444      449577 :         error = cur->bc_ops->free_block(cur, bp);
     445      449577 :         if (!error) {
     446      449577 :                 xfs_trans_binval(cur->bc_tp, bp);
     447      449577 :                 XFS_BTREE_STATS_INC(cur, free);
     448             :         }
     449      449577 :         return error;
     450             : }
     451             : 
     452             : /*
     453             :  * Delete the btree cursor.
     454             :  */
     455             : void
     456  8072727470 : xfs_btree_del_cursor(
     457             :         struct xfs_btree_cur    *cur,           /* btree cursor */
     458             :         int                     error)          /* del because of error */
     459             : {
     460  8072727470 :         int                     i;              /* btree level */
     461             : 
     462             :         /*
     463             :          * Clear the buffer pointers and release the buffers. If we're doing
     464             :          * this because of an error, inspect all of the entries in the bc_bufs
     465             :          * array for buffers to be unlocked. This is because some of the btree
     466             :          * code works from level n down to 0, and if we get an error along the
     467             :          * way we won't have initialized all the entries down to 0.
     468             :          */
     469 20182283308 :         for (i = 0; i < cur->bc_nlevels; i++) {
     470 12500537122 :                 if (cur->bc_levels[i].bp)
     471 11010799127 :                         xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
     472  1489737995 :                 else if (!error)
     473             :                         break;
     474             :         }
     475             : 
     476             :         /*
     477             :          * If we are doing a BMBT update, the number of unaccounted blocks
     478             :          * allocated during this cursor life time should be zero. If it's not
     479             :          * zero, then we should be shut down or on our way to shutdown due to
     480             :          * cancelling a dirty transaction on error.
     481             :          */
     482  8074135421 :         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
     483             :                xfs_is_shutdown(cur->bc_mp) || error != 0);
     484  8074135419 :         if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
     485           0 :                 kmem_free(cur->bc_ops);
     486  8074135419 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     487   441849901 :             !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ino.rtg)
     488   216726624 :                 xfs_rtgroup_put(cur->bc_ino.rtg);
     489  8074138942 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
     490  6573883693 :             !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
     491  6573883693 :                 xfs_perag_put(cur->bc_ag.pag);
     492  8074411981 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
     493  1058421364 :                 if (cur->bc_mem.pag)
     494    23922159 :                         xfs_perag_put(cur->bc_mem.pag);
     495  1058421398 :                 if (cur->bc_mem.rtg)
     496     9213608 :                         xfs_rtgroup_put(cur->bc_mem.rtg);
     497             :         }
     498  8074412017 :         kmem_cache_free(cur->bc_cache, cur);
     499  8074142671 : }
     500             : 
     501             : /* Return the buffer target for this btree's buffer. */
     502             : static inline struct xfs_buftarg *
     503 12674772259 : xfs_btree_buftarg(
     504             :         struct xfs_btree_cur    *cur)
     505             : {
     506 12674772259 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     507  1099145546 :                 return xfbtree_target(cur->bc_mem.xfbtree);
     508 11575626713 :         return cur->bc_mp->m_ddev_targp;
     509             : }
     510             : 
     511             : /* Return the block size (in units of 512b sectors) for this btree. */
     512             : static inline unsigned int
     513 12674248816 : xfs_btree_bbsize(
     514             :         struct xfs_btree_cur    *cur)
     515             : {
     516 12674248816 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     517  1099145531 :                 return xfbtree_bbsize();
     518 11575103285 :         return cur->bc_mp->m_bsize;
     519             : }
     520             : 
     521             : /*
     522             :  * Duplicate the btree cursor.
     523             :  * Allocate a new one, copy the record, re-get the buffers.
     524             :  */
     525             : int                                     /* error */
     526   212133490 : xfs_btree_dup_cursor(
     527             :         struct xfs_btree_cur *cur,              /* input cursor */
     528             :         struct xfs_btree_cur **ncur)            /* output cursor */
     529             : {
     530   212133490 :         struct xfs_buf  *bp;            /* btree block's buffer pointer */
     531   212133490 :         int             error;          /* error return value */
     532   212133490 :         int             i;              /* level number of btree block */
     533   212133490 :         xfs_mount_t     *mp;            /* mount structure for filesystem */
     534   212133490 :         struct xfs_btree_cur *new;              /* new cursor value */
     535   212133490 :         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
     536             : 
     537   212133490 :         tp = cur->bc_tp;
     538   212133490 :         mp = cur->bc_mp;
     539             : 
     540             :         /*
     541             :          * Allocate a new cursor like the old one.
     542             :          */
     543   212133490 :         new = cur->bc_ops->dup_cursor(cur);
     544             : 
     545             :         /*
     546             :          * Copy the record currently in the cursor.
     547             :          */
     548   212136206 :         new->bc_rec = cur->bc_rec;
     549             : 
     550             :         /*
     551             :          * For each level current, re-get the buffer and copy the ptr value.
     552             :          */
     553   701882385 :         for (i = 0; i < new->bc_nlevels; i++) {
     554   489746828 :                 new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
     555   489746828 :                 new->bc_levels[i].ra = cur->bc_levels[i].ra;
     556   489746828 :                 bp = cur->bc_levels[i].bp;
     557   489746828 :                 if (bp) {
     558   417533646 :                         error = xfs_trans_read_buf(mp, tp,
     559             :                                         xfs_btree_buftarg(cur),
     560             :                                         xfs_buf_daddr(bp),
     561   417531032 :                                         xfs_btree_bbsize(cur), 0, &bp,
     562   417532916 :                                         cur->bc_ops->buf_ops);
     563   417533003 :                         if (xfs_metadata_is_sick(error))
     564           0 :                                 xfs_btree_mark_sick(new);
     565   417533003 :                         if (error) {
     566           6 :                                 xfs_btree_del_cursor(new, error);
     567           6 :                                 *ncur = NULL;
     568           6 :                                 return error;
     569             :                         }
     570             :                 }
     571   489746179 :                 new->bc_levels[i].bp = bp;
     572             :         }
     573   212135557 :         *ncur = new;
     574   212135557 :         return 0;
     575             : }
     576             : 
     577             : /*
     578             :  * XFS btree block layout and addressing:
     579             :  *
     580             :  * There are two types of blocks in the btree: leaf and non-leaf blocks.
     581             :  *
     582             :  * The leaf record start with a header then followed by records containing
     583             :  * the values.  A non-leaf block also starts with the same header, and
     584             :  * then first contains lookup keys followed by an equal number of pointers
     585             :  * to the btree blocks at the previous level.
     586             :  *
     587             :  *              +--------+-------+-------+-------+-------+-------+-------+
     588             :  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
     589             :  *              +--------+-------+-------+-------+-------+-------+-------+
     590             :  *
     591             :  *              +--------+-------+-------+-------+-------+-------+-------+
     592             :  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
     593             :  *              +--------+-------+-------+-------+-------+-------+-------+
     594             :  *
     595             :  * The header is called struct xfs_btree_block for reasons better left unknown
     596             :  * and comes in different versions for short (32bit) and long (64bit) block
     597             :  * pointers.  The record and key structures are defined by the btree instances
     598             :  * and opaque to the btree core.  The block pointers are simple disk endian
     599             :  * integers, available in a short (32bit) and long (64bit) variant.
     600             :  *
     601             :  * The helpers below calculate the offset of a given record, key or pointer
     602             :  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
     603             :  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
     604             :  * inside the btree block is done using indices starting at one, not zero!
     605             :  *
     606             :  * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
     607             :  * overlapping intervals.  In such a tree, records are still sorted lowest to
     608             :  * highest and indexed by the smallest key value that refers to the record.
     609             :  * However, nodes are different: each pointer has two associated keys -- one
     610             :  * indexing the lowest key available in the block(s) below (the same behavior
     611             :  * as the key in a regular btree) and another indexing the highest key
     612             :  * available in the block(s) below.  Because records are /not/ sorted by the
     613             :  * highest key, all leaf block updates require us to compute the highest key
     614             :  * that matches any record in the leaf and to recursively update the high keys
     615             :  * in the nodes going further up in the tree, if necessary.  Nodes look like
     616             :  * this:
     617             :  *
     618             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     619             :  * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
     620             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     621             :  *
     622             :  * To perform an interval query on an overlapped tree, perform the usual
     623             :  * depth-first search and use the low and high keys to decide if we can skip
     624             :  * that particular node.  If a leaf node is reached, return the records that
     625             :  * intersect the interval.  Note that an interval query may return numerous
     626             :  * entries.  For a non-overlapped tree, simply search for the record associated
     627             :  * with the lowest key and iterate forward until a non-matching record is
     628             :  * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
     629             :  * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
     630             :  * more detail.
     631             :  *
     632             :  * Why do we care about overlapping intervals?  Let's say you have a bunch of
     633             :  * reverse mapping records on a reflink filesystem:
     634             :  *
     635             :  * 1: +- file A startblock B offset C length D -----------+
     636             :  * 2:      +- file E startblock F offset G length H --------------+
     637             :  * 3:      +- file I startblock F offset J length K --+
     638             :  * 4:                                                        +- file L... --+
     639             :  *
     640             :  * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
     641             :  * we'd simply increment the length of record 1.  But how do we find the record
     642             :  * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
     643             :  * record 3 because the keys are ordered first by startblock.  An interval
     644             :  * query would return records 1 and 2 because they both overlap (B+D-1), and
     645             :  * from that we can pick out record 1 as the appropriate left neighbor.
     646             :  *
     647             :  * In the non-overlapped case you can do a LE lookup and decrement the cursor
     648             :  * because a record's interval must end before the next record.
     649             :  */
     650             : 
     651             : /*
     652             :  * Return size of the btree block header for this btree instance.
     653             :  */
     654 >73886*10^7 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
     655             : {
     656 >73886*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     657 >14799*10^7 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     658             :                         return XFS_BTREE_LBLOCK_CRC_LEN;
     659           0 :                 return XFS_BTREE_LBLOCK_LEN;
     660             :         }
     661 >59087*10^7 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     662 >59100*10^7 :                 return XFS_BTREE_SBLOCK_CRC_LEN;
     663             :         return XFS_BTREE_SBLOCK_LEN;
     664             : }
     665             : 
     666             : /*
     667             :  * Return size of btree block pointers for this btree instance.
     668             :  */
     669             : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
     670             : {
     671 64047836744 :         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
     672 64047836744 :                 sizeof(__be64) : sizeof(__be32);
     673             : }
     674             : 
     675             : /*
     676             :  * Calculate offset of the n-th record in a btree block.
     677             :  */
     678             : STATIC size_t
     679 >52374*10^7 : xfs_btree_rec_offset(
     680             :         struct xfs_btree_cur    *cur,
     681             :         int                     n)
     682             : {
     683 >52374*10^7 :         return xfs_btree_block_len(cur) +
     684 >52374*10^7 :                 (n - 1) * cur->bc_ops->rec_len;
     685             : }
     686             : 
     687             : /*
     688             :  * Calculate offset of the n-th key in a btree block.
     689             :  */
     690             : STATIC size_t
     691 87788403343 : xfs_btree_key_offset(
     692             :         struct xfs_btree_cur    *cur,
     693             :         int                     n)
     694             : {
     695 87788403343 :         return xfs_btree_block_len(cur) +
     696 87788403343 :                 (n - 1) * cur->bc_ops->key_len;
     697             : }
     698             : 
     699             : /*
     700             :  * Calculate offset of the n-th high key in a btree block.
     701             :  */
     702             : STATIC size_t
     703 65166753234 : xfs_btree_high_key_offset(
     704             :         struct xfs_btree_cur    *cur,
     705             :         int                     n)
     706             : {
     707 65166753234 :         return xfs_btree_block_len(cur) +
     708 65166753234 :                 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
     709             : }
     710             : 
     711             : /*
     712             :  * Calculate offset of the n-th block pointer in a btree block.
     713             :  */
     714             : STATIC size_t
     715 64028826499 : xfs_btree_ptr_offset(
     716             :         struct xfs_btree_cur    *cur,
     717             :         int                     n,
     718             :         int                     level)
     719             : {
     720 64028826499 :         return xfs_btree_block_len(cur) +
     721 >12806*10^7 :                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
     722 >12032*10^7 :                 (n - 1) * xfs_btree_ptr_len(cur);
     723             : }
     724             : 
     725             : /*
     726             :  * Return a pointer to the n-th record in the btree block.
     727             :  */
     728             : union xfs_btree_rec *
     729  5225713953 : xfs_btree_rec_addr(
     730             :         struct xfs_btree_cur    *cur,
     731             :         int                     n,
     732             :         struct xfs_btree_block  *block)
     733             : {
     734 >52300*10^7 :         return (union xfs_btree_rec *)
     735 >52300*10^7 :                 ((char *)block + xfs_btree_rec_offset(cur, n));
     736             : }
     737             : 
     738             : /*
     739             :  * Return a pointer to the n-th key in the btree block.
     740             :  */
     741             : union xfs_btree_key *
     742  4106943171 : xfs_btree_key_addr(
     743             :         struct xfs_btree_cur    *cur,
     744             :         int                     n,
     745             :         struct xfs_btree_block  *block)
     746             : {
     747 87444289785 :         return (union xfs_btree_key *)
     748 87444289785 :                 ((char *)block + xfs_btree_key_offset(cur, n));
     749             : }
     750             : 
     751             : /*
     752             :  * Return a pointer to the n-th high key in the btree block.
     753             :  */
     754             : union xfs_btree_key *
     755  3103639032 : xfs_btree_high_key_addr(
     756             :         struct xfs_btree_cur    *cur,
     757             :         int                     n,
     758             :         struct xfs_btree_block  *block)
     759             : {
     760 65180853692 :         return (union xfs_btree_key *)
     761 65180853692 :                 ((char *)block + xfs_btree_high_key_offset(cur, n));
     762             : }
     763             : 
     764             : /*
     765             :  * Return a pointer to the n-th block pointer in the btree block.
     766             :  */
     767             : union xfs_btree_ptr *
     768 64027272440 : xfs_btree_ptr_addr(
     769             :         struct xfs_btree_cur    *cur,
     770             :         int                     n,
     771             :         struct xfs_btree_block  *block)
     772             : {
     773 64027272440 :         int                     level = xfs_btree_get_level(block);
     774             : 
     775 64027272440 :         ASSERT(block->bb_level != 0);
     776             : 
     777 >12805*10^7 :         return (union xfs_btree_ptr *)
     778 64027272440 :                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
     779             : }
     780             : 
     781             : struct xfs_ifork *
     782  3426860890 : xfs_btree_ifork_ptr(
     783             :         struct xfs_btree_cur    *cur)
     784             : {
     785  3426860890 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
     786             : 
     787  3426860890 :         if (cur->bc_flags & XFS_BTREE_STAGING)
     788      132175 :                 return cur->bc_ino.ifake->if_fork;
     789  3426728715 :         return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
     790             : }
     791             : 
     792             : /*
     793             :  * Get the root block which is stored in the inode.
     794             :  *
     795             :  * For now this btree implementation assumes the btree root is always
     796             :  * stored in the if_broot field of an inode fork.
     797             :  */
     798             : STATIC struct xfs_btree_block *
     799  2013036000 : xfs_btree_get_iroot(
     800             :         struct xfs_btree_cur    *cur)
     801             : {
     802  2013036000 :         struct xfs_ifork        *ifp = xfs_btree_ifork_ptr(cur);
     803             : 
     804  2013106684 :         return (struct xfs_btree_block *)ifp->if_broot;
     805             : }
     806             : 
     807             : /*
     808             :  * Retrieve the block pointer from the cursor at the given level.
     809             :  * This may be an inode btree root or from a buffer.
     810             :  */
     811             : struct xfs_btree_block *                /* generic btree block pointer */
     812 >47052*10^7 : xfs_btree_get_block(
     813             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     814             :         int                     level,  /* level in btree */
     815             :         struct xfs_buf          **bpp)  /* buffer containing the block */
     816             : {
     817 >47052*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     818 >11000*10^7 :             (level == cur->bc_nlevels - 1)) {
     819   620387778 :                 *bpp = NULL;
     820   620387778 :                 return xfs_btree_get_iroot(cur);
     821             :         }
     822             : 
     823 >46990*10^7 :         *bpp = cur->bc_levels[level].bp;
     824 >46990*10^7 :         return XFS_BUF_TO_BLOCK(*bpp);
     825             : }
     826             : 
     827             : /*
     828             :  * Change the cursor to point to the first record at the given level.
     829             :  * Other levels are unaffected.
     830             :  */
     831             : STATIC int                              /* success=1, failure=0 */
     832    82499122 : xfs_btree_firstrec(
     833             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     834             :         int                     level)  /* level to change */
     835             : {
     836    82499122 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     837    82499122 :         struct xfs_buf          *bp;    /* buffer containing block */
     838             : 
     839             :         /*
     840             :          * Get the block pointer for this level.
     841             :          */
     842    82499122 :         block = xfs_btree_get_block(cur, level, &bp);
     843    82499241 :         if (xfs_btree_check_block(cur, block, level, bp))
     844             :                 return 0;
     845             :         /*
     846             :          * It's empty, there is no such record.
     847             :          */
     848    82499431 :         if (!block->bb_numrecs)
     849             :                 return 0;
     850             :         /*
     851             :          * Set the ptr value to 1, that's the first record/key.
     852             :          */
     853    82499431 :         cur->bc_levels[level].ptr = 1;
     854    82499431 :         return 1;
     855             : }
     856             : 
     857             : /*
     858             :  * Change the cursor to point to the last record in the current block
     859             :  * at the given level.  Other levels are unaffected.
     860             :  */
     861             : STATIC int                              /* success=1, failure=0 */
     862    88711133 : xfs_btree_lastrec(
     863             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     864             :         int                     level)  /* level to change */
     865             : {
     866    88711133 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     867    88711133 :         struct xfs_buf          *bp;    /* buffer containing block */
     868             : 
     869             :         /*
     870             :          * Get the block pointer for this level.
     871             :          */
     872    88711133 :         block = xfs_btree_get_block(cur, level, &bp);
     873    88711266 :         if (xfs_btree_check_block(cur, block, level, bp))
     874             :                 return 0;
     875             :         /*
     876             :          * It's empty, there is no such record.
     877             :          */
     878    88711636 :         if (!block->bb_numrecs)
     879             :                 return 0;
     880             :         /*
     881             :          * Set the ptr value to numrecs, that's the last record/key.
     882             :          */
     883    88711636 :         cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
     884    88711636 :         return 1;
     885             : }
     886             : 
     887             : /*
     888             :  * Compute first and last byte offsets for the fields given.
     889             :  * Interprets the offsets table, which contains struct field offsets.
     890             :  */
     891             : void
     892  2055434749 : xfs_btree_offsets(
     893             :         uint32_t        fields,         /* bitmask of fields */
     894             :         const short     *offsets,       /* table of field offsets */
     895             :         int             nbits,          /* number of bits to inspect */
     896             :         int             *first,         /* output: first byte offset */
     897             :         int             *last)          /* output: last byte offset */
     898             : {
     899  2055434749 :         int             i;              /* current bit number */
     900  2055434749 :         uint32_t        imask;          /* mask for current bit number */
     901             : 
     902  2055434749 :         ASSERT(fields != 0);
     903             :         /*
     904             :          * Find the lowest bit, so the first byte offset.
     905             :          */
     906  7734769232 :         for (i = 0, imask = 1u; ; i++, imask <<= 1) {
     907  7734769232 :                 if (imask & fields) {
     908  2055434749 :                         *first = offsets[i];
     909  2055434749 :                         break;
     910             :                 }
     911             :         }
     912             :         /*
     913             :          * Find the highest bit, so the last byte offset.
     914             :          */
     915  2055434749 :         for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
     916 14335346669 :                 if (imask & fields) {
     917  2055434749 :                         *last = offsets[i + 1] - 1;
     918  2055434749 :                         break;
     919             :                 }
     920             :         }
     921  2055434749 : }
     922             : 
     923             : /*
     924             :  * Get a buffer for the block, return it read in.
     925             :  * Long-form addressing.
     926             :  */
     927             : int
     928   410320077 : xfs_btree_read_bufl(
     929             :         struct xfs_mount        *mp,            /* file system mount point */
     930             :         struct xfs_trans        *tp,            /* transaction pointer */
     931             :         xfs_fsblock_t           fsbno,          /* file system block number */
     932             :         struct xfs_buf          **bpp,          /* buffer for fsbno */
     933             :         int                     refval,         /* ref count value for buffer */
     934             :         const struct xfs_buf_ops *ops)
     935             : {
     936   410320077 :         struct xfs_buf          *bp;            /* return value */
     937   410320077 :         xfs_daddr_t             d;              /* real disk block address */
     938   410320077 :         int                     error;
     939             : 
     940   410320077 :         if (!xfs_verify_fsbno(mp, fsbno))
     941             :                 return -EFSCORRUPTED;
     942   410320973 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     943   410320973 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
     944             :                                    mp->m_bsize, 0, &bp, ops);
     945   410321814 :         if (error)
     946             :                 return error;
     947   410321763 :         if (bp)
     948   410321763 :                 xfs_buf_set_ref(bp, refval);
     949   410321833 :         *bpp = bp;
     950   410321833 :         return 0;
     951             : }
     952             : 
     953             : /*
     954             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     955             :  * Long-form addressing.
     956             :  */
     957             : /* ARGSUSED */
     958             : void
     959   439395821 : xfs_btree_reada_bufl(
     960             :         struct xfs_mount        *mp,            /* file system mount point */
     961             :         xfs_fsblock_t           fsbno,          /* file system block number */
     962             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     963             :         const struct xfs_buf_ops *ops)
     964             : {
     965   439395821 :         xfs_daddr_t             d;
     966             : 
     967   439395821 :         ASSERT(fsbno != NULLFSBLOCK);
     968   439395821 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     969   439395821 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     970   439412098 : }
     971             : 
     972             : /*
     973             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     974             :  * Short-form addressing.
     975             :  */
     976             : /* ARGSUSED */
     977             : void
     978  4964643656 : xfs_btree_reada_bufs(
     979             :         struct xfs_mount        *mp,            /* file system mount point */
     980             :         xfs_agnumber_t          agno,           /* allocation group number */
     981             :         xfs_agblock_t           agbno,          /* allocation group block number */
     982             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     983             :         const struct xfs_buf_ops *ops)
     984             : {
     985  4964643656 :         xfs_daddr_t             d;
     986             : 
     987  4964643656 :         ASSERT(agno != NULLAGNUMBER);
     988  4964643656 :         ASSERT(agbno != NULLAGBLOCK);
     989  4964643656 :         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
     990  4964643656 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     991  4966474002 : }
     992             : 
     993             : STATIC int
     994   442603187 : xfs_btree_readahead_lblock(
     995             :         struct xfs_btree_cur    *cur,
     996             :         int                     lr,
     997             :         struct xfs_btree_block  *block)
     998             : {
     999   442603187 :         int                     rval = 0;
    1000   442603187 :         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
    1001   442603187 :         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
    1002             : 
    1003   442603187 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1004             :                 return 0;
    1005             : 
    1006   439398575 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
    1007    38857475 :                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
    1008    38857475 :                                      cur->bc_ops->buf_ops);
    1009    38857475 :                 rval++;
    1010             :         }
    1011             : 
    1012   439398645 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
    1013   400539638 :                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
    1014   400539638 :                                      cur->bc_ops->buf_ops);
    1015   400554619 :                 rval++;
    1016             :         }
    1017             : 
    1018             :         return rval;
    1019             : }
    1020             : 
    1021             : STATIC int
    1022  1491861741 : xfs_btree_readahead_sblock(
    1023             :         struct xfs_btree_cur    *cur,
    1024             :         int                     lr,
    1025             :         struct xfs_btree_block *block)
    1026             : {
    1027  1491861741 :         int                     rval = 0;
    1028  1491861741 :         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
    1029  1491861741 :         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
    1030             : 
    1031  1491861741 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1032             :                 return 0;
    1033             : 
    1034  1486094868 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
    1035    77831277 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1036    77831277 :                                      left, 1, cur->bc_ops->buf_ops);
    1037    77831277 :                 rval++;
    1038             :         }
    1039             : 
    1040  1486095148 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
    1041  1408290081 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1042  1408290081 :                                      right, 1, cur->bc_ops->buf_ops);
    1043  1408371054 :                 rval++;
    1044             :         }
    1045             : 
    1046             :         return rval;
    1047             : }
    1048             : 
    1049             : /*
    1050             :  * Read-ahead btree blocks, at the given level.
    1051             :  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
    1052             :  */
    1053             : STATIC int
    1054 >12596*10^7 : xfs_btree_readahead(
    1055             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    1056             :         int                     lev,            /* level in btree */
    1057             :         int                     lr)             /* left/right bits */
    1058             : {
    1059 >12596*10^7 :         struct xfs_btree_block  *block;
    1060             : 
    1061             :         /*
    1062             :          * No readahead needed if we are at the root level and the
    1063             :          * btree root is stored in the inode.
    1064             :          */
    1065 >12596*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1066 40560819755 :             (lev == cur->bc_nlevels - 1))
    1067             :                 return 0;
    1068             : 
    1069 >12594*10^7 :         if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
    1070             :                 return 0;
    1071             : 
    1072  1934499339 :         cur->bc_levels[lev].ra |= lr;
    1073  1934499339 :         block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
    1074             : 
    1075  1934499339 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1076   442607158 :                 return xfs_btree_readahead_lblock(cur, lr, block);
    1077  1491892181 :         return xfs_btree_readahead_sblock(cur, lr, block);
    1078             : }
    1079             : 
    1080             : STATIC int
    1081 43580638568 : xfs_btree_ptr_to_daddr(
    1082             :         struct xfs_btree_cur            *cur,
    1083             :         const union xfs_btree_ptr       *ptr,
    1084             :         xfs_daddr_t                     *daddr)
    1085             : {
    1086 43580638568 :         xfs_fsblock_t           fsbno;
    1087 43580638568 :         xfs_agblock_t           agbno;
    1088 43580638568 :         int                     error;
    1089             : 
    1090 43580638568 :         error = xfs_btree_check_ptr(cur, ptr, 0, 1);
    1091 43599702249 :         if (error)
    1092             :                 return error;
    1093             : 
    1094 43599702249 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1095  2254738227 :                 *daddr = xfbtree_ptr_to_daddr(cur, ptr);
    1096  2254738322 :                 return 0;
    1097             :         }
    1098             : 
    1099 41344964022 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1100  3481117133 :                 fsbno = be64_to_cpu(ptr->l);
    1101  3481117133 :                 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
    1102             :         } else {
    1103 37863846889 :                 agbno = be32_to_cpu(ptr->s);
    1104 37863846889 :                 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1105             :                                 agbno);
    1106             :         }
    1107             : 
    1108             :         return 0;
    1109             : }
    1110             : 
    1111             : /*
    1112             :  * Readahead @count btree blocks at the given @ptr location.
    1113             :  *
    1114             :  * We don't need to care about long or short form btrees here as we have a
    1115             :  * method of converting the ptr directly to a daddr available to us.
    1116             :  */
    1117             : STATIC void
    1118    14031953 : xfs_btree_readahead_ptr(
    1119             :         struct xfs_btree_cur    *cur,
    1120             :         union xfs_btree_ptr     *ptr,
    1121             :         xfs_extlen_t            count)
    1122             : {
    1123    14031953 :         xfs_daddr_t             daddr;
    1124             : 
    1125    14031953 :         if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
    1126           0 :                 return;
    1127    14031950 :         xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
    1128    14031956 :                         xfs_btree_bbsize(cur) * count,
    1129    14031948 :                         cur->bc_ops->buf_ops);
    1130             : }
    1131             : 
    1132             : /*
    1133             :  * Set the buffer for level "lev" in the cursor to bp, releasing
    1134             :  * any previous buffer.
    1135             :  */
    1136             : STATIC void
    1137 12098949121 : xfs_btree_setbuf(
    1138             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1139             :         int                     lev,    /* level in btree */
    1140             :         struct xfs_buf          *bp)    /* new buffer to set */
    1141             : {
    1142 12098949121 :         struct xfs_btree_block  *b;     /* btree block */
    1143             : 
    1144 12098949121 :         if (cur->bc_levels[lev].bp)
    1145  1502424328 :                 xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
    1146 12096917243 :         cur->bc_levels[lev].bp = bp;
    1147 12096917243 :         cur->bc_levels[lev].ra = 0;
    1148             : 
    1149 12096917243 :         b = XFS_BUF_TO_BLOCK(bp);
    1150 12096917243 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1151  2192682519 :                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
    1152  1334792009 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1153  2192682519 :                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
    1154  1467727165 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1155             :         } else {
    1156  9904234724 :                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
    1157  6758740460 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1158  9904234724 :                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
    1159  6829180590 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1160             :         }
    1161 12096917243 : }
    1162             : 
    1163             : bool
    1164    39895652 : xfs_btree_ptr_is_null(
    1165             :         struct xfs_btree_cur            *cur,
    1166             :         const union xfs_btree_ptr       *ptr)
    1167             : {
    1168   470079994 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1169  3093443252 :                 return ptr->l == cpu_to_be64(NULLFSBLOCK);
    1170             :         else
    1171  5359726705 :                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
    1172             : }
    1173             : 
    1174             : void
    1175     2140800 : xfs_btree_set_ptr_null(
    1176             :         struct xfs_btree_cur    *cur,
    1177             :         union xfs_btree_ptr     *ptr)
    1178             : {
    1179     2140800 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1180   936632054 :                 ptr->l = cpu_to_be64(NULLFSBLOCK);
    1181             :         else
    1182   735273402 :                 ptr->s = cpu_to_be32(NULLAGBLOCK);
    1183     2140800 : }
    1184             : 
    1185             : /*
    1186             :  * Get/set/init sibling pointers
    1187             :  */
    1188             : void
    1189  6342330755 : xfs_btree_get_sibling(
    1190             :         struct xfs_btree_cur    *cur,
    1191             :         struct xfs_btree_block  *block,
    1192             :         union xfs_btree_ptr     *ptr,
    1193             :         int                     lr)
    1194             : {
    1195  6342330755 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1196             : 
    1197  6342330755 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1198  1744742777 :                 if (lr == XFS_BB_RIGHTSIB)
    1199  1302968746 :                         ptr->l = block->bb_u.l.bb_rightsib;
    1200             :                 else
    1201   441774031 :                         ptr->l = block->bb_u.l.bb_leftsib;
    1202             :         } else {
    1203  4597587978 :                 if (lr == XFS_BB_RIGHTSIB)
    1204  4483925784 :                         ptr->s = block->bb_u.s.bb_rightsib;
    1205             :                 else
    1206   113662194 :                         ptr->s = block->bb_u.s.bb_leftsib;
    1207             :         }
    1208  6342330755 : }
    1209             : 
    1210             : void
    1211     5817258 : xfs_btree_set_sibling(
    1212             :         struct xfs_btree_cur            *cur,
    1213             :         struct xfs_btree_block          *block,
    1214             :         const union xfs_btree_ptr       *ptr,
    1215             :         int                             lr)
    1216             : {
    1217     5817258 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1218             : 
    1219     5817258 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1220     2271202 :                 if (lr == XFS_BB_RIGHTSIB)
    1221     1311556 :                         block->bb_u.l.bb_rightsib = ptr->l;
    1222             :                 else
    1223      959646 :                         block->bb_u.l.bb_leftsib = ptr->l;
    1224             :         } else {
    1225     3546056 :                 if (lr == XFS_BB_RIGHTSIB)
    1226     1675942 :                         block->bb_u.s.bb_rightsib = ptr->s;
    1227             :                 else
    1228     1870114 :                         block->bb_u.s.bb_leftsib = ptr->s;
    1229             :         }
    1230     5817258 : }
    1231             : 
    1232             : static void
    1233    18083807 : __xfs_btree_init_block(
    1234             :         struct xfs_mount        *mp,
    1235             :         struct xfs_btree_block  *buf,
    1236             :         const struct xfs_btree_ops *ops,
    1237             :         xfs_daddr_t             blkno,
    1238             :         __u16                   level,
    1239             :         __u16                   numrecs,
    1240             :         __u64                   owner)
    1241             : {
    1242    18083807 :         int                     crc = xfs_has_crc(mp);
    1243    18083807 :         __u32                   magic = xfs_btree_magic(mp, ops);
    1244             : 
    1245    18083817 :         buf->bb_magic = cpu_to_be32(magic);
    1246    18083817 :         buf->bb_level = cpu_to_be16(level);
    1247    18083817 :         buf->bb_numrecs = cpu_to_be16(numrecs);
    1248             : 
    1249    18083817 :         if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
    1250    16719397 :                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
    1251    16719397 :                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
    1252    16719397 :                 if (crc) {
    1253    16719399 :                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
    1254    16719399 :                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
    1255    16719399 :                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1256    16719396 :                         buf->bb_u.l.bb_pad = 0;
    1257    16719396 :                         buf->bb_u.l.bb_lsn = 0;
    1258             :                 }
    1259             :         } else {
    1260             :                 /* owner is a 32 bit value on short blocks */
    1261     1364420 :                 __u32 __owner = (__u32)owner;
    1262             : 
    1263     1364420 :                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
    1264     1364420 :                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
    1265     1364420 :                 if (crc) {
    1266     1357590 :                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
    1267     1357590 :                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
    1268     1357590 :                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1269     1357585 :                         buf->bb_u.s.bb_lsn = 0;
    1270             :                 }
    1271             :         }
    1272    18083809 : }
    1273             : 
    1274             : void
    1275    14251891 : xfs_btree_init_block(
    1276             :         struct xfs_mount        *mp,
    1277             :         struct xfs_btree_block  *block,
    1278             :         const struct xfs_btree_ops *ops,
    1279             :         __u16                   level,
    1280             :         __u16                   numrecs,
    1281             :         __u64                   owner)
    1282             : {
    1283    14251891 :         __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
    1284             :                         numrecs, owner);
    1285    14251878 : }
    1286             : 
    1287             : void
    1288     3802812 : xfs_btree_init_buf(
    1289             :         struct xfs_mount                *mp,
    1290             :         struct xfs_buf                  *bp,
    1291             :         const struct xfs_btree_ops      *ops,
    1292             :         __u16                           level,
    1293             :         __u16                           numrecs,
    1294             :         __u64                           owner)
    1295             : {
    1296     3802812 :         __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
    1297             :                         xfs_buf_daddr(bp), level, numrecs, owner);
    1298     3802798 :         bp->b_ops = ops->buf_ops;
    1299     3802798 : }
    1300             : 
    1301             : void
    1302     2118602 : xfs_btree_init_block_cur(
    1303             :         struct xfs_btree_cur    *cur,
    1304             :         struct xfs_buf          *bp,
    1305             :         int                     level,
    1306             :         int                     numrecs)
    1307             : {
    1308     2118602 :         __u64                   owner;
    1309             : 
    1310             :         /*
    1311             :          * we can pull the owner from the cursor right now as the different
    1312             :          * owners align directly with the pointer size of the btree. This may
    1313             :          * change in future, but is safe for current users of the generic btree
    1314             :          * code.
    1315             :          */
    1316     2118602 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1317       11191 :                 owner = xfbtree_owner(cur);
    1318     2107411 :         else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1319      799813 :                 owner = cur->bc_ino.ip->i_ino;
    1320             :         else
    1321     1307598 :                 owner = cur->bc_ag.pag->pag_agno;
    1322             : 
    1323     2118602 :         xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
    1324     2118599 : }
    1325             : 
    1326             : /*
    1327             :  * Return true if ptr is the last record in the btree and
    1328             :  * we need to track updates to this record.  The decision
    1329             :  * will be further refined in the update_lastrec method.
    1330             :  */
    1331             : STATIC int
    1332  2016997796 : xfs_btree_is_lastrec(
    1333             :         struct xfs_btree_cur    *cur,
    1334             :         struct xfs_btree_block  *block,
    1335             :         int                     level)
    1336             : {
    1337  2016997796 :         union xfs_btree_ptr     ptr;
    1338             : 
    1339  2016997796 :         if (level > 0)
    1340             :                 return 0;
    1341  2015684162 :         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
    1342             :                 return 0;
    1343             : 
    1344   191973145 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1345   383944842 :         if (!xfs_btree_ptr_is_null(cur, &ptr))
    1346    22659851 :                 return 0;
    1347             :         return 1;
    1348             : }
    1349             : 
    1350             : STATIC void
    1351   112690206 : xfs_btree_buf_to_ptr(
    1352             :         struct xfs_btree_cur    *cur,
    1353             :         struct xfs_buf          *bp,
    1354             :         union xfs_btree_ptr     *ptr)
    1355             : {
    1356   112690206 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1357       11191 :                 xfbtree_buf_to_ptr(cur, bp, ptr);
    1358       11191 :                 return;
    1359             :         }
    1360             : 
    1361   112679015 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1362    14598272 :                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
    1363             :                                         xfs_buf_daddr(bp)));
    1364             :         else {
    1365    98080743 :                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
    1366             :                                         xfs_buf_daddr(bp)));
    1367             :         }
    1368             : }
    1369             : 
    1370             : static inline void
    1371             : xfs_btree_set_refs(
    1372             :         struct xfs_btree_cur    *cur,
    1373             :         struct xfs_buf          *bp)
    1374             : {
    1375 12243221157 :         xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
    1376             : }
    1377             : 
    1378             : int
    1379     2136480 : xfs_btree_get_buf_block(
    1380             :         struct xfs_btree_cur            *cur,
    1381             :         const union xfs_btree_ptr       *ptr,
    1382             :         struct xfs_btree_block          **block,
    1383             :         struct xfs_buf                  **bpp)
    1384             : {
    1385     2136480 :         xfs_daddr_t                     d;
    1386     2136480 :         int                             error;
    1387             : 
    1388     2136480 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1389     2136486 :         if (error)
    1390             :                 return error;
    1391     4272962 :         error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
    1392     2136481 :                         xfs_btree_bbsize(cur), 0, bpp);
    1393     2136481 :         if (error)
    1394             :                 return error;
    1395             : 
    1396     2136481 :         (*bpp)->b_ops = cur->bc_ops->buf_ops;
    1397     2136481 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1398     2136481 :         return 0;
    1399             : }
    1400             : 
    1401             : /*
    1402             :  * Read in the buffer at the given ptr and return the buffer and
    1403             :  * the block pointer within the buffer.
    1404             :  */
    1405             : int
    1406 12240626578 : xfs_btree_read_buf_block(
    1407             :         struct xfs_btree_cur            *cur,
    1408             :         const union xfs_btree_ptr       *ptr,
    1409             :         int                             flags,
    1410             :         struct xfs_btree_block          **block,
    1411             :         struct xfs_buf                  **bpp)
    1412             : {
    1413 12240626578 :         struct xfs_mount        *mp = cur->bc_mp;
    1414 12240626578 :         xfs_daddr_t             d;
    1415 12240626578 :         int                     error;
    1416             : 
    1417             :         /* need to sort out how callers deal with failures first */
    1418 12240626578 :         ASSERT(!(flags & XBF_TRYLOCK));
    1419             : 
    1420 12240626578 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1421 12241788386 :         if (error)
    1422             :                 return error;
    1423 12241076402 :         error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
    1424 12241514225 :                         xfs_btree_bbsize(cur), flags, bpp,
    1425 12241639584 :                         cur->bc_ops->buf_ops);
    1426 12243354685 :         if (xfs_metadata_is_sick(error))
    1427         270 :                 xfs_btree_mark_sick(cur);
    1428 12243354685 :         if (error)
    1429             :                 return error;
    1430             : 
    1431 12243221157 :         xfs_btree_set_refs(cur, *bpp);
    1432 12243639622 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1433 12243639622 :         return 0;
    1434             : }
    1435             : 
    1436             : /*
    1437             :  * Copy keys from one btree block to another.
    1438             :  */
    1439             : void
    1440   222561139 : xfs_btree_copy_keys(
    1441             :         struct xfs_btree_cur            *cur,
    1442             :         union xfs_btree_key             *dst_key,
    1443             :         const union xfs_btree_key       *src_key,
    1444             :         int                             numkeys)
    1445             : {
    1446   222561139 :         ASSERT(numkeys >= 0);
    1447   445122278 :         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
    1448   222561139 : }
    1449             : 
    1450             : /*
    1451             :  * Copy records from one btree block to another.
    1452             :  */
    1453             : STATIC void
    1454  1447867448 : xfs_btree_copy_recs(
    1455             :         struct xfs_btree_cur    *cur,
    1456             :         union xfs_btree_rec     *dst_rec,
    1457             :         union xfs_btree_rec     *src_rec,
    1458             :         int                     numrecs)
    1459             : {
    1460  1447867448 :         ASSERT(numrecs >= 0);
    1461  2895734896 :         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
    1462  1447867448 : }
    1463             : 
    1464             : /*
    1465             :  * Copy block pointers from one btree block to another.
    1466             :  */
    1467             : void
    1468    17763203 : xfs_btree_copy_ptrs(
    1469             :         struct xfs_btree_cur    *cur,
    1470             :         union xfs_btree_ptr     *dst_ptr,
    1471             :         const union xfs_btree_ptr *src_ptr,
    1472             :         int                     numptrs)
    1473             : {
    1474    17763203 :         ASSERT(numptrs >= 0);
    1475    39675934 :         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
    1476    17763203 : }
    1477             : 
    1478             : /*
    1479             :  * Shift keys one index left/right inside a single btree block.
    1480             :  */
    1481             : STATIC void
    1482     1247043 : xfs_btree_shift_keys(
    1483             :         struct xfs_btree_cur    *cur,
    1484             :         union xfs_btree_key     *key,
    1485             :         int                     dir,
    1486             :         int                     numkeys)
    1487             : {
    1488     1247043 :         char                    *dst_key;
    1489             : 
    1490     1247043 :         ASSERT(numkeys >= 0);
    1491     1247043 :         ASSERT(dir == 1 || dir == -1);
    1492             : 
    1493     1247043 :         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
    1494     2494086 :         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
    1495     1247043 : }
    1496             : 
    1497             : /*
    1498             :  * Shift records one index left/right inside a single btree block.
    1499             :  */
    1500             : STATIC void
    1501  1270423907 : xfs_btree_shift_recs(
    1502             :         struct xfs_btree_cur    *cur,
    1503             :         union xfs_btree_rec     *rec,
    1504             :         int                     dir,
    1505             :         int                     numrecs)
    1506             : {
    1507  1270423907 :         char                    *dst_rec;
    1508             : 
    1509  1270423907 :         ASSERT(numrecs >= 0);
    1510  1270423907 :         ASSERT(dir == 1 || dir == -1);
    1511             : 
    1512  1270423907 :         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
    1513  2540847814 :         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
    1514  1270423907 : }
    1515             : 
    1516             : /*
    1517             :  * Shift block pointers one index left/right inside a single btree block.
    1518             :  */
    1519             : STATIC void
    1520     1247042 : xfs_btree_shift_ptrs(
    1521             :         struct xfs_btree_cur    *cur,
    1522             :         union xfs_btree_ptr     *ptr,
    1523             :         int                     dir,
    1524             :         int                     numptrs)
    1525             : {
    1526     1247042 :         char                    *dst_ptr;
    1527             : 
    1528     1247042 :         ASSERT(numptrs >= 0);
    1529     1247042 :         ASSERT(dir == 1 || dir == -1);
    1530             : 
    1531     1247042 :         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
    1532     2494084 :         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
    1533     1247042 : }
    1534             : 
    1535             : /*
    1536             :  * Log key values from the btree block.
    1537             :  */
    1538             : STATIC void
    1539   221134601 : xfs_btree_log_keys(
    1540             :         struct xfs_btree_cur    *cur,
    1541             :         struct xfs_buf          *bp,
    1542             :         int                     first,
    1543             :         int                     last)
    1544             : {
    1545             : 
    1546   221134601 :         if (bp) {
    1547   211448607 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1548   211449037 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1549   211449037 :                                   xfs_btree_key_offset(cur, first),
    1550   211449037 :                                   xfs_btree_key_offset(cur, last + 1) - 1);
    1551             :         } else {
    1552     9685994 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1553     9685994 :                                 xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1554             :         }
    1555   221135073 : }
    1556             : 
    1557             : /*
    1558             :  * Log record values from the btree block.
    1559             :  */
    1560             : void
    1561  1825846041 : xfs_btree_log_recs(
    1562             :         struct xfs_btree_cur    *cur,
    1563             :         struct xfs_buf          *bp,
    1564             :         int                     first,
    1565             :         int                     last)
    1566             : {
    1567  1825846041 :         if (!bp) {
    1568     2959190 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1569     1479595 :                                 xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1570     1479595 :                 return;
    1571             :         }
    1572             : 
    1573  1824366446 :         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1574  1824409177 :         xfs_trans_log_buf(cur->bc_tp, bp,
    1575  1824409177 :                           xfs_btree_rec_offset(cur, first),
    1576  1824409177 :                           xfs_btree_rec_offset(cur, last + 1) - 1);
    1577             : }
    1578             : 
    1579             : /*
    1580             :  * Log block pointer fields from a btree block (nonleaf).
    1581             :  */
    1582             : STATIC void
    1583     1377589 : xfs_btree_log_ptrs(
    1584             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1585             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1586             :         int                     first,  /* index of first pointer to log */
    1587             :         int                     last)   /* index of last pointer to log */
    1588             : {
    1589             : 
    1590     1377589 :         if (bp) {
    1591     1345473 :                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    1592     1345473 :                 int                     level = xfs_btree_get_level(block);
    1593             : 
    1594     1345473 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1595     1345474 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1596     1345473 :                                 xfs_btree_ptr_offset(cur, first, level),
    1597     1345474 :                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
    1598             :         } else {
    1599       32116 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1600       32116 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1601             :         }
    1602             : 
    1603     1377589 : }
    1604             : 
    1605             : /*
    1606             :  * Log fields from a btree block header.
    1607             :  */
    1608             : void
    1609  1804035623 : xfs_btree_log_block(
    1610             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1611             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1612             :         uint32_t                fields) /* mask of fields: XFS_BB_... */
    1613             : {
    1614  1804035623 :         int                     first;  /* first byte offset logged */
    1615  1804035623 :         int                     last;   /* last byte offset logged */
    1616  1804035623 :         static const short      soffsets[] = {  /* table of offsets (short) */
    1617             :                 offsetof(struct xfs_btree_block, bb_magic),
    1618             :                 offsetof(struct xfs_btree_block, bb_level),
    1619             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1620             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
    1621             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
    1622             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
    1623             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
    1624             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
    1625             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
    1626             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
    1627             :                 XFS_BTREE_SBLOCK_CRC_LEN
    1628             :         };
    1629  1804035623 :         static const short      loffsets[] = {  /* table of offsets (long) */
    1630             :                 offsetof(struct xfs_btree_block, bb_magic),
    1631             :                 offsetof(struct xfs_btree_block, bb_level),
    1632             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1633             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
    1634             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
    1635             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
    1636             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
    1637             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
    1638             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
    1639             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
    1640             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
    1641             :                 XFS_BTREE_LBLOCK_CRC_LEN
    1642             :         };
    1643             : 
    1644  1804035623 :         if (bp) {
    1645  1802844392 :                 int nbits;
    1646             : 
    1647  1802844392 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    1648             :                         /*
    1649             :                          * We don't log the CRC when updating a btree
    1650             :                          * block but instead recreate it during log
    1651             :                          * recovery.  As the log buffers have checksums
    1652             :                          * of their own this is safe and avoids logging a crc
    1653             :                          * update in a lot of places.
    1654             :                          */
    1655  1802838522 :                         if (fields == XFS_BB_ALL_BITS)
    1656     2515429 :                                 fields = XFS_BB_ALL_BITS_CRC;
    1657             :                         nbits = XFS_BB_NUM_BITS_CRC;
    1658             :                 } else {
    1659             :                         nbits = XFS_BB_NUM_BITS;
    1660             :                 }
    1661  1802844392 :                 xfs_btree_offsets(fields,
    1662  1802844392 :                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
    1663             :                                         loffsets : soffsets,
    1664             :                                   nbits, &first, &last);
    1665  1802907524 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1666  1802920280 :                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
    1667             :         } else {
    1668     1191231 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1669     1191231 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1670             :         }
    1671  1803990026 : }
    1672             : 
    1673             : /*
    1674             :  * Increment cursor by one record at the level.
    1675             :  * For nonzero levels the leaf-ward information is untouched.
    1676             :  */
    1677             : int                                             /* error */
    1678 >12381*10^7 : xfs_btree_increment(
    1679             :         struct xfs_btree_cur    *cur,
    1680             :         int                     level,
    1681             :         int                     *stat)          /* success/failure */
    1682             : {
    1683 >12381*10^7 :         struct xfs_btree_block  *block;
    1684 >12381*10^7 :         union xfs_btree_ptr     ptr;
    1685 >12381*10^7 :         struct xfs_buf          *bp;
    1686 >12381*10^7 :         int                     error;          /* error return value */
    1687 >12381*10^7 :         int                     lev;
    1688             : 
    1689 >12381*10^7 :         ASSERT(level < cur->bc_nlevels);
    1690             : 
    1691             :         /* Read-ahead to the right at this level. */
    1692 >12381*10^7 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    1693             : 
    1694             :         /* Get a pointer to the btree block. */
    1695 >12403*10^7 :         block = xfs_btree_get_block(cur, level, &bp);
    1696             : 
    1697             : #ifdef DEBUG
    1698 >12416*10^7 :         error = xfs_btree_check_block(cur, block, level, bp);
    1699 >12366*10^7 :         if (error)
    1700           0 :                 goto error0;
    1701             : #endif
    1702             : 
    1703             :         /* We're done if we remain in the block after the increment. */
    1704 >24733*10^7 :         if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
    1705 >11925*10^7 :                 goto out1;
    1706             : 
    1707             :         /* Fail if we just went off the right edge of the tree. */
    1708  4408604856 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1709  8817590438 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1710  3608953482 :                 goto out0;
    1711             : 
    1712   799841737 :         XFS_BTREE_STATS_INC(cur, increment);
    1713             : 
    1714             :         /*
    1715             :          * March up the tree incrementing pointers.
    1716             :          * Stop when we don't go off the right edge of a block.
    1717             :          */
    1718   808209485 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1719   808203456 :                 block = xfs_btree_get_block(cur, lev, &bp);
    1720             : 
    1721             : #ifdef DEBUG
    1722   808216244 :                 error = xfs_btree_check_block(cur, block, lev, bp);
    1723   808178696 :                 if (error)
    1724           0 :                         goto error0;
    1725             : #endif
    1726             : 
    1727  1616357392 :                 if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
    1728             :                         break;
    1729             : 
    1730             :                 /* Read-ahead the right block for the next loop. */
    1731     8357095 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
    1732             :         }
    1733             : 
    1734             :         /*
    1735             :          * If we went off the root then we are either seriously
    1736             :          * confused or have the tree root in an inode.
    1737             :          */
    1738   799827630 :         if (lev == cur->bc_nlevels) {
    1739           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1740           0 :                         goto out0;
    1741           0 :                 ASSERT(0);
    1742           0 :                 xfs_btree_mark_sick(cur);
    1743           0 :                 error = -EFSCORRUPTED;
    1744           0 :                 goto error0;
    1745             :         }
    1746   799827630 :         ASSERT(lev < cur->bc_nlevels);
    1747             : 
    1748             :         /*
    1749             :          * Now walk back down the tree, fixing up the cursor's buffer
    1750             :          * pointers and key numbers.
    1751             :          */
    1752  1607984414 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1753   808186460 :                 union xfs_btree_ptr     *ptrp;
    1754             : 
    1755   808186460 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1756   808213691 :                 --lev;
    1757   808213691 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1758   808231739 :                 if (error)
    1759          14 :                         goto error0;
    1760             : 
    1761   808231725 :                 xfs_btree_setbuf(cur, lev, bp);
    1762   808156784 :                 cur->bc_levels[lev].ptr = 1;
    1763             :         }
    1764   799792126 : out1:
    1765 >12005*10^7 :         *stat = 1;
    1766 >12005*10^7 :         return 0;
    1767             : 
    1768  3608953482 : out0:
    1769  3608953482 :         *stat = 0;
    1770  3608953482 :         return 0;
    1771             : 
    1772             : error0:
    1773             :         return error;
    1774             : }
    1775             : 
    1776             : /*
    1777             :  * Decrement cursor by one record at the level.
    1778             :  * For nonzero levels the leaf-ward information is untouched.
    1779             :  */
    1780             : int                                             /* error */
    1781  1870585236 : xfs_btree_decrement(
    1782             :         struct xfs_btree_cur    *cur,
    1783             :         int                     level,
    1784             :         int                     *stat)          /* success/failure */
    1785             : {
    1786  1870585236 :         struct xfs_btree_block  *block;
    1787  1870585236 :         struct xfs_buf          *bp;
    1788  1870585236 :         int                     error;          /* error return value */
    1789  1870585236 :         int                     lev;
    1790  1870585236 :         union xfs_btree_ptr     ptr;
    1791             : 
    1792  1870585236 :         ASSERT(level < cur->bc_nlevels);
    1793             : 
    1794             :         /* Read-ahead to the left at this level. */
    1795  1870585236 :         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
    1796             : 
    1797             :         /* We're done if we remain in the block after the decrement. */
    1798  1870601795 :         if (--cur->bc_levels[level].ptr > 0)
    1799  1472539083 :                 goto out1;
    1800             : 
    1801             :         /* Get a pointer to the btree block. */
    1802   398062712 :         block = xfs_btree_get_block(cur, level, &bp);
    1803             : 
    1804             : #ifdef DEBUG
    1805   398062727 :         error = xfs_btree_check_block(cur, block, level, bp);
    1806   398062726 :         if (error)
    1807           0 :                 goto error0;
    1808             : #endif
    1809             : 
    1810             :         /* Fail if we just went off the left edge of the tree. */
    1811   398062726 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    1812   796125694 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1813   337323395 :                 goto out0;
    1814             : 
    1815    60739452 :         XFS_BTREE_STATS_INC(cur, decrement);
    1816             : 
    1817             :         /*
    1818             :          * March up the tree decrementing pointers.
    1819             :          * Stop when we don't go off the left edge of a block.
    1820             :          */
    1821    60909658 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1822    60909653 :                 if (--cur->bc_levels[lev].ptr > 0)
    1823             :                         break;
    1824             :                 /* Read-ahead the left block for the next loop. */
    1825      170098 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
    1826             :         }
    1827             : 
    1828             :         /*
    1829             :          * If we went off the root then we are seriously confused.
    1830             :          * or the root of the tree is in an inode.
    1831             :          */
    1832    60739560 :         if (lev == cur->bc_nlevels) {
    1833           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1834           0 :                         goto out0;
    1835           0 :                 ASSERT(0);
    1836           0 :                 xfs_btree_mark_sick(cur);
    1837           0 :                 error = -EFSCORRUPTED;
    1838           0 :                 goto error0;
    1839             :         }
    1840    60739560 :         ASSERT(lev < cur->bc_nlevels);
    1841             : 
    1842             :         /*
    1843             :          * Now walk back down the tree, fixing up the cursor's buffer
    1844             :          * pointers and key numbers.
    1845             :          */
    1846   121649004 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1847    60909608 :                 union xfs_btree_ptr     *ptrp;
    1848             : 
    1849    60909608 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1850    60909645 :                 --lev;
    1851    60909645 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1852    60909464 :                 if (error)
    1853           3 :                         goto error0;
    1854    60909461 :                 xfs_btree_setbuf(cur, lev, bp);
    1855   121818888 :                 cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
    1856             :         }
    1857    60739361 : out1:
    1858  1533278444 :         *stat = 1;
    1859  1533278444 :         return 0;
    1860             : 
    1861   337323395 : out0:
    1862   337323395 :         *stat = 0;
    1863   337323395 :         return 0;
    1864             : 
    1865             : error0:
    1866             :         return error;
    1867             : }
    1868             : 
    1869             : /*
    1870             :  * Check the btree block owner now that we have the context to know who the
    1871             :  * real owner is.
    1872             :  */
    1873             : static inline xfs_failaddr_t
    1874 11227776446 : xfs_btree_check_block_owner(
    1875             :         struct xfs_btree_cur    *cur,
    1876             :         struct xfs_btree_block  *block)
    1877             : {
    1878 11227776446 :         if (!xfs_has_crc(cur->bc_mp))
    1879             :                 return NULL;
    1880             : 
    1881 11227775692 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1882  1071397113 :                 return xfbtree_check_block_owner(cur, block);
    1883             : 
    1884 10156378579 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
    1885  9363891136 :                 if (be32_to_cpu(block->bb_u.s.bb_owner) !=
    1886  9363891136 :                                                 cur->bc_ag.pag->pag_agno)
    1887           0 :                         return __this_address;
    1888             :                 return NULL;
    1889             :         }
    1890             : 
    1891   792487443 :         if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
    1892             :                 return NULL;
    1893             : 
    1894   792487443 :         if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
    1895           0 :                 return __this_address;
    1896             : 
    1897             :         return NULL;
    1898             : }
    1899             : 
    1900             : int
    1901 32731242380 : xfs_btree_lookup_get_block(
    1902             :         struct xfs_btree_cur            *cur,   /* btree cursor */
    1903             :         int                             level,  /* level in the btree */
    1904             :         const union xfs_btree_ptr       *pp,    /* ptr to btree block */
    1905             :         struct xfs_btree_block          **blkp) /* return btree block */
    1906             : {
    1907 32731242380 :         struct xfs_buf          *bp;    /* buffer pointer for btree block */
    1908 32731242380 :         xfs_daddr_t             daddr;
    1909 32731242380 :         int                     error = 0;
    1910             : 
    1911             :         /* special case the root block if in an inode */
    1912 32731242380 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1913  3639755496 :             (level == cur->bc_nlevels - 1)) {
    1914  1390956068 :                 *blkp = xfs_btree_get_iroot(cur);
    1915  1390963037 :                 return 0;
    1916             :         }
    1917             : 
    1918             :         /*
    1919             :          * If the old buffer at this level for the disk address we are
    1920             :          * looking for re-use it.
    1921             :          *
    1922             :          * Otherwise throw it away and get a new one.
    1923             :          */
    1924 31340286312 :         bp = cur->bc_levels[level].bp;
    1925 31340286312 :         error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
    1926 31353337062 :         if (error)
    1927             :                 return error;
    1928 31353337062 :         if (bp && xfs_buf_daddr(bp) == daddr) {
    1929 20126742853 :                 *blkp = XFS_BUF_TO_BLOCK(bp);
    1930 20126742853 :                 return 0;
    1931             :         }
    1932             : 
    1933 11226594209 :         error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
    1934 11229671819 :         if (error)
    1935             :                 return error;
    1936             : 
    1937             :         /* Check the inode owner since the verifiers don't. */
    1938 11229708252 :         if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
    1939           0 :                 goto out_bad;
    1940             : 
    1941             :         /* Did we get the level we were looking for? */
    1942 11228649198 :         if (be16_to_cpu((*blkp)->bb_level) != level)
    1943           0 :                 goto out_bad;
    1944             : 
    1945             :         /* Check that internal nodes have at least one record. */
    1946 11228649198 :         if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
    1947           0 :                 goto out_bad;
    1948             : 
    1949 11228649198 :         xfs_btree_setbuf(cur, level, bp);
    1950 11228649198 :         return 0;
    1951             : 
    1952           0 : out_bad:
    1953           0 :         *blkp = NULL;
    1954           0 :         xfs_buf_mark_corrupt(bp);
    1955           0 :         xfs_trans_brelse(cur->bc_tp, bp);
    1956           0 :         xfs_btree_mark_sick(cur);
    1957           0 :         return -EFSCORRUPTED;
    1958             : }
    1959             : 
    1960             : /*
    1961             :  * Get current search key.  For level 0 we don't actually have a key
    1962             :  * structure so we make one up from the record.  For all other levels
    1963             :  * we just return the right key.
    1964             :  */
    1965             : STATIC union xfs_btree_key *
    1966 >11946*10^7 : xfs_lookup_get_search_key(
    1967             :         struct xfs_btree_cur    *cur,
    1968             :         int                     level,
    1969             :         int                     keyno,
    1970             :         struct xfs_btree_block  *block,
    1971             :         union xfs_btree_key     *kp)
    1972             : {
    1973 >11946*10^7 :         if (level == 0) {
    1974 91379801522 :                 cur->bc_ops->init_key_from_rec(kp,
    1975             :                                 xfs_btree_rec_addr(cur, keyno, block));
    1976 91379801522 :                 return kp;
    1977             :         }
    1978             : 
    1979 28088349965 :         return xfs_btree_key_addr(cur, keyno, block);
    1980             : }
    1981             : 
    1982             : /*
    1983             :  * Lookup the record.  The cursor is made to point to it, based on dir.
    1984             :  * stat is set to 0 if can't find any such record, 1 for success.
    1985             :  */
    1986             : int                                     /* error */
    1987 19616931805 : xfs_btree_lookup(
    1988             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1989             :         xfs_lookup_t            dir,    /* <=, ==, or >= */
    1990             :         int                     *stat)  /* success/failure */
    1991             : {
    1992 19616931805 :         struct xfs_btree_block  *block; /* current btree block */
    1993 19616931805 :         int64_t                 diff;   /* difference for the current key */
    1994 19616931805 :         int                     error;  /* error return value */
    1995 19616931805 :         int                     keyno;  /* current key number */
    1996 19616931805 :         int                     level;  /* level in the btree */
    1997 19616931805 :         union xfs_btree_ptr     *pp;    /* ptr to btree block */
    1998 19616931805 :         union xfs_btree_ptr     ptr;    /* ptr to btree block */
    1999             : 
    2000 19616931805 :         XFS_BTREE_STATS_INC(cur, lookup);
    2001             : 
    2002             :         /* No such thing as a zero-level tree. */
    2003 19616931805 :         if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
    2004           0 :                 xfs_btree_mark_sick(cur);
    2005           0 :                 return -EFSCORRUPTED;
    2006             :         }
    2007             : 
    2008 19616931805 :         block = NULL;
    2009 19616931805 :         keyno = 0;
    2010             : 
    2011             :         /* initialise start pointer from cursor */
    2012 19616931805 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    2013 19617291073 :         pp = &ptr;
    2014             : 
    2015             :         /*
    2016             :          * Iterate over each level in the btree, starting at the root.
    2017             :          * For each level above the leaves, find the key we need, based
    2018             :          * on the lookup record, then follow the corresponding block
    2019             :          * pointer down to the next level.
    2020             :          */
    2021 45771492756 :         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
    2022             :                 /* Get the block we need to do the lookup on. */
    2023 28247243483 :                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
    2024 28247136429 :                 if (error)
    2025        4860 :                         goto error0;
    2026             : 
    2027 28247131569 :                 if (diff == 0) {
    2028             :                         /*
    2029             :                          * If we already had a key match at a higher level, we
    2030             :                          * know we need to use the first entry in this block.
    2031             :                          */
    2032             :                         keyno = 1;
    2033             :                 } else {
    2034             :                         /* Otherwise search this block. Do a binary search. */
    2035             : 
    2036 28226635966 :                         int     high;   /* high entry number */
    2037 28226635966 :                         int     low;    /* low entry number */
    2038             : 
    2039             :                         /* Set low and high entry numbers, 1-based. */
    2040 28226635966 :                         low = 1;
    2041 28226635966 :                         high = xfs_btree_get_numrecs(block);
    2042 28226635966 :                         if (!high) {
    2043             :                                 /* Block is empty, must be an empty leaf. */
    2044  2099358238 :                                 if (level != 0 || cur->bc_nlevels != 1) {
    2045           0 :                                         XFS_CORRUPTION_ERROR(__func__,
    2046             :                                                         XFS_ERRLEVEL_LOW,
    2047             :                                                         cur->bc_mp, block,
    2048             :                                                         sizeof(*block));
    2049           0 :                                         xfs_btree_mark_sick(cur);
    2050           0 :                                         return -EFSCORRUPTED;
    2051             :                                 }
    2052             : 
    2053  2099358238 :                                 cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
    2054  2099358238 :                                 *stat = 0;
    2055  2099358238 :                                 return 0;
    2056             :                         }
    2057             : 
    2058             :                         /* Binary search the block. */
    2059 >14359*10^7 :                         while (low <= high) {
    2060 >11947*10^7 :                                 union xfs_btree_key     key;
    2061 >11947*10^7 :                                 union xfs_btree_key     *kp;
    2062             : 
    2063 >11947*10^7 :                                 XFS_BTREE_STATS_INC(cur, compare);
    2064             : 
    2065             :                                 /* keyno is average of low and high. */
    2066 >11947*10^7 :                                 keyno = (low + high) >> 1;
    2067             : 
    2068             :                                 /* Get current search key */
    2069 >11947*10^7 :                                 kp = xfs_lookup_get_search_key(cur, level,
    2070             :                                                 keyno, block, &key);
    2071             : 
    2072             :                                 /*
    2073             :                                  * Compute difference to get next direction:
    2074             :                                  *  - less than, move right
    2075             :                                  *  - greater than, move left
    2076             :                                  *  - equal, we're done
    2077             :                                  */
    2078 >11940*10^7 :                                 diff = cur->bc_ops->key_diff(cur, kp);
    2079 >11948*10^7 :                                 if (diff < 0)
    2080 65196379307 :                                         low = keyno + 1;
    2081 54287448147 :                                 else if (diff > 0)
    2082 52269216749 :                                         high = keyno - 1;
    2083             :                                 else
    2084             :                                         break;
    2085             :                         }
    2086             :                 }
    2087             : 
    2088             :                 /*
    2089             :                  * If there are more levels, set up for the next level
    2090             :                  * by getting the block number and filling in the cursor.
    2091             :                  */
    2092 26154292273 :                 if (level > 0) {
    2093             :                         /*
    2094             :                          * If we moved left, need the previous key number,
    2095             :                          * unless there isn't one.
    2096             :                          */
    2097  8631296173 :                         if (diff > 0 && --keyno < 1)
    2098             :                                 keyno = 1;
    2099  8631296173 :                         pp = xfs_btree_ptr_addr(cur, keyno, block);
    2100             : 
    2101  8630939099 :                         error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
    2102  8631205583 :                         if (error)
    2103           0 :                                 goto error0;
    2104             : 
    2105  8631205583 :                         cur->bc_levels[level].ptr = keyno;
    2106             :                 }
    2107             :         }
    2108             : 
    2109             :         /* Done with the search. See if we need to adjust the results. */
    2110 17524249273 :         if (dir != XFS_LOOKUP_LE && diff < 0) {
    2111   824451666 :                 keyno++;
    2112             :                 /*
    2113             :                  * If ge search and we went off the end of the block, but it's
    2114             :                  * not the last block, we're in the wrong block.
    2115             :                  */
    2116   824451666 :                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    2117  1405987851 :                 if (dir == XFS_LOOKUP_GE &&
    2118   371819333 :                     keyno > xfs_btree_get_numrecs(block) &&
    2119             :                     !xfs_btree_ptr_is_null(cur, &ptr)) {
    2120     1539438 :                         int     i;
    2121             : 
    2122     1539438 :                         cur->bc_levels[0].ptr = keyno;
    2123     1539438 :                         error = xfs_btree_increment(cur, 0, &i);
    2124     1539438 :                         if (error)
    2125           1 :                                 goto error0;
    2126     1539437 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    2127           0 :                                 xfs_btree_mark_sick(cur);
    2128     1539437 :                                 return -EFSCORRUPTED;
    2129             :                         }
    2130     1539437 :                         *stat = 1;
    2131     1539437 :                         return 0;
    2132             :                 }
    2133 16699797607 :         } else if (dir == XFS_LOOKUP_LE && diff > 0)
    2134  9042704637 :                 keyno--;
    2135 17522707623 :         cur->bc_levels[0].ptr = keyno;
    2136             : 
    2137             :         /* Return if we succeeded or not. */
    2138 30738857373 :         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
    2139  4772584409 :                 *stat = 0;
    2140 12750123214 :         else if (dir != XFS_LOOKUP_EQ || diff == 0)
    2141 12365167215 :                 *stat = 1;
    2142             :         else
    2143   384955999 :                 *stat = 0;
    2144             :         return 0;
    2145             : 
    2146             : error0:
    2147             :         return error;
    2148             : }
    2149             : 
    2150             : /* Find the high key storage area from a regular key. */
    2151             : union xfs_btree_key *
    2152  1102579761 : xfs_btree_high_key_from_key(
    2153             :         struct xfs_btree_cur    *cur,
    2154             :         union xfs_btree_key     *key)
    2155             : {
    2156  1102579761 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2157  1102579761 :         return (union xfs_btree_key *)((char *)key +
    2158  1102579761 :                         (cur->bc_ops->key_len / 2));
    2159             : }
    2160             : 
    2161             : /* Determine the low (and high if overlapped) keys of a leaf block */
    2162             : STATIC void
    2163  1028171405 : xfs_btree_get_leaf_keys(
    2164             :         struct xfs_btree_cur    *cur,
    2165             :         struct xfs_btree_block  *block,
    2166             :         union xfs_btree_key     *key)
    2167             : {
    2168  1028171405 :         union xfs_btree_key     max_hkey;
    2169  1028171405 :         union xfs_btree_key     hkey;
    2170  1028171405 :         union xfs_btree_rec     *rec;
    2171  1028171405 :         union xfs_btree_key     *high;
    2172  1028171405 :         int                     n;
    2173             : 
    2174  1028171405 :         rec = xfs_btree_rec_addr(cur, 1, block);
    2175  1028171405 :         cur->bc_ops->init_key_from_rec(key, rec);
    2176             : 
    2177  1028185028 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2178             : 
    2179   505929433 :                 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
    2180 >28939*10^7 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2181 >14393*10^7 :                         rec = xfs_btree_rec_addr(cur, n, block);
    2182 >14393*10^7 :                         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
    2183 >14394*10^7 :                         if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
    2184 >14089*10^7 :                                 max_hkey = hkey;
    2185             :                 }
    2186             : 
    2187   505930936 :                 high = xfs_btree_high_key_from_key(cur, key);
    2188  1011850630 :                 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
    2189             :         }
    2190  1028180910 : }
    2191             : 
    2192             : /* Determine the low (and high if overlapped) keys of a node block */
    2193             : STATIC void
    2194    90904244 : xfs_btree_get_node_keys(
    2195             :         struct xfs_btree_cur    *cur,
    2196             :         struct xfs_btree_block  *block,
    2197             :         union xfs_btree_key     *key)
    2198             : {
    2199    90904244 :         union xfs_btree_key     *hkey;
    2200    90904244 :         union xfs_btree_key     *max_hkey;
    2201    90904244 :         union xfs_btree_key     *high;
    2202    90904244 :         int                     n;
    2203             : 
    2204    90904244 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2205   181525464 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2206             :                                 cur->bc_ops->key_len / 2);
    2207             : 
    2208    90762732 :                 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
    2209 14019215754 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2210  6918845185 :                         hkey = xfs_btree_high_key_addr(cur, n, block);
    2211  6918845185 :                         if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
    2212  6898782262 :                                 max_hkey = hkey;
    2213             :                 }
    2214             : 
    2215    90762692 :                 high = xfs_btree_high_key_from_key(cur, key);
    2216   181525278 :                 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
    2217             :         } else {
    2218      283024 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2219             :                                 cur->bc_ops->key_len);
    2220             :         }
    2221    90904151 : }
    2222             : 
    2223             : /* Derive the keys for any btree block. */
    2224             : void
    2225  1028503718 : xfs_btree_get_keys(
    2226             :         struct xfs_btree_cur    *cur,
    2227             :         struct xfs_btree_block  *block,
    2228             :         union xfs_btree_key     *key)
    2229             : {
    2230  1028503718 :         if (be16_to_cpu(block->bb_level) == 0)
    2231  1027242106 :                 xfs_btree_get_leaf_keys(cur, block, key);
    2232             :         else
    2233     1261612 :                 xfs_btree_get_node_keys(cur, block, key);
    2234  1028506413 : }
    2235             : 
    2236             : /*
    2237             :  * Decide if we need to update the parent keys of a btree block.  For
    2238             :  * a standard btree this is only necessary if we're updating the first
    2239             :  * record/key.  For an overlapping btree, we must always update the
    2240             :  * keys because the highest key can be in any of the records or keys
    2241             :  * in the block.
    2242             :  */
    2243             : static inline bool
    2244             : xfs_btree_needs_key_update(
    2245             :         struct xfs_btree_cur    *cur,
    2246             :         int                     ptr)
    2247             : {
    2248  1179946360 :         return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
    2249             : }
    2250             : 
    2251             : /*
    2252             :  * Update the low and high parent keys of the given level, progressing
    2253             :  * towards the root.  If force_all is false, stop if the keys for a given
    2254             :  * level do not need updating.
    2255             :  */
    2256             : STATIC int
    2257   504977701 : __xfs_btree_updkeys(
    2258             :         struct xfs_btree_cur    *cur,
    2259             :         int                     level,
    2260             :         struct xfs_btree_block  *block,
    2261             :         struct xfs_buf          *bp0,
    2262             :         bool                    force_all)
    2263             : {
    2264   504977701 :         union xfs_btree_key     key;    /* keys from current level */
    2265   504977701 :         union xfs_btree_key     *lkey;  /* keys from the next level up */
    2266   504977701 :         union xfs_btree_key     *hkey;
    2267   504977701 :         union xfs_btree_key     *nlkey; /* keys from the next level up */
    2268   504977701 :         union xfs_btree_key     *nhkey;
    2269   504977701 :         struct xfs_buf          *bp;
    2270   504977701 :         int                     ptr;
    2271             : 
    2272   504977701 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2273             : 
    2274             :         /* Exit if there aren't any parent levels to update. */
    2275   504977701 :         if (level + 1 >= cur->bc_nlevels)
    2276             :                 return 0;
    2277             : 
    2278   483887879 :         trace_xfs_btree_updkeys(cur, level, bp0);
    2279             : 
    2280   483892216 :         lkey = &key;
    2281   483892216 :         hkey = xfs_btree_high_key_from_key(cur, lkey);
    2282   483889016 :         xfs_btree_get_keys(cur, block, lkey);
    2283  1057406643 :         for (level++; level < cur->bc_nlevels; level++) {
    2284             : #ifdef DEBUG
    2285   573517627 :                 int             error;
    2286             : #endif
    2287   573517627 :                 block = xfs_btree_get_block(cur, level, &bp);
    2288   573521671 :                 trace_xfs_btree_updkeys(cur, level, bp);
    2289             : #ifdef DEBUG
    2290   573521781 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2291   573519524 :                 if (error)
    2292           0 :                         return error;
    2293             : #endif
    2294   573519524 :                 ptr = cur->bc_levels[level].ptr;
    2295   573519524 :                 nlkey = xfs_btree_key_addr(cur, ptr, block);
    2296   573519524 :                 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
    2297  1146766680 :                 if (!force_all &&
    2298   519509701 :                     xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
    2299             :                     xfs_btree_keycmp_eq(cur, nhkey, hkey))
    2300             :                         break;
    2301   132125019 :                 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
    2302   132124226 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2303   132124280 :                 if (level + 1 >= cur->bc_nlevels)
    2304             :                         break;
    2305    89629380 :                 xfs_btree_get_node_keys(cur, block, lkey);
    2306             :         }
    2307             : 
    2308             :         return 0;
    2309             : }
    2310             : 
    2311             : /* Update all the keys from some level in cursor back to the root. */
    2312             : STATIC int
    2313      155173 : xfs_btree_updkeys_force(
    2314             :         struct xfs_btree_cur    *cur,
    2315             :         int                     level)
    2316             : {
    2317      155173 :         struct xfs_buf          *bp;
    2318      155173 :         struct xfs_btree_block  *block;
    2319             : 
    2320      155173 :         block = xfs_btree_get_block(cur, level, &bp);
    2321      155173 :         return __xfs_btree_updkeys(cur, level, block, bp, true);
    2322             : }
    2323             : 
    2324             : /*
    2325             :  * Update the parent keys of the given level, progressing towards the root.
    2326             :  */
    2327             : STATIC int
    2328  1015689658 : xfs_btree_update_keys(
    2329             :         struct xfs_btree_cur    *cur,
    2330             :         int                     level)
    2331             : {
    2332  1015689658 :         struct xfs_btree_block  *block;
    2333  1015689658 :         struct xfs_buf          *bp;
    2334  1015689658 :         union xfs_btree_key     *kp;
    2335  1015689658 :         union xfs_btree_key     key;
    2336  1015689658 :         int                     ptr;
    2337             : 
    2338  1015689658 :         ASSERT(level >= 0);
    2339             : 
    2340  1015689658 :         block = xfs_btree_get_block(cur, level, &bp);
    2341  1015742458 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
    2342   504827408 :                 return __xfs_btree_updkeys(cur, level, block, bp, false);
    2343             : 
    2344             :         /*
    2345             :          * Go up the tree from this level toward the root.
    2346             :          * At each level, update the key value to the value input.
    2347             :          * Stop when we reach a level where the cursor isn't pointing
    2348             :          * at the first entry in the block.
    2349             :          */
    2350   510915050 :         xfs_btree_get_keys(cur, block, &key);
    2351   598548044 :         for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
    2352             : #ifdef DEBUG
    2353    87635638 :                 int             error;
    2354             : #endif
    2355    87635638 :                 block = xfs_btree_get_block(cur, level, &bp);
    2356             : #ifdef DEBUG
    2357    87634535 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2358    87634838 :                 if (error)
    2359           0 :                         return error;
    2360             : #endif
    2361    87634838 :                 ptr = cur->bc_levels[level].ptr;
    2362    87634838 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    2363    87634838 :                 xfs_btree_copy_keys(cur, kp, &key, 1);
    2364    87635426 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2365             :         }
    2366             : 
    2367             :         return 0;
    2368             : }
    2369             : 
    2370             : /*
    2371             :  * Update the record referred to by cur to the value in the
    2372             :  * given record. This either works (return 0) or gets an
    2373             :  * EFSCORRUPTED error.
    2374             :  */
    2375             : int
    2376   480779149 : xfs_btree_update(
    2377             :         struct xfs_btree_cur    *cur,
    2378             :         union xfs_btree_rec     *rec)
    2379             : {
    2380   480779149 :         struct xfs_btree_block  *block;
    2381   480779149 :         struct xfs_buf          *bp;
    2382   480779149 :         int                     error;
    2383   480779149 :         int                     ptr;
    2384   480779149 :         union xfs_btree_rec     *rp;
    2385             : 
    2386             :         /* Pick up the current block. */
    2387   480779149 :         block = xfs_btree_get_block(cur, 0, &bp);
    2388             : 
    2389             : #ifdef DEBUG
    2390   480798713 :         error = xfs_btree_check_block(cur, block, 0, bp);
    2391   480765908 :         if (error)
    2392           0 :                 goto error0;
    2393             : #endif
    2394             :         /* Get the address of the rec to be updated. */
    2395   480765908 :         ptr = cur->bc_levels[0].ptr;
    2396   480765908 :         rp = xfs_btree_rec_addr(cur, ptr, block);
    2397             : 
    2398             :         /* Fill in the new contents and log them. */
    2399   480765908 :         xfs_btree_copy_recs(cur, rp, rec, 1);
    2400   480799410 :         xfs_btree_log_recs(cur, bp, ptr, ptr);
    2401             : 
    2402             :         /*
    2403             :          * If we are tracking the last record in the tree and
    2404             :          * we are at the far right edge of the tree, update it.
    2405             :          */
    2406   480804843 :         if (xfs_btree_is_lastrec(cur, block, 0)) {
    2407           0 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    2408             :                                             ptr, LASTREC_UPDATE);
    2409             :         }
    2410             : 
    2411             :         /* Pass new key value up to our parent. */
    2412   904165033 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    2413   139822339 :                 error = xfs_btree_update_keys(cur, 0);
    2414   139818781 :                 if (error)
    2415           0 :                         goto error0;
    2416             :         }
    2417             : 
    2418             :         return 0;
    2419             : 
    2420             : error0:
    2421             :         return error;
    2422             : }
    2423             : 
    2424             : /*
    2425             :  * Move 1 record left from cur/level if possible.
    2426             :  * Update cur to reflect the new path.
    2427             :  */
    2428             : STATIC int                                      /* error */
    2429    72952747 : xfs_btree_lshift(
    2430             :         struct xfs_btree_cur    *cur,
    2431             :         int                     level,
    2432             :         int                     *stat)          /* success/failure */
    2433             : {
    2434    72952747 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2435    72952747 :         struct xfs_btree_block  *left;          /* left btree block */
    2436    72952747 :         int                     lrecs;          /* left record count */
    2437    72952747 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2438    72952747 :         struct xfs_btree_block  *right;         /* right btree block */
    2439    72952747 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2440    72952747 :         int                     rrecs;          /* right record count */
    2441    72952747 :         union xfs_btree_ptr     lptr;           /* left btree pointer */
    2442    72952747 :         union xfs_btree_key     *rkp = NULL;    /* right btree key */
    2443    72952747 :         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
    2444    72952747 :         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
    2445    72952747 :         int                     error;          /* error return value */
    2446    72952747 :         int                     i;
    2447             : 
    2448    72952747 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    2449    33495467 :             level == cur->bc_nlevels - 1)
    2450           0 :                 goto out0;
    2451             : 
    2452             :         /* Set up variables for this block as "right". */
    2453    72952747 :         right = xfs_btree_get_block(cur, level, &rbp);
    2454             : 
    2455             : #ifdef DEBUG
    2456    72944867 :         error = xfs_btree_check_block(cur, right, level, rbp);
    2457    72939609 :         if (error)
    2458           0 :                 goto error0;
    2459             : #endif
    2460             : 
    2461             :         /* If we've got no left sibling then we can't shift an entry left. */
    2462    72939609 :         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2463   145886588 :         if (xfs_btree_ptr_is_null(cur, &lptr))
    2464       39045 :                 goto out0;
    2465             : 
    2466             :         /*
    2467             :          * If the cursor entry is the one that would be moved, don't
    2468             :          * do it... it's too complicated.
    2469             :          */
    2470    72904249 :         if (cur->bc_levels[level].ptr <= 1)
    2471       14237 :                 goto out0;
    2472             : 
    2473             :         /* Set up the left neighbor as "left". */
    2474    72890012 :         error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    2475    72893514 :         if (error)
    2476          50 :                 goto error0;
    2477             : 
    2478             :         /* If it's full, it can't take another entry. */
    2479    72893464 :         lrecs = xfs_btree_get_numrecs(left);
    2480    72893464 :         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
    2481      859279 :                 goto out0;
    2482             : 
    2483    72035480 :         rrecs = xfs_btree_get_numrecs(right);
    2484             : 
    2485             :         /*
    2486             :          * We add one entry to the left side and remove one for the right side.
    2487             :          * Account for it here, the changes will be updated on disk and logged
    2488             :          * later.
    2489             :          */
    2490    72035480 :         lrecs++;
    2491    72035480 :         rrecs--;
    2492             : 
    2493    72035480 :         XFS_BTREE_STATS_INC(cur, lshift);
    2494    72035480 :         XFS_BTREE_STATS_ADD(cur, moves, 1);
    2495             : 
    2496             :         /*
    2497             :          * If non-leaf, copy a key and a ptr to the left block.
    2498             :          * Log the changes to the left block.
    2499             :          */
    2500    72035480 :         if (level > 0) {
    2501             :                 /* It's a non-leaf.  Move keys and pointers. */
    2502      103882 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2503      103882 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2504             : 
    2505      103882 :                 lkp = xfs_btree_key_addr(cur, lrecs, left);
    2506      103882 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2507             : 
    2508      103882 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2509      103829 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2510             : 
    2511      103829 :                 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
    2512      103829 :                 if (error)
    2513           0 :                         goto error0;
    2514             : 
    2515      103829 :                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
    2516      103829 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
    2517             : 
    2518      103829 :                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
    2519      103829 :                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
    2520             : 
    2521      103829 :                 ASSERT(cur->bc_ops->keys_inorder(cur,
    2522             :                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
    2523             :         } else {
    2524             :                 /* It's a leaf.  Move records.  */
    2525    71931598 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2526             : 
    2527    71931598 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2528    71931598 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2529             : 
    2530    71931598 :                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
    2531    71931737 :                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
    2532             : 
    2533    71929766 :                 ASSERT(cur->bc_ops->recs_inorder(cur,
    2534             :                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
    2535             :         }
    2536             : 
    2537    72037120 :         xfs_btree_set_numrecs(left, lrecs);
    2538    72037120 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2539             : 
    2540    72036506 :         xfs_btree_set_numrecs(right, rrecs);
    2541    72036506 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2542             : 
    2543             :         /*
    2544             :          * Slide the contents of right down one entry.
    2545             :          */
    2546    72036453 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
    2547    72036453 :         if (level > 0) {
    2548             :                 /* It's a nonleaf. operate on keys and ptrs */
    2549    15969201 :                 for (i = 0; i < rrecs; i++) {
    2550    15865372 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
    2551    15865372 :                         if (error)
    2552           0 :                                 goto error0;
    2553             :                 }
    2554             : 
    2555      103829 :                 xfs_btree_shift_keys(cur,
    2556             :                                 xfs_btree_key_addr(cur, 2, right),
    2557             :                                 -1, rrecs);
    2558      103829 :                 xfs_btree_shift_ptrs(cur,
    2559             :                                 xfs_btree_ptr_addr(cur, 2, right),
    2560             :                                 -1, rrecs);
    2561             : 
    2562      103829 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2563      103829 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2564             :         } else {
    2565             :                 /* It's a leaf. operate on records */
    2566    71932624 :                 xfs_btree_shift_recs(cur,
    2567             :                         xfs_btree_rec_addr(cur, 2, right),
    2568             :                         -1, rrecs);
    2569    71934619 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2570             :         }
    2571             : 
    2572             :         /*
    2573             :          * Using a temporary cursor, update the parent key values of the
    2574             :          * block on the left.
    2575             :          */
    2576    72039144 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2577    26401356 :                 error = xfs_btree_dup_cursor(cur, &tcur);
    2578    26401357 :                 if (error)
    2579           0 :                         goto error0;
    2580    26401357 :                 i = xfs_btree_firstrec(tcur, level);
    2581    26401340 :                 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2582           0 :                         xfs_btree_mark_sick(cur);
    2583           0 :                         error = -EFSCORRUPTED;
    2584           0 :                         goto error0;
    2585             :                 }
    2586             : 
    2587    26401340 :                 error = xfs_btree_decrement(tcur, level, &i);
    2588    26401364 :                 if (error)
    2589           0 :                         goto error1;
    2590             : 
    2591             :                 /* Update the parent high keys of the left block, if needed. */
    2592    26401364 :                 error = xfs_btree_update_keys(tcur, level);
    2593    26401357 :                 if (error)
    2594           0 :                         goto error1;
    2595             : 
    2596    26401357 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2597             :         }
    2598             : 
    2599             :         /* Update the parent keys of the right block. */
    2600    72039155 :         error = xfs_btree_update_keys(cur, level);
    2601    72038799 :         if (error)
    2602           0 :                 goto error0;
    2603             : 
    2604             :         /* Slide the cursor value left one. */
    2605    72038799 :         cur->bc_levels[level].ptr--;
    2606             : 
    2607    72038799 :         *stat = 1;
    2608    72038799 :         return 0;
    2609             : 
    2610      912561 : out0:
    2611      912561 :         *stat = 0;
    2612      912561 :         return 0;
    2613             : 
    2614             : error0:
    2615             :         return error;
    2616             : 
    2617           0 : error1:
    2618           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2619           0 :         return error;
    2620             : }
    2621             : 
    2622             : /*
    2623             :  * Move 1 record right from cur/level if possible.
    2624             :  * Update cur to reflect the new path.
    2625             :  */
    2626             : STATIC int                                      /* error */
    2627   122143983 : xfs_btree_rshift(
    2628             :         struct xfs_btree_cur    *cur,
    2629             :         int                     level,
    2630             :         int                     *stat)          /* success/failure */
    2631             : {
    2632   122143983 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2633   122143983 :         struct xfs_btree_block  *left;          /* left btree block */
    2634   122143983 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2635   122143983 :         struct xfs_btree_block  *right;         /* right btree block */
    2636   122143983 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2637   122143983 :         union xfs_btree_ptr     rptr;           /* right block pointer */
    2638   122143983 :         union xfs_btree_key     *rkp;           /* right btree key */
    2639   122143983 :         int                     rrecs;          /* right record count */
    2640   122143983 :         int                     lrecs;          /* left record count */
    2641   122143983 :         int                     error;          /* error return value */
    2642   122143983 :         int                     i;              /* loop counter */
    2643             : 
    2644   122143983 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    2645    62176064 :             (level == cur->bc_nlevels - 1))
    2646           0 :                 goto out0;
    2647             : 
    2648             :         /* Set up variables for this block as "left". */
    2649   122143983 :         left = xfs_btree_get_block(cur, level, &lbp);
    2650             : 
    2651             : #ifdef DEBUG
    2652   122142606 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2653   122138640 :         if (error)
    2654           0 :                 goto error0;
    2655             : #endif
    2656             : 
    2657             :         /* If we've got no right sibling then we can't shift an entry right. */
    2658   122138640 :         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2659   244285618 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    2660    44501415 :                 goto out0;
    2661             : 
    2662             :         /*
    2663             :          * If the cursor entry is the one that would be moved, don't
    2664             :          * do it... it's too complicated.
    2665             :          */
    2666    77641394 :         lrecs = xfs_btree_get_numrecs(left);
    2667    77641394 :         if (cur->bc_levels[level].ptr >= lrecs)
    2668     7204047 :                 goto out0;
    2669             : 
    2670             :         /* Set up the right neighbor as "right". */
    2671    70437347 :         error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    2672    70436292 :         if (error)
    2673         139 :                 goto error0;
    2674             : 
    2675             :         /* If it's full, it can't take another entry. */
    2676    70436153 :         rrecs = xfs_btree_get_numrecs(right);
    2677    70436153 :         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
    2678    10876357 :                 goto out0;
    2679             : 
    2680    59560928 :         XFS_BTREE_STATS_INC(cur, rshift);
    2681    59560928 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2682             : 
    2683             :         /*
    2684             :          * Make a hole at the start of the right neighbor block, then
    2685             :          * copy the last left block entry to the hole.
    2686             :          */
    2687    59560928 :         if (level > 0) {
    2688             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    2689       45622 :                 union xfs_btree_key     *lkp;
    2690       45622 :                 union xfs_btree_ptr     *lpp;
    2691       45622 :                 union xfs_btree_ptr     *rpp;
    2692             : 
    2693       45622 :                 lkp = xfs_btree_key_addr(cur, lrecs, left);
    2694       45622 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2695       45622 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2696       45622 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2697             : 
    2698     3093758 :                 for (i = rrecs - 1; i >= 0; i--) {
    2699     3048136 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    2700     3048136 :                         if (error)
    2701           0 :                                 goto error0;
    2702             :                 }
    2703             : 
    2704       45622 :                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
    2705       45622 :                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
    2706             : 
    2707       45622 :                 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
    2708       45622 :                 if (error)
    2709           0 :                         goto error0;
    2710             : 
    2711             :                 /* Now put the new data in, and log it. */
    2712       45622 :                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
    2713       45622 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
    2714             : 
    2715       45622 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
    2716       45622 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
    2717             : 
    2718       45622 :                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
    2719             :                         xfs_btree_key_addr(cur, 2, right)));
    2720             :         } else {
    2721             :                 /* It's a leaf. make a hole in the records */
    2722    59515306 :                 union xfs_btree_rec     *lrp;
    2723    59515306 :                 union xfs_btree_rec     *rrp;
    2724             : 
    2725    59515306 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2726    59515306 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2727             : 
    2728    59515306 :                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
    2729             : 
    2730             :                 /* Now put the new data in, and log it. */
    2731    59515550 :                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
    2732    59515552 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
    2733             :         }
    2734             : 
    2735             :         /*
    2736             :          * Decrement and log left's numrecs, bump and log right's numrecs.
    2737             :          */
    2738    59561869 :         xfs_btree_set_numrecs(left, --lrecs);
    2739    59561869 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2740             : 
    2741    59561507 :         xfs_btree_set_numrecs(right, ++rrecs);
    2742    59561507 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2743             : 
    2744             :         /*
    2745             :          * Using a temporary cursor, update the parent key values of the
    2746             :          * block on the right.
    2747             :          */
    2748    59561190 :         error = xfs_btree_dup_cursor(cur, &tcur);
    2749    59561806 :         if (error)
    2750           0 :                 goto error0;
    2751    59561806 :         i = xfs_btree_lastrec(tcur, level);
    2752    59562050 :         if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2753           0 :                 xfs_btree_mark_sick(cur);
    2754           0 :                 error = -EFSCORRUPTED;
    2755           0 :                 goto error0;
    2756             :         }
    2757             : 
    2758    59562050 :         error = xfs_btree_increment(tcur, level, &i);
    2759    59562534 :         if (error)
    2760           1 :                 goto error1;
    2761             : 
    2762             :         /* Update the parent high keys of the left block, if needed. */
    2763    59562533 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2764    25804829 :                 error = xfs_btree_update_keys(cur, level);
    2765    25804808 :                 if (error)
    2766           0 :                         goto error1;
    2767             :         }
    2768             : 
    2769             :         /* Update the parent keys of the right block. */
    2770    59562512 :         error = xfs_btree_update_keys(tcur, level);
    2771    59562291 :         if (error)
    2772           0 :                 goto error1;
    2773             : 
    2774    59562291 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2775             : 
    2776    59562389 :         *stat = 1;
    2777    59562389 :         return 0;
    2778             : 
    2779    62581819 : out0:
    2780    62581819 :         *stat = 0;
    2781    62581819 :         return 0;
    2782             : 
    2783             : error0:
    2784             :         return error;
    2785             : 
    2786           1 : error1:
    2787           1 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2788           1 :         return error;
    2789             : }
    2790             : 
    2791             : static inline int
    2792      948992 : xfs_btree_alloc_block(
    2793             :         struct xfs_btree_cur            *cur,
    2794             :         const union xfs_btree_ptr       *hint_block,
    2795             :         union xfs_btree_ptr             *new_block,
    2796             :         int                             *stat)
    2797             : {
    2798      948992 :         int                             error;
    2799             : 
    2800      948992 :         error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
    2801      948992 :         trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
    2802      948992 :         return error;
    2803             : }
    2804             : 
    2805             : /*
    2806             :  * Split cur/level block in half.
    2807             :  * Return new block number and the key to its first
    2808             :  * record (to be inserted into parent).
    2809             :  */
    2810             : STATIC int                                      /* error */
    2811      912561 : __xfs_btree_split(
    2812             :         struct xfs_btree_cur    *cur,
    2813             :         int                     level,
    2814             :         union xfs_btree_ptr     *ptrp,
    2815             :         union xfs_btree_key     *key,
    2816             :         struct xfs_btree_cur    **curp,
    2817             :         int                     *stat)          /* success/failure */
    2818             : {
    2819      912561 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    2820      912561 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2821      912561 :         struct xfs_btree_block  *left;          /* left btree block */
    2822      912561 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    2823      912561 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2824      912561 :         struct xfs_btree_block  *right;         /* right btree block */
    2825      912561 :         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
    2826      912561 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    2827      912561 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    2828      912561 :         int                     lrecs;
    2829      912561 :         int                     rrecs;
    2830      912561 :         int                     src_index;
    2831      912561 :         int                     error;          /* error return value */
    2832      912561 :         int                     i;
    2833             : 
    2834      912561 :         XFS_BTREE_STATS_INC(cur, split);
    2835             : 
    2836             :         /* Set up left block (current one). */
    2837      912561 :         left = xfs_btree_get_block(cur, level, &lbp);
    2838             : 
    2839             : #ifdef DEBUG
    2840      912561 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2841      912561 :         if (error)
    2842           0 :                 goto error0;
    2843             : #endif
    2844             : 
    2845      912561 :         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    2846             : 
    2847             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    2848      912561 :         error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
    2849      912561 :         if (error)
    2850           6 :                 goto error0;
    2851      912555 :         if (*stat == 0)
    2852           0 :                 goto out0;
    2853      912555 :         XFS_BTREE_STATS_INC(cur, alloc);
    2854             : 
    2855             :         /* Set up the new block as "right". */
    2856      912555 :         error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
    2857      912555 :         if (error)
    2858           0 :                 goto error0;
    2859             : 
    2860             :         /* Fill in the btree header for the new right block. */
    2861     1825110 :         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
    2862             : 
    2863             :         /*
    2864             :          * Split the entries between the old and the new block evenly.
    2865             :          * Make sure that if there's an odd number of entries now, that
    2866             :          * each new block will have the same number of entries.
    2867             :          */
    2868      912554 :         lrecs = xfs_btree_get_numrecs(left);
    2869      912554 :         rrecs = lrecs / 2;
    2870      912554 :         if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
    2871       85929 :                 rrecs++;
    2872      912554 :         src_index = (lrecs - rrecs + 1);
    2873             : 
    2874      912554 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2875             : 
    2876             :         /* Adjust numrecs for the later get_*_keys() calls. */
    2877      912554 :         lrecs -= rrecs;
    2878      912554 :         xfs_btree_set_numrecs(left, lrecs);
    2879     1825108 :         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
    2880             : 
    2881             :         /*
    2882             :          * Copy btree block entries from the left block over to the
    2883             :          * new block, the right. Update the right block and log the
    2884             :          * changes.
    2885             :          */
    2886      912554 :         if (level > 0) {
    2887             :                 /* It's a non-leaf.  Move keys and pointers. */
    2888        4679 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2889        4679 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2890        4679 :                 union xfs_btree_key     *rkp;   /* right btree key */
    2891        4679 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    2892             : 
    2893        4679 :                 lkp = xfs_btree_key_addr(cur, src_index, left);
    2894        4679 :                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
    2895        4680 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2896        4680 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2897             : 
    2898        9360 :                 for (i = src_index; i < rrecs; i++) {
    2899           0 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    2900           0 :                         if (error)
    2901           0 :                                 goto error0;
    2902             :                 }
    2903             : 
    2904             :                 /* Copy the keys & pointers to the new block. */
    2905        4680 :                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
    2906        4680 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
    2907             : 
    2908        4680 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2909        4680 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2910             : 
    2911             :                 /* Stash the keys of the new block for later insertion. */
    2912        4680 :                 xfs_btree_get_node_keys(cur, right, key);
    2913             :         } else {
    2914             :                 /* It's a leaf.  Move records.  */
    2915      907875 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2916      907875 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    2917             : 
    2918      907875 :                 lrp = xfs_btree_rec_addr(cur, src_index, left);
    2919      907875 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2920             : 
    2921             :                 /* Copy records to the new block. */
    2922      907875 :                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
    2923      907875 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2924             : 
    2925             :                 /* Stash the keys of the new block for later insertion. */
    2926      907875 :                 xfs_btree_get_leaf_keys(cur, right, key);
    2927             :         }
    2928             : 
    2929             :         /*
    2930             :          * Find the left block number by looking in the buffer.
    2931             :          * Adjust sibling pointers.
    2932             :          */
    2933      912555 :         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
    2934      912555 :         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
    2935      912555 :         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2936      912555 :         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2937             : 
    2938      912555 :         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
    2939      912555 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    2940             : 
    2941             :         /*
    2942             :          * If there's a block to the new block's right, make that block
    2943             :          * point back to right instead of to left.
    2944             :          */
    2945     1825110 :         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
    2946      524844 :                 error = xfs_btree_read_buf_block(cur, &rrptr,
    2947             :                                                         0, &rrblock, &rrbp);
    2948      524844 :                 if (error)
    2949         206 :                         goto error0;
    2950      524638 :                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
    2951      524638 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    2952             :         }
    2953             : 
    2954             :         /* Update the parent high keys of the left block, if needed. */
    2955      912349 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2956      545712 :                 error = xfs_btree_update_keys(cur, level);
    2957      545712 :                 if (error)
    2958           0 :                         goto error0;
    2959             :         }
    2960             : 
    2961             :         /*
    2962             :          * If the cursor is really in the right block, move it there.
    2963             :          * If it's just pointing past the last entry in left, then we'll
    2964             :          * insert there, so don't change anything in that case.
    2965             :          */
    2966      912349 :         if (cur->bc_levels[level].ptr > lrecs + 1) {
    2967      672624 :                 xfs_btree_setbuf(cur, level, rbp);
    2968      672624 :                 cur->bc_levels[level].ptr -= lrecs;
    2969             :         }
    2970             :         /*
    2971             :          * If there are more levels, we'll need another cursor which refers
    2972             :          * the right block, no matter where this cursor was.
    2973             :          */
    2974      912349 :         if (level + 1 < cur->bc_nlevels) {
    2975      893769 :                 error = xfs_btree_dup_cursor(cur, curp);
    2976      893769 :                 if (error)
    2977           3 :                         goto error0;
    2978      893766 :                 (*curp)->bc_levels[level + 1].ptr++;
    2979             :         }
    2980      912346 :         *ptrp = rptr;
    2981      912346 :         *stat = 1;
    2982      912346 :         return 0;
    2983             : out0:
    2984           0 :         *stat = 0;
    2985           0 :         return 0;
    2986             : 
    2987             : error0:
    2988             :         return error;
    2989             : }
    2990             : 
    2991             : #ifdef __KERNEL__
    2992             : struct xfs_btree_split_args {
    2993             :         struct xfs_btree_cur    *cur;
    2994             :         int                     level;
    2995             :         union xfs_btree_ptr     *ptrp;
    2996             :         union xfs_btree_key     *key;
    2997             :         struct xfs_btree_cur    **curp;
    2998             :         int                     *stat;          /* success/failure */
    2999             :         int                     result;
    3000             :         bool                    kswapd; /* allocation in kswapd context */
    3001             :         struct completion       *done;
    3002             :         struct work_struct      work;
    3003             : };
    3004             : 
    3005             : /*
    3006             :  * Stack switching interfaces for allocation
    3007             :  */
    3008             : static void
    3009       20291 : xfs_btree_split_worker(
    3010             :         struct work_struct      *work)
    3011             : {
    3012       20291 :         struct xfs_btree_split_args     *args = container_of(work,
    3013             :                                                 struct xfs_btree_split_args, work);
    3014       20291 :         unsigned long           pflags;
    3015       20291 :         unsigned long           new_pflags = 0;
    3016             : 
    3017             :         /*
    3018             :          * we are in a transaction context here, but may also be doing work
    3019             :          * in kswapd context, and hence we may need to inherit that state
    3020             :          * temporarily to ensure that we don't block waiting for memory reclaim
    3021             :          * in any way.
    3022             :          */
    3023       20291 :         if (args->kswapd)
    3024           0 :                 new_pflags |= PF_MEMALLOC | PF_KSWAPD;
    3025             : 
    3026       20291 :         current_set_flags_nested(&pflags, new_pflags);
    3027       20291 :         xfs_trans_set_context(args->cur->bc_tp);
    3028             : 
    3029       20291 :         args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
    3030             :                                          args->key, args->curp, args->stat);
    3031             : 
    3032       20291 :         xfs_trans_clear_context(args->cur->bc_tp);
    3033       20291 :         current_restore_flags_nested(&pflags, new_pflags);
    3034             : 
    3035             :         /*
    3036             :          * Do not access args after complete() has run here. We don't own args
    3037             :          * and the owner may run and free args before we return here.
    3038             :          */
    3039       20291 :         complete(args->done);
    3040             : 
    3041       20291 : }
    3042             : 
    3043             : /*
    3044             :  * BMBT split requests often come in with little stack to work on so we push
    3045             :  * them off to a worker thread so there is lots of stack to use. For the other
    3046             :  * btree types, just call directly to avoid the context switch overhead here.
    3047             :  *
    3048             :  * Care must be taken here - the work queue rescuer thread introduces potential
    3049             :  * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
    3050             :  * AGFs to allocate blocks. A task being run by the rescuer could attempt to
    3051             :  * lock an AGF that is already locked by a task queued to run by the rescuer,
    3052             :  * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
    3053             :  * release it until the current thread it is running gains the lock.
    3054             :  *
    3055             :  * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
    3056             :  * already locked to allocate from. The only place that doesn't hold an AGF
    3057             :  * locked is unwritten extent conversion at IO completion, but that has already
    3058             :  * been offloaded to a worker thread and hence has no stack consumption issues
    3059             :  * we have to worry about.
    3060             :  */
    3061             : STATIC int                                      /* error */
    3062      912561 : xfs_btree_split(
    3063             :         struct xfs_btree_cur    *cur,
    3064             :         int                     level,
    3065             :         union xfs_btree_ptr     *ptrp,
    3066             :         union xfs_btree_key     *key,
    3067             :         struct xfs_btree_cur    **curp,
    3068             :         int                     *stat)          /* success/failure */
    3069             : {
    3070      912561 :         struct xfs_btree_split_args     args;
    3071      912561 :         DECLARE_COMPLETION_ONSTACK(done);
    3072             : 
    3073      912561 :         if (cur->bc_btnum != XFS_BTNUM_BMAP ||
    3074      238867 :             cur->bc_tp->t_highest_agno == NULLAGNUMBER)
    3075      892270 :                 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
    3076             : 
    3077       20291 :         args.cur = cur;
    3078       20291 :         args.level = level;
    3079       20291 :         args.ptrp = ptrp;
    3080       20291 :         args.key = key;
    3081       20291 :         args.curp = curp;
    3082       20291 :         args.stat = stat;
    3083       20291 :         args.done = &done;
    3084       20291 :         args.kswapd = current_is_kswapd();
    3085       20291 :         INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
    3086       20291 :         queue_work(xfs_alloc_wq, &args.work);
    3087       20291 :         wait_for_completion(&done);
    3088       20291 :         destroy_work_on_stack(&args.work);
    3089       20291 :         return args.result;
    3090             : }
    3091             : #else
    3092             : #define xfs_btree_split __xfs_btree_split
    3093             : #endif /* __KERNEL__ */
    3094             : 
    3095             : static inline void
    3096     1196433 : xfs_btree_iroot_realloc(
    3097             :         struct xfs_btree_cur            *cur,
    3098             :         int                             rec_diff)
    3099             : {
    3100     1196433 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3101             : 
    3102     1196433 :         xfs_iroot_realloc(cur->bc_ino.ip, cur->bc_ino.whichfork,
    3103     1196433 :                         cur->bc_ops->iroot_ops, rec_diff);
    3104     1196433 : }
    3105             : 
    3106             : /*
    3107             :  * Move the records from a root leaf block to a separate block.
    3108             :  *
    3109             :  * Trickery here: The amount of memory that we need per record for the incore
    3110             :  * root block changes when we convert a leaf block to an internal block.
    3111             :  * Therefore, we copy leaf records into the new btree block (cblock) before
    3112             :  * freeing the incore root block and changing the tree height.
    3113             :  *
    3114             :  * Once we've changed the tree height, we allocate a new incore root block
    3115             :  * (which will now be an internal root block) and populate it with a pointer to
    3116             :  * cblock and the relevant keys.
    3117             :  */
    3118             : STATIC void
    3119       14842 : xfs_btree_promote_leaf_iroot(
    3120             :         struct xfs_btree_cur    *cur,
    3121             :         struct xfs_btree_block  *block,
    3122             :         struct xfs_buf          *cbp,
    3123             :         union xfs_btree_ptr     *cptr,
    3124             :         struct xfs_btree_block  *cblock)
    3125             : {
    3126       14842 :         union xfs_btree_rec     *rp;
    3127       14842 :         union xfs_btree_rec     *crp;
    3128       14842 :         union xfs_btree_key     *kp;
    3129       14842 :         union xfs_btree_ptr     *pp;
    3130       14842 :         size_t                  size;
    3131       14842 :         int                     numrecs = xfs_btree_get_numrecs(block);
    3132             : 
    3133             :         /* Copy the records from the leaf root into the new child block. */
    3134       14842 :         rp = xfs_btree_rec_addr(cur, 1, block);
    3135       14842 :         crp = xfs_btree_rec_addr(cur, 1, cblock);
    3136       14842 :         xfs_btree_copy_recs(cur, crp, rp, numrecs);
    3137             : 
    3138             :         /* Zap the old root and change the tree height. */
    3139       14842 :         xfs_iroot_free(cur->bc_ino.ip, cur->bc_ino.whichfork);
    3140       14842 :         cur->bc_nlevels++;
    3141       14842 :         cur->bc_levels[1].ptr = 1;
    3142             : 
    3143             :         /*
    3144             :          * Allocate a new internal root block buffer and reinitialize it to
    3145             :          * point to a single new child.
    3146             :          */
    3147       14842 :         size = cur->bc_ops->iroot_ops->size(cur->bc_mp, cur->bc_nlevels - 1, 1);
    3148       14842 :         xfs_iroot_alloc(cur->bc_ino.ip, cur->bc_ino.whichfork, size);
    3149       14842 :         block = xfs_btree_get_iroot(cur);
    3150       14842 :         xfs_btree_init_block(cur->bc_mp, block, cur->bc_ops,
    3151       14842 :                         cur->bc_nlevels - 1, 1, cur->bc_ino.ip->i_ino);
    3152             : 
    3153       14842 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3154       14842 :         kp = xfs_btree_key_addr(cur, 1, block);
    3155       14842 :         xfs_btree_copy_ptrs(cur, pp, cptr, 1);
    3156       14842 :         xfs_btree_get_keys(cur, cblock, kp);
    3157             : 
    3158             :         /* Attach the new block to the cursor and log it. */
    3159       14842 :         xfs_btree_setbuf(cur, 0, cbp);
    3160       14842 :         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
    3161       14842 :         xfs_btree_log_recs(cur, cbp, 1, numrecs);
    3162       14842 : }
    3163             : 
    3164             : /*
    3165             :  * Move the keys and pointers from a root block to a separate block.
    3166             :  *
    3167             :  * Since the keyptr size does not change, all we have to do is increase the
    3168             :  * tree height, copy the keyptrs to the new internal node (cblock), shrink
    3169             :  * the root, and copy the pointers there.
    3170             :  */
    3171             : STATIC int
    3172        3009 : xfs_btree_promote_node_iroot(
    3173             :         struct xfs_btree_cur    *cur,
    3174             :         struct xfs_btree_block  *block,
    3175             :         int                     level,
    3176             :         struct xfs_buf          *cbp,
    3177             :         union xfs_btree_ptr     *cptr,
    3178             :         struct xfs_btree_block  *cblock)
    3179             : {
    3180        3009 :         union xfs_btree_key     *ckp;
    3181        3009 :         union xfs_btree_key     *kp;
    3182        3009 :         union xfs_btree_ptr     *cpp;
    3183        3009 :         union xfs_btree_ptr     *pp;
    3184        3009 :         int                     i;
    3185        3009 :         int                     error;
    3186        3009 :         int                     numrecs = xfs_btree_get_numrecs(block);
    3187             : 
    3188             :         /*
    3189             :          * Increase tree height, adjusting the root block level to match.
    3190             :          * We cannot change the root btree node size until we've copied the
    3191             :          * block contents to the new child block.
    3192             :          */
    3193        3009 :         be16_add_cpu(&block->bb_level, 1);
    3194        3009 :         cur->bc_nlevels++;
    3195        3009 :         cur->bc_levels[level + 1].ptr = 1;
    3196             : 
    3197             :         /*
    3198             :          * Adjust the root btree record count, then copy the keys from the old
    3199             :          * root to the new child block.
    3200             :          */
    3201        3009 :         xfs_btree_set_numrecs(block, 1);
    3202        3009 :         kp = xfs_btree_key_addr(cur, 1, block);
    3203        3009 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3204        3009 :         xfs_btree_copy_keys(cur, ckp, kp, numrecs);
    3205             : 
    3206             :         /* Check the pointers and copy them to the new child block. */
    3207        3009 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3208        3009 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3209       35935 :         for (i = 0; i < numrecs; i++) {
    3210       29917 :                 error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3211       29917 :                 if (error)
    3212           0 :                         return error;
    3213             :         }
    3214        3009 :         xfs_btree_copy_ptrs(cur, cpp, pp, numrecs);
    3215             : 
    3216             :         /*
    3217             :          * Set the first keyptr to point to the new child block, then shrink
    3218             :          * the memory buffer for the root block.
    3219             :          */
    3220        3009 :         error = xfs_btree_debug_check_ptr(cur, cptr, 0, level);
    3221        3009 :         if (error)
    3222             :                 return error;
    3223        3009 :         xfs_btree_copy_ptrs(cur, pp, cptr, 1);
    3224        3009 :         xfs_btree_get_keys(cur, cblock, kp);
    3225        3009 :         xfs_btree_iroot_realloc(cur, 1 - numrecs);
    3226             : 
    3227             :         /* Attach the new block to the cursor and log it. */
    3228        3009 :         xfs_btree_setbuf(cur, level, cbp);
    3229        3009 :         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
    3230        3009 :         xfs_btree_log_keys(cur, cbp, 1, numrecs);
    3231        3009 :         xfs_btree_log_ptrs(cur, cbp, 1, numrecs);
    3232        3009 :         return 0;
    3233             : }
    3234             : 
    3235             : /*
    3236             :  * Copy the old inode root contents into a real block and make the
    3237             :  * broot point to it.
    3238             :  */
    3239             : int                                             /* error */
    3240       17851 : xfs_btree_new_iroot(
    3241             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3242             :         int                     *logflags,      /* logging flags for inode */
    3243             :         int                     *stat)          /* return status - 0 fail */
    3244             : {
    3245       17851 :         struct xfs_buf          *cbp;           /* buffer for cblock */
    3246       17851 :         struct xfs_btree_block  *block;         /* btree block */
    3247       17851 :         struct xfs_btree_block  *cblock;        /* child btree block */
    3248       17851 :         union xfs_btree_ptr     aptr;
    3249       17851 :         union xfs_btree_ptr     nptr;           /* new block addr */
    3250       17851 :         int                     level;          /* btree level */
    3251       17851 :         int                     error;          /* error return code */
    3252             : 
    3253       17851 :         XFS_BTREE_STATS_INC(cur, newroot);
    3254             : 
    3255       17851 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3256             : 
    3257       17851 :         level = cur->bc_nlevels - 1;
    3258             : 
    3259       17851 :         block = xfs_btree_get_iroot(cur);
    3260       17851 :         ASSERT(level > 0 || (cur->bc_flags & XFS_BTREE_IROOT_RECORDS));
    3261       17851 :         if (level > 0)
    3262        3009 :                 aptr = *xfs_btree_ptr_addr(cur, 1, block);
    3263             :         else
    3264       14842 :                 aptr.l = cpu_to_be64(XFS_INO_TO_FSB(cur->bc_mp,
    3265             :                                 cur->bc_ino.ip->i_ino));
    3266             : 
    3267             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3268       17851 :         error = xfs_btree_alloc_block(cur, &aptr, &nptr, stat);
    3269       17851 :         if (error)
    3270           0 :                 goto error0;
    3271       17851 :         if (*stat == 0)
    3272             :                 return 0;
    3273             : 
    3274       17851 :         XFS_BTREE_STATS_INC(cur, alloc);
    3275             : 
    3276             :         /* Copy the root into a real block. */
    3277       17851 :         error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
    3278       17851 :         if (error)
    3279           0 :                 goto error0;
    3280             : 
    3281             :         /*
    3282             :          * we can't just memcpy() the root in for CRC enabled btree blocks.
    3283             :          * In that case have to also ensure the blkno remains correct
    3284             :          */
    3285       35702 :         memcpy(cblock, block, xfs_btree_block_len(cur));
    3286       17851 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    3287       17851 :                 __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
    3288       17851 :                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    3289       17851 :                         cblock->bb_u.l.bb_blkno = bno;
    3290             :                 else
    3291           0 :                         cblock->bb_u.s.bb_blkno = bno;
    3292             :         }
    3293             : 
    3294       17851 :         if (level > 0) {
    3295        3009 :                 error = xfs_btree_promote_node_iroot(cur, block, level, cbp,
    3296             :                                 &nptr, cblock);
    3297        3009 :                 if (error)
    3298           0 :                         goto error0;
    3299             :         } else {
    3300       14842 :                 xfs_btree_promote_leaf_iroot(cur, block, cbp, &nptr, cblock);
    3301             :         }
    3302             : 
    3303       35702 :         *logflags |=
    3304       17851 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
    3305       17851 :         *stat = 1;
    3306       17851 :         return 0;
    3307             : error0:
    3308             :         return error;
    3309             : }
    3310             : 
    3311             : /*
    3312             :  * Allocate a new root block, fill it in.
    3313             :  */
    3314             : STATIC int                              /* error */
    3315       18580 : xfs_btree_new_root(
    3316             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3317             :         int                     *stat)  /* success/failure */
    3318             : {
    3319       18580 :         struct xfs_btree_block  *block; /* one half of the old root block */
    3320       18580 :         struct xfs_buf          *bp;    /* buffer containing block */
    3321       18580 :         int                     error;  /* error return value */
    3322       18580 :         struct xfs_buf          *lbp;   /* left buffer pointer */
    3323       18580 :         struct xfs_btree_block  *left;  /* left btree block */
    3324       18580 :         struct xfs_buf          *nbp;   /* new (root) buffer */
    3325       18580 :         struct xfs_btree_block  *new;   /* new (root) btree block */
    3326       18580 :         int                     nptr;   /* new value for key index, 1 or 2 */
    3327       18580 :         struct xfs_buf          *rbp;   /* right buffer pointer */
    3328       18580 :         struct xfs_btree_block  *right; /* right btree block */
    3329       18580 :         union xfs_btree_ptr     rptr;
    3330       18580 :         union xfs_btree_ptr     lptr;
    3331             : 
    3332       18580 :         XFS_BTREE_STATS_INC(cur, newroot);
    3333             : 
    3334             :         /* initialise our start point from the cursor */
    3335       18580 :         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
    3336             : 
    3337             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3338       18580 :         error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
    3339       18580 :         if (error)
    3340           0 :                 goto error0;
    3341       18580 :         if (*stat == 0)
    3342           0 :                 goto out0;
    3343       18580 :         XFS_BTREE_STATS_INC(cur, alloc);
    3344             : 
    3345             :         /* Set up the new block. */
    3346       18580 :         error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
    3347       18580 :         if (error)
    3348           0 :                 goto error0;
    3349             : 
    3350             :         /* Set the root in the holding structure  increasing the level by 1. */
    3351       18580 :         cur->bc_ops->set_root(cur, &lptr, 1);
    3352             : 
    3353             :         /*
    3354             :          * At the previous root level there are now two blocks: the old root,
    3355             :          * and the new block generated when it was split.  We don't know which
    3356             :          * one the cursor is pointing at, so we set up variables "left" and
    3357             :          * "right" for each case.
    3358             :          */
    3359       18580 :         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
    3360             : 
    3361             : #ifdef DEBUG
    3362       18580 :         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
    3363       18580 :         if (error)
    3364           0 :                 goto error0;
    3365             : #endif
    3366             : 
    3367       18580 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    3368       37160 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    3369             :                 /* Our block is left, pick up the right block. */
    3370        7955 :                 lbp = bp;
    3371        7955 :                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    3372        7955 :                 left = block;
    3373        7955 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    3374        7955 :                 if (error)
    3375           1 :                         goto error0;
    3376        7954 :                 bp = rbp;
    3377        7954 :                 nptr = 1;
    3378             :         } else {
    3379             :                 /* Our block is right, pick up the left block. */
    3380       10625 :                 rbp = bp;
    3381       10625 :                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
    3382       10625 :                 right = block;
    3383       10625 :                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    3384       10625 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    3385       10625 :                 if (error)
    3386           0 :                         goto error0;
    3387       10625 :                 bp = lbp;
    3388       10625 :                 nptr = 2;
    3389             :         }
    3390             : 
    3391             :         /* Fill in the new block's btree header and log it. */
    3392       18579 :         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
    3393       18579 :         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
    3394       55737 :         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
    3395             :                         !xfs_btree_ptr_is_null(cur, &rptr));
    3396             : 
    3397             :         /* Fill in the key data in the new root. */
    3398       37158 :         if (xfs_btree_get_level(left) > 0) {
    3399             :                 /*
    3400             :                  * Get the keys for the left block's keys and put them directly
    3401             :                  * in the parent block.  Do the same for the right block.
    3402             :                  */
    3403         376 :                 xfs_btree_get_node_keys(cur, left,
    3404             :                                 xfs_btree_key_addr(cur, 1, new));
    3405         376 :                 xfs_btree_get_node_keys(cur, right,
    3406             :                                 xfs_btree_key_addr(cur, 2, new));
    3407             :         } else {
    3408             :                 /*
    3409             :                  * Get the keys for the left block's records and put them
    3410             :                  * directly in the parent block.  Do the same for the right
    3411             :                  * block.
    3412             :                  */
    3413       18203 :                 xfs_btree_get_leaf_keys(cur, left,
    3414             :                         xfs_btree_key_addr(cur, 1, new));
    3415       18203 :                 xfs_btree_get_leaf_keys(cur, right,
    3416             :                         xfs_btree_key_addr(cur, 2, new));
    3417             :         }
    3418       18579 :         xfs_btree_log_keys(cur, nbp, 1, 2);
    3419             : 
    3420             :         /* Fill in the pointer data in the new root. */
    3421       18579 :         xfs_btree_copy_ptrs(cur,
    3422             :                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
    3423       18579 :         xfs_btree_copy_ptrs(cur,
    3424             :                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
    3425       18579 :         xfs_btree_log_ptrs(cur, nbp, 1, 2);
    3426             : 
    3427             :         /* Fix up the cursor. */
    3428       18579 :         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
    3429       18579 :         cur->bc_levels[cur->bc_nlevels].ptr = nptr;
    3430       18579 :         cur->bc_nlevels++;
    3431       18579 :         ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
    3432       18579 :         *stat = 1;
    3433       18579 :         return 0;
    3434             : error0:
    3435             :         return error;
    3436             : out0:
    3437           0 :         *stat = 0;
    3438           0 :         return 0;
    3439             : }
    3440             : 
    3441             : STATIC int
    3442    97239884 : xfs_btree_make_block_unfull(
    3443             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3444             :         int                     level,  /* btree level */
    3445             :         int                     numrecs,/* # of recs in block */
    3446             :         int                     *oindex,/* old tree index */
    3447             :         int                     *index, /* new tree index */
    3448             :         union xfs_btree_ptr     *nptr,  /* new btree ptr */
    3449             :         struct xfs_btree_cur    **ncur, /* new btree cursor */
    3450             :         union xfs_btree_key     *key,   /* key of new block */
    3451             :         int                     *stat)
    3452             : {
    3453    97239884 :         int                     error = 0;
    3454             : 
    3455    97239884 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3456    41705289 :             level == cur->bc_nlevels - 1) {
    3457      648865 :                 struct xfs_inode *ip = cur->bc_ino.ip;
    3458             : 
    3459      648865 :                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
    3460             :                         /* A root block that can be made bigger. */
    3461      631014 :                         xfs_btree_iroot_realloc(cur, 1);
    3462      631014 :                         *stat = 1;
    3463             :                 } else {
    3464             :                         /* A root block that needs replacing */
    3465       17851 :                         int     logflags = 0;
    3466             : 
    3467       17851 :                         error = xfs_btree_new_iroot(cur, &logflags, stat);
    3468       17851 :                         if (error || *stat == 0)
    3469           0 :                                 return error;
    3470             : 
    3471       17851 :                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
    3472             :                 }
    3473             : 
    3474      648865 :                 return 0;
    3475             :         }
    3476             : 
    3477             :         /* First, try shifting an entry to the right neighbor. */
    3478    96591019 :         error = xfs_btree_rshift(cur, level, stat);
    3479    96590800 :         if (error || *stat)
    3480             :                 return error;
    3481             : 
    3482             :         /* Next, try shifting an entry to the left neighbor. */
    3483    62581869 :         error = xfs_btree_lshift(cur, level, stat);
    3484    62581891 :         if (error)
    3485             :                 return error;
    3486             : 
    3487    62581841 :         if (*stat) {
    3488    61669280 :                 *oindex = *index = cur->bc_levels[level].ptr;
    3489    61669280 :                 return 0;
    3490             :         }
    3491             : 
    3492             :         /*
    3493             :          * Next, try splitting the current block in half.
    3494             :          *
    3495             :          * If this works we have to re-set our variables because we
    3496             :          * could be in a different block now.
    3497             :          */
    3498      912561 :         error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
    3499      912561 :         if (error || *stat == 0)
    3500             :                 return error;
    3501             : 
    3502             : 
    3503      912346 :         *index = cur->bc_levels[level].ptr;
    3504      912346 :         return 0;
    3505             : }
    3506             : 
    3507             : /*
    3508             :  * Insert one record/level.  Return information to the caller
    3509             :  * allowing the next level up to proceed if necessary.
    3510             :  */
    3511             : STATIC int
    3512   835321062 : xfs_btree_insrec(
    3513             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3514             :         int                     level,  /* level to insert record at */
    3515             :         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
    3516             :         union xfs_btree_rec     *rec,   /* record to insert */
    3517             :         union xfs_btree_key     *key,   /* i/o: block key for ptrp */
    3518             :         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
    3519             :         int                     *stat)  /* success/failure */
    3520             : {
    3521   835321062 :         struct xfs_btree_block  *block; /* btree block */
    3522   835321062 :         struct xfs_buf          *bp;    /* buffer for block */
    3523   835321062 :         union xfs_btree_ptr     nptr;   /* new block ptr */
    3524   835321062 :         struct xfs_btree_cur    *ncur = NULL;   /* new btree cursor */
    3525   835321062 :         union xfs_btree_key     nkey;   /* new block key */
    3526   835321062 :         union xfs_btree_key     *lkey;
    3527   835321062 :         int                     optr;   /* old key/record index */
    3528   835321062 :         int                     ptr;    /* key/record index */
    3529   835321062 :         int                     numrecs;/* number of records */
    3530   835321062 :         int                     error;  /* error return value */
    3531   835321062 :         int                     i;
    3532   835321062 :         xfs_daddr_t             old_bn;
    3533             : 
    3534   835321062 :         ncur = NULL;
    3535   835321062 :         lkey = &nkey;
    3536             : 
    3537             :         /*
    3538             :          * If we have an external root pointer, and we've made it to the
    3539             :          * root level, allocate a new root block and we're done.
    3540             :          */
    3541   835321062 :         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3542   706025648 :             (level >= cur->bc_nlevels)) {
    3543       18580 :                 error = xfs_btree_new_root(cur, stat);
    3544       18580 :                 xfs_btree_set_ptr_null(cur, ptrp);
    3545             : 
    3546       18580 :                 return error;
    3547             :         }
    3548             : 
    3549             :         /* If we're off the left edge, return failure. */
    3550   835302482 :         ptr = cur->bc_levels[level].ptr;
    3551   835302482 :         if (ptr == 0) {
    3552           0 :                 *stat = 0;
    3553           0 :                 return 0;
    3554             :         }
    3555             : 
    3556   835302482 :         optr = ptr;
    3557             : 
    3558   835302482 :         XFS_BTREE_STATS_INC(cur, insrec);
    3559             : 
    3560             :         /* Get pointers to the btree buffer and block. */
    3561   835302482 :         block = xfs_btree_get_block(cur, level, &bp);
    3562   835307516 :         old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
    3563   835307516 :         numrecs = xfs_btree_get_numrecs(block);
    3564             : 
    3565             : #ifdef DEBUG
    3566   835307516 :         error = xfs_btree_check_block(cur, block, level, bp);
    3567   835321564 :         if (error)
    3568           0 :                 goto error0;
    3569             : 
    3570             :         /* Check that the new entry is being inserted in the right place. */
    3571   835321564 :         if (ptr <= numrecs) {
    3572   407772929 :                 if (level == 0) {
    3573   407262232 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rec,
    3574             :                                 xfs_btree_rec_addr(cur, ptr, block)));
    3575             :                 } else {
    3576      510697 :                         ASSERT(cur->bc_ops->keys_inorder(cur, key,
    3577             :                                 xfs_btree_key_addr(cur, ptr, block)));
    3578             :                 }
    3579             :         }
    3580             : #endif
    3581             : 
    3582             :         /*
    3583             :          * If the block is full, we can't insert the new entry until we
    3584             :          * make the block un-full.
    3585             :          */
    3586   835324858 :         xfs_btree_set_ptr_null(cur, &nptr);
    3587   835324858 :         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
    3588    97239734 :                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
    3589             :                                         &optr, &ptr, &nptr, &ncur, lkey, stat);
    3590    97239902 :                 if (error || *stat == 0)
    3591         405 :                         goto error0;
    3592             :         }
    3593             : 
    3594             :         /*
    3595             :          * The current block may have changed if the block was
    3596             :          * previously full and we have just made space in it.
    3597             :          */
    3598   835316632 :         block = xfs_btree_get_block(cur, level, &bp);
    3599   835317093 :         numrecs = xfs_btree_get_numrecs(block);
    3600             : 
    3601             : #ifdef DEBUG
    3602   835317093 :         error = xfs_btree_check_block(cur, block, level, bp);
    3603   835314141 :         if (error)
    3604           0 :                 goto error0;
    3605             : #endif
    3606             : 
    3607             :         /*
    3608             :          * At this point we know there's room for our new entry in the block
    3609             :          * we're pointing at.
    3610             :          */
    3611   835314141 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
    3612             : 
    3613   835314141 :         if (level > 0) {
    3614             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    3615      893760 :                 union xfs_btree_key     *kp;
    3616      893760 :                 union xfs_btree_ptr     *pp;
    3617             : 
    3618      893760 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    3619      893760 :                 pp = xfs_btree_ptr_addr(cur, ptr, block);
    3620             : 
    3621    16049672 :                 for (i = numrecs - ptr; i >= 0; i--) {
    3622    14262152 :                         error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3623    14262152 :                         if (error)
    3624           0 :                                 goto error0;
    3625             :                 }
    3626             : 
    3627      893760 :                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
    3628      893760 :                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
    3629             : 
    3630      893760 :                 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
    3631      893760 :                 if (error)
    3632           0 :                         goto error0;
    3633             : 
    3634             :                 /* Now put the new data in, bump numrecs and log it. */
    3635      893760 :                 xfs_btree_copy_keys(cur, kp, key, 1);
    3636      893760 :                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
    3637      893760 :                 numrecs++;
    3638      893760 :                 xfs_btree_set_numrecs(block, numrecs);
    3639      893760 :                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
    3640      893760 :                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
    3641             : #ifdef DEBUG
    3642      893760 :                 if (ptr < numrecs) {
    3643      510626 :                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
    3644             :                                 xfs_btree_key_addr(cur, ptr + 1, block)));
    3645             :                 }
    3646             : #endif
    3647             :         } else {
    3648             :                 /* It's a leaf. make a hole in the records */
    3649   834420381 :                 union xfs_btree_rec             *rp;
    3650             : 
    3651   834420381 :                 rp = xfs_btree_rec_addr(cur, ptr, block);
    3652             : 
    3653   834420381 :                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
    3654             : 
    3655             :                 /* Now put the new data in, bump numrecs and log it. */
    3656   834417588 :                 xfs_btree_copy_recs(cur, rp, rec, 1);
    3657   834424876 :                 xfs_btree_set_numrecs(block, ++numrecs);
    3658   834424876 :                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
    3659             : #ifdef DEBUG
    3660   834424377 :                 if (ptr < numrecs) {
    3661   407266044 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
    3662             :                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
    3663             :                 }
    3664             : #endif
    3665             :         }
    3666             : 
    3667             :         /* Log the new number of records in the btree header. */
    3668   835321508 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    3669             : 
    3670             :         /*
    3671             :          * Update btree keys to reflect the newly added record or keyptr.
    3672             :          * There are three cases here to be aware of.  Normally, all we have to
    3673             :          * do is walk towards the root, updating keys as necessary.
    3674             :          *
    3675             :          * If the caller had us target a full block for the insertion, we dealt
    3676             :          * with that by calling the _make_block_unfull function.  If the
    3677             :          * "make unfull" function splits the block, it'll hand us back the key
    3678             :          * and pointer of the new block.  We haven't yet added the new block to
    3679             :          * the next level up, so if we decide to add the new record to the new
    3680             :          * block (bp->b_bn != old_bn), we have to update the caller's pointer
    3681             :          * so that the caller adds the new block with the correct key.
    3682             :          *
    3683             :          * However, there is a third possibility-- if the selected block is the
    3684             :          * root block of an inode-rooted btree and cannot be expanded further,
    3685             :          * the "make unfull" function moves the root block contents to a new
    3686             :          * block and updates the root block to point to the new block.  In this
    3687             :          * case, no block pointer is passed back because the block has already
    3688             :          * been added to the btree.  In this case, we need to use the regular
    3689             :          * key update function, just like the first case.  This is critical for
    3690             :          * overlapping btrees, because the high key must be updated to reflect
    3691             :          * the entire tree, not just the subtree accessible through the first
    3692             :          * child of the root (which is now two levels down from the root).
    3693             :          */
    3694  1670640692 :         if (!xfs_btree_ptr_is_null(cur, &nptr) &&
    3695      912346 :             bp && xfs_buf_daddr(bp) != old_bn) {
    3696      672623 :                 xfs_btree_get_keys(cur, block, lkey);
    3697  1452148231 :         } else if (xfs_btree_needs_key_update(cur, optr)) {
    3698   562041723 :                 error = xfs_btree_update_keys(cur, level);
    3699   562040453 :                 if (error)
    3700           0 :                         goto error0;
    3701             :         }
    3702             : 
    3703             :         /*
    3704             :          * If we are tracking the last record in the tree and
    3705             :          * we are at the far right edge of the tree, update it.
    3706             :          */
    3707   835319076 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    3708    87067042 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    3709             :                                             ptr, LASTREC_INSREC);
    3710             :         }
    3711             : 
    3712             :         /*
    3713             :          * Return the new block number, if any.
    3714             :          * If there is one, give back a record value and a cursor too.
    3715             :          */
    3716   835318836 :         *ptrp = nptr;
    3717  1670637672 :         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
    3718      912346 :                 xfs_btree_copy_keys(cur, key, lkey, 1);
    3719      912346 :                 *curp = ncur;
    3720             :         }
    3721             : 
    3722   835318836 :         *stat = 1;
    3723   835318836 :         return 0;
    3724             : 
    3725         405 : error0:
    3726         405 :         if (ncur)
    3727           0 :                 xfs_btree_del_cursor(ncur, error);
    3728             :         return error;
    3729             : }
    3730             : 
    3731             : /*
    3732             :  * Insert the record at the point referenced by cur.
    3733             :  *
    3734             :  * A multi-level split of the tree on insert will invalidate the original
    3735             :  * cursor.  All callers of this function should assume that the cursor is
    3736             :  * no longer valid and revalidate it.
    3737             :  */
    3738             : int
    3739   834421218 : xfs_btree_insert(
    3740             :         struct xfs_btree_cur    *cur,
    3741             :         int                     *stat)
    3742             : {
    3743   834421218 :         int                     error;  /* error return value */
    3744   834421218 :         int                     i;      /* result value, 0 for failure */
    3745   834421218 :         int                     level;  /* current level number in btree */
    3746   834421218 :         union xfs_btree_ptr     nptr;   /* new block number (split result) */
    3747   834421218 :         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
    3748   834421218 :         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
    3749   834421218 :         union xfs_btree_key     bkey;   /* key of block to insert */
    3750   834421218 :         union xfs_btree_key     *key;
    3751   834421218 :         union xfs_btree_rec     rec;    /* record to insert */
    3752             : 
    3753   834421218 :         level = 0;
    3754   834421218 :         ncur = NULL;
    3755   834421218 :         pcur = cur;
    3756   834421218 :         key = &bkey;
    3757             : 
    3758   834421218 :         xfs_btree_set_ptr_null(cur, &nptr);
    3759             : 
    3760             :         /* Make a key out of the record data to be inserted, and save it. */
    3761   834421218 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    3762   834416049 :         cur->bc_ops->init_key_from_rec(key, &rec);
    3763             : 
    3764             :         /*
    3765             :          * Loop going up the tree, starting at the leaf level.
    3766             :          * Stop when we don't get a split block, that must mean that
    3767             :          * the insert is finished with this level.
    3768             :          */
    3769   835335641 :         do {
    3770             :                 /*
    3771             :                  * Insert nrec/nptr into this level of the tree.
    3772             :                  * Note if we fail, nptr will be null.
    3773             :                  */
    3774   835335641 :                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
    3775             :                                 &ncur, &i);
    3776   835334396 :                 if (error) {
    3777         406 :                         if (pcur != cur)
    3778           6 :                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
    3779         406 :                         goto error0;
    3780             :                 }
    3781             : 
    3782   835333990 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    3783           0 :                         xfs_btree_mark_sick(cur);
    3784           0 :                         error = -EFSCORRUPTED;
    3785           0 :                         goto error0;
    3786             :                 }
    3787   835333990 :                 level++;
    3788             : 
    3789             :                 /*
    3790             :                  * See if the cursor we just used is trash.
    3791             :                  * Can't trash the caller's cursor, but otherwise we should
    3792             :                  * if ncur is a new cursor or we're about to be done.
    3793             :                  */
    3794   835333990 :                 if (pcur != cur &&
    3795     1783968 :                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
    3796             :                         /* Save the state from the cursor before we trash it */
    3797      893760 :                         if (cur->bc_ops->update_cursor)
    3798      238860 :                                 cur->bc_ops->update_cursor(pcur, cur);
    3799      893760 :                         cur->bc_nlevels = pcur->bc_nlevels;
    3800      893760 :                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
    3801             :                 }
    3802             :                 /* If we got a new cursor, switch to it. */
    3803   835342707 :                 if (ncur) {
    3804      900119 :                         pcur = ncur;
    3805      900119 :                         ncur = NULL;
    3806             :                 }
    3807  1670685414 :         } while (!xfs_btree_ptr_is_null(cur, &nptr));
    3808             : 
    3809   834423695 :         *stat = i;
    3810   834423695 :         return 0;
    3811             : error0:
    3812             :         return error;
    3813             : }
    3814             : 
    3815             : /*
    3816             :  * Move the records from a child leaf block to the root block.
    3817             :  *
    3818             :  * Trickery here: The amount of memory we need per record for the incore root
    3819             :  * block changes when we convert a leaf block to an internal block.  Therefore,
    3820             :  * we free the incore root block, change the tree height, allocate a new incore
    3821             :  * root, and copy the records from the doomed block into the new root.
    3822             :  */
    3823             : STATIC void
    3824       14276 : xfs_btree_demote_leaf_child(
    3825             :         struct xfs_btree_cur    *cur,
    3826             :         struct xfs_btree_block  *cblock,
    3827             :         int                     numrecs)
    3828             : {
    3829       14276 :         union xfs_btree_rec     *rp;
    3830       14276 :         union xfs_btree_rec     *crp;
    3831       14276 :         struct xfs_btree_block  *block;
    3832       14276 :         size_t                  size;
    3833             : 
    3834             :         /* Zap the old root and change the tree height. */
    3835       14276 :         xfs_iroot_free(cur->bc_ino.ip, cur->bc_ino.whichfork);
    3836       14276 :         cur->bc_levels[0].bp = NULL;
    3837       14276 :         cur->bc_nlevels--;
    3838             : 
    3839             :         /*
    3840             :          * Allocate a new internal root block buffer and reinitialize it with
    3841             :          * the leaf records in the child.
    3842             :          */
    3843       14276 :         size = cur->bc_ops->iroot_ops->size(cur->bc_mp, 0, numrecs);
    3844       14276 :         xfs_iroot_alloc(cur->bc_ino.ip, cur->bc_ino.whichfork, size);
    3845       14276 :         block = xfs_btree_get_iroot(cur);
    3846       14276 :         xfs_btree_init_block(cur->bc_mp, block, cur->bc_ops, 0, numrecs,
    3847       14276 :                         cur->bc_ino.ip->i_ino);
    3848             : 
    3849       14276 :         rp = xfs_btree_rec_addr(cur, 1, block);
    3850       14276 :         crp = xfs_btree_rec_addr(cur, 1, cblock);
    3851       14276 :         xfs_btree_copy_recs(cur, rp, crp, numrecs);
    3852       14276 : }
    3853             : 
    3854             : /*
    3855             :  * Move the keyptrs from a child node block to the root block.
    3856             :  *
    3857             :  * Since the keyptr size does not change, all we have to do is increase the
    3858             :  * tree height, copy the keyptrs to the new internal node (cblock), shrink
    3859             :  * the root, and copy the pointers there.
    3860             :  */
    3861             : STATIC int
    3862        2193 : xfs_btree_demote_node_child(
    3863             :         struct xfs_btree_cur    *cur,
    3864             :         struct xfs_btree_block  *cblock,
    3865             :         int                     level,
    3866             :         int                     numrecs)
    3867             : {
    3868        2193 :         struct xfs_btree_block  *block;
    3869        2193 :         union xfs_btree_key     *ckp;
    3870        2193 :         union xfs_btree_key     *kp;
    3871        2193 :         union xfs_btree_ptr     *cpp;
    3872        2193 :         union xfs_btree_ptr     *pp;
    3873        2193 :         int                     i;
    3874        2193 :         int                     error;
    3875        2193 :         int                     diff;
    3876             : 
    3877             :         /*
    3878             :          * Adjust the root btree node size and the record count to match the
    3879             :          * doomed child so that we can copy the keyptrs ahead of changing the
    3880             :          * tree shape.
    3881             :          */
    3882        2193 :         diff = numrecs - cur->bc_ops->get_maxrecs(cur, level);
    3883        2193 :         xfs_btree_iroot_realloc(cur, diff);
    3884        2193 :         block = xfs_btree_get_iroot(cur);
    3885             : 
    3886        2193 :         xfs_btree_set_numrecs(block, numrecs);
    3887        2193 :         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
    3888             : 
    3889             :         /* Copy keys from the doomed block. */
    3890        2193 :         kp = xfs_btree_key_addr(cur, 1, block);
    3891        2193 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3892        2193 :         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
    3893             : 
    3894             :         /* Copy pointers from the doomed block. */
    3895        2193 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3896        2193 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3897       28453 :         for (i = 0; i < numrecs; i++) {
    3898       24067 :                 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
    3899       24067 :                 if (error)
    3900           0 :                         return error;
    3901             :         }
    3902        2193 :         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
    3903             : 
    3904             :         /* Decrease tree height, adjusting the root block level to match. */
    3905        2193 :         cur->bc_levels[level - 1].bp = NULL;
    3906        2193 :         be16_add_cpu(&block->bb_level, -1);
    3907        2193 :         cur->bc_nlevels--;
    3908        2193 :         return 0;
    3909             : }
    3910             : 
    3911             : /*
    3912             :  * Try to merge a non-leaf block back into the inode root.
    3913             :  *
    3914             :  * Note: the killroot names comes from the fact that we're effectively
    3915             :  * killing the old root block.  But because we can't just delete the
    3916             :  * inode we have to copy the single block it was pointing to into the
    3917             :  * inode.
    3918             :  */
    3919             : STATIC int
    3920    15128998 : xfs_btree_kill_iroot(
    3921             :         struct xfs_btree_cur    *cur)
    3922             : {
    3923    15128998 :         struct xfs_inode        *ip = cur->bc_ino.ip;
    3924    15128998 :         struct xfs_btree_block  *block;
    3925    15128998 :         struct xfs_btree_block  *cblock;
    3926    15128998 :         struct xfs_buf          *cbp;
    3927    15128998 :         int                     level;
    3928    15128998 :         int                     numrecs;
    3929    15128998 :         int                     error;
    3930             : #ifdef DEBUG
    3931    15128998 :         union xfs_btree_ptr     ptr;
    3932             : #endif
    3933             : 
    3934    15128998 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3935    15128998 :         ASSERT((cur->bc_flags & XFS_BTREE_IROOT_RECORDS) ||
    3936             :                cur->bc_nlevels > 1);
    3937             : 
    3938             :         /*
    3939             :          * Don't deal with the root block needs to be a leaf case.
    3940             :          * We're just going to turn the thing back into extents anyway.
    3941             :          */
    3942    15128998 :         level = cur->bc_nlevels - 1;
    3943    15128998 :         if (level == 1 && !(cur->bc_flags & XFS_BTREE_IROOT_RECORDS))
    3944    12631182 :                 goto out0;
    3945             : 
    3946             :         /* If we're already a leaf, jump out. */
    3947     2497816 :         if (level == 0)
    3948      536770 :                 goto out0;
    3949             : 
    3950             :         /*
    3951             :          * Give up if the root has multiple children.
    3952             :          */
    3953     1961046 :         block = xfs_btree_get_iroot(cur);
    3954     3922092 :         if (xfs_btree_get_numrecs(block) != 1)
    3955         597 :                 goto out0;
    3956             : 
    3957     1960449 :         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
    3958     1960449 :         numrecs = xfs_btree_get_numrecs(cblock);
    3959             : 
    3960             :         /*
    3961             :          * Only do this if the next level will fit.
    3962             :          * Then the data must be copied up to the inode,
    3963             :          * instead of freeing the root you free the next level.
    3964             :          */
    3965     1960449 :         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
    3966     1943980 :                 goto out0;
    3967             : 
    3968       16469 :         XFS_BTREE_STATS_INC(cur, killroot);
    3969             : 
    3970             : #ifdef DEBUG
    3971       16469 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    3972       32938 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3973       16469 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    3974       32938 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3975             : #endif
    3976             : 
    3977       16469 :         if (level > 1) {
    3978        2193 :                 error = xfs_btree_demote_node_child(cur, cblock, level,
    3979             :                                 numrecs);
    3980        2193 :                 if (error)
    3981             :                         return error;
    3982             :         } else
    3983       14276 :                 xfs_btree_demote_leaf_child(cur, cblock, numrecs);
    3984             : 
    3985       16469 :         error = xfs_btree_free_block(cur, cbp);
    3986       16469 :         if (error)
    3987             :                 return error;
    3988             : 
    3989       32938 :         xfs_trans_log_inode(cur->bc_tp, ip,
    3990       16469 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
    3991             : out0:
    3992             :         return 0;
    3993             : }
    3994             : 
    3995             : /*
    3996             :  * Kill the current root node, and replace it with it's only child node.
    3997             :  */
    3998             : STATIC int
    3999       13234 : xfs_btree_kill_root(
    4000             :         struct xfs_btree_cur    *cur,
    4001             :         struct xfs_buf          *bp,
    4002             :         int                     level,
    4003             :         union xfs_btree_ptr     *newroot)
    4004             : {
    4005       13234 :         int                     error;
    4006             : 
    4007       13234 :         XFS_BTREE_STATS_INC(cur, killroot);
    4008             : 
    4009             :         /*
    4010             :          * Update the root pointer, decreasing the level by 1 and then
    4011             :          * free the old root.
    4012             :          */
    4013       13234 :         cur->bc_ops->set_root(cur, newroot, -1);
    4014             : 
    4015       13234 :         error = xfs_btree_free_block(cur, bp);
    4016       13234 :         if (error)
    4017             :                 return error;
    4018             : 
    4019       13234 :         cur->bc_levels[level].bp = NULL;
    4020       13234 :         cur->bc_levels[level].ra = 0;
    4021       13234 :         cur->bc_nlevels--;
    4022             : 
    4023       13234 :         return 0;
    4024             : }
    4025             : 
    4026             : STATIC int
    4027   239030253 : xfs_btree_dec_cursor(
    4028             :         struct xfs_btree_cur    *cur,
    4029             :         int                     level,
    4030             :         int                     *stat)
    4031             : {
    4032   239030253 :         int                     error;
    4033   239030253 :         int                     i;
    4034             : 
    4035   239030253 :         if (level > 0) {
    4036      403354 :                 error = xfs_btree_decrement(cur, level, &i);
    4037      403354 :                 if (error)
    4038             :                         return error;
    4039             :         }
    4040             : 
    4041   239030253 :         *stat = 1;
    4042   239030253 :         return 0;
    4043             : }
    4044             : 
    4045             : /*
    4046             :  * Single level of the btree record deletion routine.
    4047             :  * Delete record pointed to by cur/level.
    4048             :  * Remove the record from its block then rebalance the tree.
    4049             :  * Return 0 for error, 1 for done, 2 to go on to the next level.
    4050             :  */
    4051             : STATIC int                                      /* error */
    4052   701112943 : xfs_btree_delrec(
    4053             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    4054             :         int                     level,          /* level removing record from */
    4055             :         int                     *stat)          /* fail/done/go-on */
    4056             : {
    4057   701112943 :         struct xfs_btree_block  *block;         /* btree block */
    4058   701112943 :         union xfs_btree_ptr     cptr;           /* current block ptr */
    4059   701112943 :         struct xfs_buf          *bp;            /* buffer for block */
    4060   701112943 :         int                     error;          /* error return value */
    4061   701112943 :         int                     i;              /* loop counter */
    4062   701112943 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    4063   701112943 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    4064   701112943 :         struct xfs_btree_block  *left;          /* left btree block */
    4065   701112943 :         int                     lrecs = 0;      /* left record count */
    4066   701112943 :         int                     ptr;            /* key/record index */
    4067   701112943 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    4068   701112943 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    4069   701112943 :         struct xfs_btree_block  *right;         /* right btree block */
    4070   701112943 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    4071   701112943 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    4072   701112943 :         int                     rrecs = 0;      /* right record count */
    4073   701112943 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    4074   701112943 :         int                     numrecs;        /* temporary numrec count */
    4075             : 
    4076   701112943 :         tcur = NULL;
    4077             : 
    4078             :         /* Get the index of the entry being deleted, check for nothing there. */
    4079   701112943 :         ptr = cur->bc_levels[level].ptr;
    4080   701112943 :         if (ptr == 0) {
    4081           0 :                 *stat = 0;
    4082           0 :                 return 0;
    4083             :         }
    4084             : 
    4085             :         /* Get the buffer & block containing the record or key/ptr. */
    4086   701112943 :         block = xfs_btree_get_block(cur, level, &bp);
    4087   701114002 :         numrecs = xfs_btree_get_numrecs(block);
    4088             : 
    4089             : #ifdef DEBUG
    4090   701114002 :         error = xfs_btree_check_block(cur, block, level, bp);
    4091   701115141 :         if (error)
    4092           0 :                 goto error0;
    4093             : #endif
    4094             : 
    4095             :         /* Fail if we're off the end of the block. */
    4096   701115141 :         if (ptr > numrecs) {
    4097           0 :                 *stat = 0;
    4098           0 :                 return 0;
    4099             :         }
    4100             : 
    4101   701115141 :         XFS_BTREE_STATS_INC(cur, delrec);
    4102   701115141 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
    4103             : 
    4104             :         /* Excise the entries being deleted. */
    4105   701115141 :         if (level > 0) {
    4106             :                 /* It's a nonleaf. operate on keys and ptrs */
    4107      419874 :                 union xfs_btree_key     *lkp;
    4108      419874 :                 union xfs_btree_ptr     *lpp;
    4109             : 
    4110      419874 :                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
    4111      419874 :                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
    4112             : 
    4113     7580458 :                 for (i = 0; i < numrecs - ptr; i++) {
    4114     7160584 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    4115     7160584 :                         if (error)
    4116           0 :                                 goto error0;
    4117             :                 }
    4118             : 
    4119      419874 :                 if (ptr < numrecs) {
    4120      203832 :                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
    4121      203832 :                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
    4122      203832 :                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
    4123      203832 :                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
    4124             :                 }
    4125             :         } else {
    4126             :                 /* It's a leaf. operate on records */
    4127   700695267 :                 if (ptr < numrecs) {
    4128   304669661 :                         xfs_btree_shift_recs(cur,
    4129             :                                 xfs_btree_rec_addr(cur, ptr + 1, block),
    4130             :                                 -1, numrecs - ptr);
    4131   304671766 :                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
    4132             :                 }
    4133             :         }
    4134             : 
    4135             :         /*
    4136             :          * Decrement and log the number of entries in the block.
    4137             :          */
    4138   701119921 :         xfs_btree_set_numrecs(block, --numrecs);
    4139   701119921 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    4140             : 
    4141             :         /*
    4142             :          * If we are tracking the last record in the tree and
    4143             :          * we are at the far right edge of the tree, update it.
    4144             :          */
    4145   701118640 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    4146    82248452 :                 cur->bc_ops->update_lastrec(cur, block, NULL,
    4147             :                                             ptr, LASTREC_DELREC);
    4148             :         }
    4149             : 
    4150             :         /*
    4151             :          * We're at the root level.  First, shrink the root block in-memory.
    4152             :          * Try to get rid of the next level down.  If we can't then there's
    4153             :          * nothing left to do.
    4154             :          */
    4155   701126572 :         if (level == cur->bc_nlevels - 1) {
    4156   436790599 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    4157      560217 :                         xfs_btree_iroot_realloc(cur, -1);
    4158             : 
    4159      560217 :                         error = xfs_btree_kill_iroot(cur);
    4160      560217 :                         if (error)
    4161           0 :                                 goto error0;
    4162             : 
    4163      560217 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    4164      560217 :                         if (error)
    4165           0 :                                 goto error0;
    4166      560217 :                         *stat = 1;
    4167      560217 :                         return 0;
    4168             :                 }
    4169             : 
    4170             :                 /*
    4171             :                  * If this is the root level, and there's only one entry left,
    4172             :                  * and it's NOT the leaf level, then we can get rid of this
    4173             :                  * level.
    4174             :                  */
    4175   436230382 :                 if (numrecs == 1 && level > 0) {
    4176       13234 :                         union xfs_btree_ptr     *pp;
    4177             :                         /*
    4178             :                          * pp is still set to the first pointer in the block.
    4179             :                          * Make it the new root of the btree.
    4180             :                          */
    4181       13234 :                         pp = xfs_btree_ptr_addr(cur, 1, block);
    4182       13234 :                         error = xfs_btree_kill_root(cur, bp, level, pp);
    4183       13234 :                         if (error)
    4184           0 :                                 goto error0;
    4185   436217148 :                 } else if (level > 0) {
    4186      107496 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    4187      107496 :                         if (error)
    4188           0 :                                 goto error0;
    4189             :                 }
    4190   436230382 :                 *stat = 1;
    4191   436230382 :                 return 0;
    4192             :         }
    4193             : 
    4194             :         /*
    4195             :          * If we deleted the leftmost entry in the block, update the
    4196             :          * key values above us in the tree.
    4197             :          */
    4198   403426242 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    4199   129531389 :                 error = xfs_btree_update_keys(cur, level);
    4200   129531832 :                 if (error)
    4201           0 :                         goto error0;
    4202             :         }
    4203             : 
    4204             :         /*
    4205             :          * If the number of records remaining in the block is at least
    4206             :          * the minimum, we're done.
    4207             :          */
    4208   264336416 :         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
    4209   213423656 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    4210   213423790 :                 if (error)
    4211           0 :                         goto error0;
    4212             :                 return 0;
    4213             :         }
    4214             : 
    4215             :         /*
    4216             :          * Otherwise, we have to move some records around to keep the
    4217             :          * tree balanced.  Look at the left and right sibling blocks to
    4218             :          * see if we can re-balance by moving only one record.
    4219             :          */
    4220    50912884 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4221    50912928 :         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
    4222             : 
    4223    50912965 :         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    4224             :                 /*
    4225             :                  * One child of root, need to get a chance to copy its contents
    4226             :                  * into the root and delete it. Can't go up to next level,
    4227             :                  * there's nothing to delete there.
    4228             :                  */
    4229   108457358 :                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
    4230    14568782 :                     xfs_btree_ptr_is_null(cur, &lptr) &&
    4231    14568782 :                     level == cur->bc_nlevels - 2) {
    4232    14568783 :                         error = xfs_btree_kill_iroot(cur);
    4233    14568773 :                         if (!error)
    4234    14568773 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4235    14568768 :                         if (error)
    4236           0 :                                 goto error0;
    4237             :                         return 0;
    4238             :                 }
    4239             :         }
    4240             : 
    4241    94457792 :         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
    4242             :                !xfs_btree_ptr_is_null(cur, &lptr));
    4243             : 
    4244             :         /*
    4245             :          * Duplicate the cursor so our btree manipulations here won't
    4246             :          * disrupt the next level up.
    4247             :          */
    4248    36344182 :         error = xfs_btree_dup_cursor(cur, &tcur);
    4249    36344176 :         if (error)
    4250           2 :                 goto error0;
    4251             : 
    4252             :         /*
    4253             :          * If there's a right sibling, see if it's ok to shift an entry
    4254             :          * out of it.
    4255             :          */
    4256    72688348 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    4257             :                 /*
    4258             :                  * Move the temp cursor to the last entry in the next block.
    4259             :                  * Actually any entry but the first would suffice.
    4260             :                  */
    4261    14574746 :                 i = xfs_btree_lastrec(tcur, level);
    4262    14574754 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4263           0 :                         xfs_btree_mark_sick(cur);
    4264           0 :                         error = -EFSCORRUPTED;
    4265           0 :                         goto error0;
    4266             :                 }
    4267             : 
    4268    14574754 :                 error = xfs_btree_increment(tcur, level, &i);
    4269    14574759 :                 if (error)
    4270           3 :                         goto error0;
    4271    14574756 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4272           0 :                         xfs_btree_mark_sick(cur);
    4273           0 :                         error = -EFSCORRUPTED;
    4274           0 :                         goto error0;
    4275             :                 }
    4276             : 
    4277    14574756 :                 i = xfs_btree_lastrec(tcur, level);
    4278    14574745 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4279           0 :                         xfs_btree_mark_sick(cur);
    4280           0 :                         error = -EFSCORRUPTED;
    4281           0 :                         goto error0;
    4282             :                 }
    4283             : 
    4284             :                 /* Grab a pointer to the block. */
    4285    14574745 :                 right = xfs_btree_get_block(tcur, level, &rbp);
    4286             : #ifdef DEBUG
    4287    14574757 :                 error = xfs_btree_check_block(tcur, right, level, rbp);
    4288    14574756 :                 if (error)
    4289           0 :                         goto error0;
    4290             : #endif
    4291             :                 /* Grab the current block number, for future use. */
    4292    14574756 :                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
    4293             : 
    4294             :                 /*
    4295             :                  * If right block is full enough so that removing one entry
    4296             :                  * won't make it too empty, and left-shifting an entry out
    4297             :                  * of right to us works, we're done.
    4298             :                  */
    4299    29149511 :                 if (xfs_btree_get_numrecs(right) - 1 >=
    4300    14574755 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4301    10370846 :                         error = xfs_btree_lshift(tcur, level, &i);
    4302    10370847 :                         if (error)
    4303           0 :                                 goto error0;
    4304    10370847 :                         if (i) {
    4305    20741696 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4306             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4307             : 
    4308    10370848 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4309    10370848 :                                 tcur = NULL;
    4310             : 
    4311    10370848 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4312    10370848 :                                 if (error)
    4313           0 :                                         goto error0;
    4314             :                                 return 0;
    4315             :                         }
    4316             :                 }
    4317             : 
    4318             :                 /*
    4319             :                  * Otherwise, grab the number of records in right for
    4320             :                  * future reference, and fix up the temp cursor to point
    4321             :                  * to our block again (last record).
    4322             :                  */
    4323     4203909 :                 rrecs = xfs_btree_get_numrecs(right);
    4324     8407818 :                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4325     4186491 :                         i = xfs_btree_firstrec(tcur, level);
    4326     4186491 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4327           0 :                                 xfs_btree_mark_sick(cur);
    4328           0 :                                 error = -EFSCORRUPTED;
    4329           0 :                                 goto error0;
    4330             :                         }
    4331             : 
    4332     4186491 :                         error = xfs_btree_decrement(tcur, level, &i);
    4333     4186491 :                         if (error)
    4334           0 :                                 goto error0;
    4335     4186491 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4336           0 :                                 xfs_btree_mark_sick(cur);
    4337           0 :                                 error = -EFSCORRUPTED;
    4338           0 :                                 goto error0;
    4339             :                         }
    4340             :                 }
    4341             :         }
    4342             : 
    4343             :         /*
    4344             :          * If there's a left sibling, see if it's ok to shift an entry
    4345             :          * out of it.
    4346             :          */
    4347    51946674 :         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4348             :                 /*
    4349             :                  * Move the temp cursor to the first entry in the
    4350             :                  * previous block.
    4351             :                  */
    4352    25955876 :                 i = xfs_btree_firstrec(tcur, level);
    4353    25955859 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4354           0 :                         xfs_btree_mark_sick(cur);
    4355           0 :                         error = -EFSCORRUPTED;
    4356           0 :                         goto error0;
    4357             :                 }
    4358             : 
    4359    25955859 :                 error = xfs_btree_decrement(tcur, level, &i);
    4360    25955914 :                 if (error)
    4361           0 :                         goto error0;
    4362    25955914 :                 i = xfs_btree_firstrec(tcur, level);
    4363    25955916 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4364           0 :                         xfs_btree_mark_sick(cur);
    4365           0 :                         error = -EFSCORRUPTED;
    4366           0 :                         goto error0;
    4367             :                 }
    4368             : 
    4369             :                 /* Grab a pointer to the block. */
    4370    25955916 :                 left = xfs_btree_get_block(tcur, level, &lbp);
    4371             : #ifdef DEBUG
    4372    25955916 :                 error = xfs_btree_check_block(cur, left, level, lbp);
    4373    25955914 :                 if (error)
    4374           0 :                         goto error0;
    4375             : #endif
    4376             :                 /* Grab the current block number, for future use. */
    4377    25955914 :                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
    4378             : 
    4379             :                 /*
    4380             :                  * If left block is full enough so that removing one entry
    4381             :                  * won't make it too empty, and right-shifting an entry out
    4382             :                  * of left to us works, we're done.
    4383             :                  */
    4384    51911833 :                 if (xfs_btree_get_numrecs(left) - 1 >=
    4385    25955917 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4386    25553460 :                         error = xfs_btree_rshift(tcur, level, &i);
    4387    25553463 :                         if (error)
    4388           0 :                                 goto error0;
    4389    25553463 :                         if (i) {
    4390    51106926 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4391             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4392    25553463 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4393    25553463 :                                 tcur = NULL;
    4394    25553463 :                                 if (level == 0)
    4395    25550627 :                                         cur->bc_levels[0].ptr++;
    4396             : 
    4397    25553463 :                                 *stat = 1;
    4398    25553463 :                                 return 0;
    4399             :                         }
    4400             :                 }
    4401             : 
    4402             :                 /*
    4403             :                  * Otherwise, grab the number of records in right for
    4404             :                  * future reference.
    4405             :                  */
    4406      402456 :                 lrecs = xfs_btree_get_numrecs(left);
    4407             :         }
    4408             : 
    4409             :         /* Delete the temp cursor, we're done with it. */
    4410      419917 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4411      419874 :         tcur = NULL;
    4412             : 
    4413             :         /* If here, we need to do a join to keep the tree balanced. */
    4414      839748 :         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
    4415             : 
    4416     1242204 :         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
    4417      402456 :             lrecs + xfs_btree_get_numrecs(block) <=
    4418      402456 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4419             :                 /*
    4420             :                  * Set "right" to be the starting block,
    4421             :                  * "left" to be the left neighbor.
    4422             :                  */
    4423      402456 :                 rptr = cptr;
    4424      402456 :                 right = block;
    4425      402456 :                 rbp = bp;
    4426      402456 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    4427      402456 :                 if (error)
    4428           0 :                         goto error0;
    4429             : 
    4430             :         /*
    4431             :          * If that won't work, see if we can join with the right neighbor block.
    4432             :          */
    4433       52254 :         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
    4434       17418 :                    rrecs + xfs_btree_get_numrecs(block) <=
    4435       17418 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4436             :                 /*
    4437             :                  * Set "left" to be the starting block,
    4438             :                  * "right" to be the right neighbor.
    4439             :                  */
    4440       17418 :                 lptr = cptr;
    4441       17418 :                 left = block;
    4442       17418 :                 lbp = bp;
    4443       17418 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    4444       17418 :                 if (error)
    4445           0 :                         goto error0;
    4446             : 
    4447             :         /*
    4448             :          * Otherwise, we can't fix the imbalance.
    4449             :          * Just return.  This is probably a logic error, but it's not fatal.
    4450             :          */
    4451             :         } else {
    4452           0 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    4453           0 :                 if (error)
    4454           0 :                         goto error0;
    4455             :                 return 0;
    4456             :         }
    4457             : 
    4458      419874 :         rrecs = xfs_btree_get_numrecs(right);
    4459      419874 :         lrecs = xfs_btree_get_numrecs(left);
    4460             : 
    4461             :         /*
    4462             :          * We're now going to join "left" and "right" by moving all the stuff
    4463             :          * in "right" to "left" and deleting "right".
    4464             :          */
    4465      419874 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    4466      419874 :         if (level > 0) {
    4467             :                 /* It's a non-leaf.  Move keys and pointers. */
    4468         450 :                 union xfs_btree_key     *lkp;   /* left btree key */
    4469         450 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    4470         450 :                 union xfs_btree_key     *rkp;   /* right btree key */
    4471         450 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    4472             : 
    4473         450 :                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
    4474         450 :                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
    4475         450 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    4476         450 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    4477             : 
    4478       22137 :                 for (i = 1; i < rrecs; i++) {
    4479       21687 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    4480       21687 :                         if (error)
    4481           0 :                                 goto error0;
    4482             :                 }
    4483             : 
    4484         450 :                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
    4485         450 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
    4486             : 
    4487         450 :                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
    4488         450 :                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4489             :         } else {
    4490             :                 /* It's a leaf.  Move records.  */
    4491      419424 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    4492      419424 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    4493             : 
    4494      419424 :                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
    4495      419424 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    4496             : 
    4497      419424 :                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
    4498      419424 :                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4499             :         }
    4500             : 
    4501      419874 :         XFS_BTREE_STATS_INC(cur, join);
    4502             : 
    4503             :         /*
    4504             :          * Fix up the number of records and right block pointer in the
    4505             :          * surviving block, and log it.
    4506             :          */
    4507      419874 :         xfs_btree_set_numrecs(left, lrecs + rrecs);
    4508      419874 :         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
    4509      419874 :         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4510      419874 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    4511             : 
    4512             :         /* If there is a right sibling, point it to the remaining block. */
    4513      419874 :         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4514      839748 :         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
    4515      205083 :                 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
    4516      205083 :                 if (error)
    4517           0 :                         goto error0;
    4518      205083 :                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
    4519      205083 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    4520             :         }
    4521             : 
    4522             :         /* Free the deleted block. */
    4523      419874 :         error = xfs_btree_free_block(cur, rbp);
    4524      419874 :         if (error)
    4525           0 :                 goto error0;
    4526             : 
    4527             :         /*
    4528             :          * If we joined with the left neighbor, set the buffer in the
    4529             :          * cursor to the left block, and fix up the index.
    4530             :          */
    4531      419874 :         if (bp != lbp) {
    4532      402456 :                 cur->bc_levels[level].bp = lbp;
    4533      402456 :                 cur->bc_levels[level].ptr += lrecs;
    4534      402456 :                 cur->bc_levels[level].ra = 0;
    4535             :         }
    4536             :         /*
    4537             :          * If we joined with the right neighbor and there's a level above
    4538             :          * us, increment the cursor at that level.
    4539             :          */
    4540       17418 :         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
    4541       16767 :                    (level + 1 < cur->bc_nlevels)) {
    4542       17418 :                 error = xfs_btree_increment(cur, level + 1, &i);
    4543       17418 :                 if (error)
    4544           0 :                         goto error0;
    4545             :         }
    4546             : 
    4547             :         /*
    4548             :          * Readjust the ptr at this level if it's not a leaf, since it's
    4549             :          * still pointing at the deletion point, which makes the cursor
    4550             :          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
    4551             :          * We can't use decrement because it would change the next level up.
    4552             :          */
    4553      419874 :         if (level > 0)
    4554         450 :                 cur->bc_levels[level].ptr--;
    4555             : 
    4556             :         /*
    4557             :          * We combined blocks, so we have to update the parent keys if the
    4558             :          * btree supports overlapped intervals.  However,
    4559             :          * bc_levels[level + 1].ptr points to the old block so that the caller
    4560             :          * knows which record to delete.  Therefore, the caller must be savvy
    4561             :          * enough to call updkeys for us if we return stat == 2.  The other
    4562             :          * exit points from this function don't require deletions further up
    4563             :          * the tree, so they can call updkeys directly.
    4564             :          */
    4565             : 
    4566             :         /* Return value means the next level up has something to do. */
    4567      419874 :         *stat = 2;
    4568      419874 :         return 0;
    4569             : 
    4570           5 : error0:
    4571           5 :         if (tcur)
    4572           3 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    4573             :         return error;
    4574             : }
    4575             : 
    4576             : /*
    4577             :  * Delete the record pointed to by cur.
    4578             :  * The cursor refers to the place where the record was (could be inserted)
    4579             :  * when the operation returns.
    4580             :  */
    4581             : int                                     /* error */
    4582   700687971 : xfs_btree_delete(
    4583             :         struct xfs_btree_cur    *cur,
    4584             :         int                     *stat)  /* success/failure */
    4585             : {
    4586   700687971 :         int                     error;  /* error return value */
    4587   700687971 :         int                     level;
    4588   700687971 :         int                     i;
    4589   700687971 :         bool                    joined = false;
    4590             : 
    4591             :         /*
    4592             :          * Go up the tree, starting at leaf level.
    4593             :          *
    4594             :          * If 2 is returned then a join was done; go to the next level.
    4595             :          * Otherwise we are done.
    4596             :          */
    4597  1401800780 :         for (level = 0, i = 2; i == 2; level++) {
    4598   701107635 :                 error = xfs_btree_delrec(cur, level, &i);
    4599   701112814 :                 if (error)
    4600           5 :                         goto error0;
    4601   701112809 :                 if (i == 2)
    4602      419874 :                         joined = true;
    4603             :         }
    4604             : 
    4605             :         /*
    4606             :          * If we combined blocks as part of deleting the record, delrec won't
    4607             :          * have updated the parent high keys so we have to do that here.
    4608             :          */
    4609   700693145 :         if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
    4610      155173 :                 error = xfs_btree_updkeys_force(cur, 0);
    4611      155173 :                 if (error)
    4612           0 :                         goto error0;
    4613             :         }
    4614             : 
    4615   700693145 :         if (i == 0) {
    4616           0 :                 for (level = 1; level < cur->bc_nlevels; level++) {
    4617           0 :                         if (cur->bc_levels[level].ptr == 0) {
    4618           0 :                                 error = xfs_btree_decrement(cur, level, &i);
    4619           0 :                                 if (error)
    4620           0 :                                         goto error0;
    4621             :                                 break;
    4622             :                         }
    4623             :                 }
    4624             :         }
    4625             : 
    4626   700693145 :         *stat = i;
    4627   700693145 :         return 0;
    4628             : error0:
    4629             :         return error;
    4630             : }
    4631             : 
    4632             : /*
    4633             :  * Get the data from the pointed-to record.
    4634             :  */
    4635             : int                                     /* error */
    4636 >13315*10^7 : xfs_btree_get_rec(
    4637             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    4638             :         union xfs_btree_rec     **recp, /* output: btree record */
    4639             :         int                     *stat)  /* output: success/failure */
    4640             : {
    4641 >13315*10^7 :         struct xfs_btree_block  *block; /* btree block */
    4642 >13315*10^7 :         struct xfs_buf          *bp;    /* buffer pointer */
    4643 >13315*10^7 :         int                     ptr;    /* record number */
    4644             : #ifdef DEBUG
    4645 >13315*10^7 :         int                     error;  /* error return value */
    4646             : #endif
    4647             : 
    4648 >13315*10^7 :         ptr = cur->bc_levels[0].ptr;
    4649 >13315*10^7 :         block = xfs_btree_get_block(cur, 0, &bp);
    4650             : 
    4651             : #ifdef DEBUG
    4652 >13339*10^7 :         error = xfs_btree_check_block(cur, block, 0, bp);
    4653 >13219*10^7 :         if (error)
    4654             :                 return error;
    4655             : #endif
    4656             : 
    4657             :         /*
    4658             :          * Off the right end or left end, return failure.
    4659             :          */
    4660 >26438*10^7 :         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
    4661       79696 :                 *stat = 0;
    4662       79696 :                 return 0;
    4663             :         }
    4664             : 
    4665             :         /*
    4666             :          * Point to the record and extract its data.
    4667             :          */
    4668 >13272*10^7 :         *recp = xfs_btree_rec_addr(cur, ptr, block);
    4669 >13272*10^7 :         *stat = 1;
    4670 >13272*10^7 :         return 0;
    4671             : }
    4672             : 
    4673             : /* Visit a block in a btree. */
    4674             : STATIC int
    4675   141680945 : xfs_btree_visit_block(
    4676             :         struct xfs_btree_cur            *cur,
    4677             :         int                             level,
    4678             :         xfs_btree_visit_blocks_fn       fn,
    4679             :         void                            *data)
    4680             : {
    4681   141680945 :         struct xfs_btree_block          *block;
    4682   141680945 :         struct xfs_buf                  *bp;
    4683   141680945 :         union xfs_btree_ptr             rptr, bufptr;
    4684   141680945 :         int                             error;
    4685             : 
    4686             :         /* do right sibling readahead */
    4687   141680945 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    4688   141681000 :         block = xfs_btree_get_block(cur, level, &bp);
    4689             : 
    4690             :         /* process the block */
    4691   141680910 :         error = fn(cur, level, data);
    4692   141680932 :         if (error)
    4693             :                 return error;
    4694             : 
    4695             :         /* now read rh sibling block for next iteration */
    4696   141680934 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4697   283362192 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    4698             :                 return -ENOENT;
    4699             : 
    4700             :         /*
    4701             :          * We only visit blocks once in this walk, so we have to avoid the
    4702             :          * internal xfs_btree_lookup_get_block() optimisation where it will
    4703             :          * return the same block without checking if the right sibling points
    4704             :          * back to us and creates a cyclic reference in the btree.
    4705             :          */
    4706   111759128 :         xfs_btree_buf_to_ptr(cur, bp, &bufptr);
    4707   111759082 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4708    14211959 :                 if (rptr.l == bufptr.l) {
    4709           0 :                         xfs_btree_mark_sick(cur);
    4710           0 :                         return -EFSCORRUPTED;
    4711             :                 }
    4712             :         } else {
    4713    97547123 :                 if (rptr.s == bufptr.s) {
    4714           0 :                         xfs_btree_mark_sick(cur);
    4715           0 :                         return -EFSCORRUPTED;
    4716             :                 }
    4717             :         }
    4718   111759082 :         return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
    4719             : }
    4720             : 
    4721             : 
    4722             : /* Visit every block in a btree. */
    4723             : int
    4724    17433778 : xfs_btree_visit_blocks(
    4725             :         struct xfs_btree_cur            *cur,
    4726             :         xfs_btree_visit_blocks_fn       fn,
    4727             :         unsigned int                    flags,
    4728             :         void                            *data)
    4729             : {
    4730    17433778 :         union xfs_btree_ptr             lptr;
    4731    17433778 :         int                             level;
    4732    17433778 :         struct xfs_btree_block          *block = NULL;
    4733    17433778 :         int                             error = 0;
    4734             : 
    4735    17433778 :         cur->bc_ops->init_ptr_from_cur(cur, &lptr);
    4736             : 
    4737             :         /* for each level */
    4738    48899547 :         for (level = cur->bc_nlevels - 1; level >= 0; level--) {
    4739             :                 /* grab the left hand block */
    4740    31465812 :                 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
    4741    31465882 :                 if (error)
    4742         185 :                         return error;
    4743             : 
    4744             :                 /* readahead the left most block for the next level down */
    4745    31465697 :                 if (level > 0) {
    4746    14031961 :                         union xfs_btree_ptr     *ptr;
    4747             : 
    4748    14031961 :                         ptr = xfs_btree_ptr_addr(cur, 1, block);
    4749    14031947 :                         xfs_btree_readahead_ptr(cur, ptr, 1);
    4750             : 
    4751             :                         /* save for the next iteration of the loop */
    4752    14031962 :                         xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
    4753             : 
    4754    14031974 :                         if (!(flags & XFS_BTREE_VISIT_LEAVES))
    4755     1543728 :                                 continue;
    4756    17433736 :                 } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
    4757           0 :                         continue;
    4758             :                 }
    4759             : 
    4760             :                 /* for each buffer in the level */
    4761   141680939 :                 do {
    4762   141680939 :                         error = xfs_btree_visit_block(cur, level, fn, data);
    4763   141680922 :                 } while (!error);
    4764             : 
    4765    29921965 :                 if (error != -ENOENT)
    4766           1 :                         return error;
    4767             :         }
    4768             : 
    4769             :         return 0;
    4770             : }
    4771             : 
    4772             : /*
    4773             :  * Change the owner of a btree.
    4774             :  *
    4775             :  * The mechanism we use here is ordered buffer logging. Because we don't know
    4776             :  * how many buffers were are going to need to modify, we don't really want to
    4777             :  * have to make transaction reservations for the worst case of every buffer in a
    4778             :  * full size btree as that may be more space that we can fit in the log....
    4779             :  *
    4780             :  * We do the btree walk in the most optimal manner possible - we have sibling
    4781             :  * pointers so we can just walk all the blocks on each level from left to right
    4782             :  * in a single pass, and then move to the next level and do the same. We can
    4783             :  * also do readahead on the sibling pointers to get IO moving more quickly,
    4784             :  * though for slow disks this is unlikely to make much difference to performance
    4785             :  * as the amount of CPU work we have to do before moving to the next block is
    4786             :  * relatively small.
    4787             :  *
    4788             :  * For each btree block that we load, modify the owner appropriately, set the
    4789             :  * buffer as an ordered buffer and log it appropriately. We need to ensure that
    4790             :  * we mark the region we change dirty so that if the buffer is relogged in
    4791             :  * a subsequent transaction the changes we make here as an ordered buffer are
    4792             :  * correctly relogged in that transaction.  If we are in recovery context, then
    4793             :  * just queue the modified buffer as delayed write buffer so the transaction
    4794             :  * recovery completion writes the changes to disk.
    4795             :  */
    4796             : struct xfs_btree_block_change_owner_info {
    4797             :         uint64_t                new_owner;
    4798             :         struct list_head        *buffer_list;
    4799             : };
    4800             : 
    4801             : static int
    4802           0 : xfs_btree_block_change_owner(
    4803             :         struct xfs_btree_cur    *cur,
    4804             :         int                     level,
    4805             :         void                    *data)
    4806             : {
    4807           0 :         struct xfs_btree_block_change_owner_info        *bbcoi = data;
    4808           0 :         struct xfs_btree_block  *block;
    4809           0 :         struct xfs_buf          *bp;
    4810             : 
    4811             :         /* modify the owner */
    4812           0 :         block = xfs_btree_get_block(cur, level, &bp);
    4813           0 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4814           0 :                 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
    4815             :                         return 0;
    4816           0 :                 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
    4817             :         } else {
    4818           0 :                 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
    4819             :                         return 0;
    4820           0 :                 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
    4821             :         }
    4822             : 
    4823             :         /*
    4824             :          * If the block is a root block hosted in an inode, we might not have a
    4825             :          * buffer pointer here and we shouldn't attempt to log the change as the
    4826             :          * information is already held in the inode and discarded when the root
    4827             :          * block is formatted into the on-disk inode fork. We still change it,
    4828             :          * though, so everything is consistent in memory.
    4829             :          */
    4830           0 :         if (!bp) {
    4831           0 :                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    4832           0 :                 ASSERT(level == cur->bc_nlevels - 1);
    4833           0 :                 return 0;
    4834             :         }
    4835             : 
    4836           0 :         if (cur->bc_tp) {
    4837           0 :                 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
    4838           0 :                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
    4839           0 :                         return -EAGAIN;
    4840             :                 }
    4841             :         } else {
    4842           0 :                 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
    4843             :         }
    4844             : 
    4845             :         return 0;
    4846             : }
    4847             : 
    4848             : int
    4849           0 : xfs_btree_change_owner(
    4850             :         struct xfs_btree_cur    *cur,
    4851             :         uint64_t                new_owner,
    4852             :         struct list_head        *buffer_list)
    4853             : {
    4854           0 :         struct xfs_btree_block_change_owner_info        bbcoi;
    4855             : 
    4856           0 :         bbcoi.new_owner = new_owner;
    4857           0 :         bbcoi.buffer_list = buffer_list;
    4858             : 
    4859           0 :         return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
    4860             :                         XFS_BTREE_VISIT_ALL, &bbcoi);
    4861             : }
    4862             : 
    4863             : /* Verify the v5 fields of a long-format btree block. */
    4864             : xfs_failaddr_t
    4865   755708587 : xfs_btree_lblock_v5hdr_verify(
    4866             :         struct xfs_buf          *bp,
    4867             :         uint64_t                owner)
    4868             : {
    4869   755708587 :         struct xfs_mount        *mp = bp->b_mount;
    4870   755708587 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4871             : 
    4872   755708587 :         if (!xfs_has_crc(mp))
    4873           0 :                 return __this_address;
    4874   755708587 :         if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4875           0 :                 return __this_address;
    4876   755709454 :         if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4877           0 :                 return __this_address;
    4878   755709454 :         if (owner != XFS_RMAP_OWN_UNKNOWN &&
    4879           0 :             be64_to_cpu(block->bb_u.l.bb_owner) != owner)
    4880           0 :                 return __this_address;
    4881             :         return NULL;
    4882             : }
    4883             : 
    4884             : /* Verify a long-format btree block. */
    4885             : xfs_failaddr_t
    4886    52475790 : xfs_btree_lblock_verify(
    4887             :         struct xfs_buf          *bp,
    4888             :         unsigned int            max_recs)
    4889             : {
    4890    52475790 :         struct xfs_mount        *mp = bp->b_mount;
    4891    52475790 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4892    52475790 :         xfs_fsblock_t           fsb;
    4893    52475790 :         xfs_failaddr_t          fa;
    4894             : 
    4895    52475790 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4896             : 
    4897             :         /* numrecs verification */
    4898    52475790 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4899           0 :                 return __this_address;
    4900             : 
    4901             :         /* sibling pointer verification */
    4902    52475894 :         fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
    4903    52475894 :         fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4904             :                         block->bb_u.l.bb_leftsib);
    4905    52475932 :         if (!fa)
    4906    52475952 :                 fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4907             :                                 block->bb_u.l.bb_rightsib);
    4908             :         return fa;
    4909             : }
    4910             : 
    4911             : /**
    4912             :  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
    4913             :  *                                    btree block
    4914             :  *
    4915             :  * @bp: buffer containing the btree block
    4916             :  */
    4917             : xfs_failaddr_t
    4918    73179078 : xfs_btree_sblock_v5hdr_verify(
    4919             :         struct xfs_buf          *bp)
    4920             : {
    4921    73179078 :         struct xfs_mount        *mp = bp->b_mount;
    4922    73179078 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4923    73179078 :         struct xfs_perag        *pag = bp->b_pag;
    4924             : 
    4925    73179078 :         if (!xfs_has_crc(mp))
    4926           0 :                 return __this_address;
    4927    73179078 :         if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4928           0 :                 return __this_address;
    4929    73179220 :         if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4930           0 :                 return __this_address;
    4931   118091137 :         if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
    4932           0 :                 return __this_address;
    4933             :         return NULL;
    4934             : }
    4935             : 
    4936             : /**
    4937             :  * xfs_btree_sblock_verify() -- verify a short-format btree block
    4938             :  *
    4939             :  * @bp: buffer containing the btree block
    4940             :  * @max_recs: maximum records allowed in this btree node
    4941             :  */
    4942             : xfs_failaddr_t
    4943    44951432 : xfs_btree_sblock_verify(
    4944             :         struct xfs_buf          *bp,
    4945             :         unsigned int            max_recs)
    4946             : {
    4947    44951432 :         struct xfs_mount        *mp = bp->b_mount;
    4948    44951432 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4949    44951432 :         xfs_agblock_t           agbno;
    4950    44951432 :         xfs_failaddr_t          fa;
    4951             : 
    4952    44951432 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4953             : 
    4954             :         /* numrecs verification */
    4955    44951432 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4956          14 :                 return __this_address;
    4957             : 
    4958             :         /* sibling pointer verification */
    4959    44951422 :         agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
    4960    44951422 :         fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4961             :                         block->bb_u.s.bb_leftsib);
    4962    44951447 :         if (!fa)
    4963    44951450 :                 fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4964             :                                 block->bb_u.s.bb_rightsib);
    4965             :         return fa;
    4966             : }
    4967             : 
    4968             : /*
    4969             :  * For the given limits on leaf and keyptr records per block, calculate the
    4970             :  * height of the tree needed to index the number of leaf records.
    4971             :  */
    4972             : unsigned int
    4973      219186 : xfs_btree_compute_maxlevels(
    4974             :         const unsigned int      *limits,
    4975             :         unsigned long long      records)
    4976             : {
    4977      219186 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4978      219186 :         unsigned int            height = 1;
    4979             : 
    4980     1433227 :         while (level_blocks > 1) {
    4981     1214041 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    4982     1214041 :                 height++;
    4983             :         }
    4984             : 
    4985      219186 :         return height;
    4986             : }
    4987             : 
    4988             : /*
    4989             :  * For the given limits on leaf and keyptr records per block, calculate the
    4990             :  * number of blocks needed to index the given number of leaf records.
    4991             :  */
    4992             : unsigned long long
    4993     7502990 : xfs_btree_calc_size(
    4994             :         const unsigned int      *limits,
    4995             :         unsigned long long      records)
    4996             : {
    4997     7502990 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4998     7502990 :         unsigned long long      blocks = level_blocks;
    4999             : 
    5000    21348087 :         while (level_blocks > 1) {
    5001    13845097 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    5002    13845097 :                 blocks += level_blocks;
    5003             :         }
    5004             : 
    5005     7502990 :         return blocks;
    5006             : }
    5007             : 
    5008             : /*
    5009             :  * Given a number of available blocks for the btree to consume with records and
    5010             :  * pointers, calculate the height of the tree needed to index all the records
    5011             :  * that space can hold based on the number of pointers each interior node
    5012             :  * holds.
    5013             :  *
    5014             :  * We start by assuming a single level tree consumes a single block, then track
    5015             :  * the number of blocks each node level consumes until we no longer have space
    5016             :  * to store the next node level. At this point, we are indexing all the leaf
    5017             :  * blocks in the space, and there's no more free space to split the tree any
    5018             :  * further. That's our maximum btree height.
    5019             :  */
    5020             : unsigned int
    5021  1717004792 : xfs_btree_space_to_height(
    5022             :         const unsigned int      *limits,
    5023             :         unsigned long long      leaf_blocks)
    5024             : {
    5025             :         /*
    5026             :          * The root btree block can have fewer than minrecs pointers in it
    5027             :          * because the tree might not be big enough to require that amount of
    5028             :          * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
    5029             :          */
    5030  1717004792 :         unsigned long long      node_blocks = 2;
    5031  1717004792 :         unsigned long long      blocks_left = leaf_blocks - 1;
    5032  1717004792 :         unsigned int            height = 1;
    5033             : 
    5034  1717004792 :         if (leaf_blocks < 1)
    5035             :                 return 0;
    5036             : 
    5037 13736006152 :         while (node_blocks < blocks_left) {
    5038 12019001360 :                 blocks_left -= node_blocks;
    5039 12019001360 :                 node_blocks *= limits[1];
    5040 12019001360 :                 height++;
    5041             :         }
    5042             : 
    5043             :         return height;
    5044             : }
    5045             : 
    5046             : /*
    5047             :  * Query a regular btree for all records overlapping a given interval.
    5048             :  * Start with a LE lookup of the key of low_rec and return all records
    5049             :  * until we find a record with a key greater than the key of high_rec.
    5050             :  */
    5051             : STATIC int
    5052  4400785467 : xfs_btree_simple_query_range(
    5053             :         struct xfs_btree_cur            *cur,
    5054             :         const union xfs_btree_key       *low_key,
    5055             :         const union xfs_btree_key       *high_key,
    5056             :         xfs_btree_query_range_fn        fn,
    5057             :         void                            *priv)
    5058             : {
    5059  4400785467 :         union xfs_btree_rec             *recp;
    5060  4400785467 :         union xfs_btree_key             rec_key;
    5061  4400785467 :         int                             stat;
    5062  4400785467 :         bool                            firstrec = true;
    5063  4400785467 :         int                             error;
    5064             : 
    5065  4400785467 :         ASSERT(cur->bc_ops->init_high_key_from_rec);
    5066  4400785467 :         ASSERT(cur->bc_ops->diff_two_keys);
    5067             : 
    5068             :         /*
    5069             :          * Find the leftmost record.  The btree cursor must be set
    5070             :          * to the low record used to generate low_key.
    5071             :          */
    5072  4400785467 :         stat = 0;
    5073  4400785467 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    5074  4400794459 :         if (error)
    5075           4 :                 goto out;
    5076             : 
    5077             :         /* Nothing?  See if there's anything to the right. */
    5078  4400794455 :         if (!stat) {
    5079  1769755506 :                 error = xfs_btree_increment(cur, 0, &stat);
    5080  1769768468 :                 if (error)
    5081           0 :                         goto out;
    5082             :         }
    5083             : 
    5084 >10338*10^7 :         while (stat) {
    5085             :                 /* Find the record. */
    5086 >10222*10^7 :                 error = xfs_btree_get_rec(cur, &recp, &stat);
    5087 >10187*10^7 :                 if (error || !stat)
    5088             :                         break;
    5089             : 
    5090             :                 /* Skip if low_key > high_key(rec). */
    5091 >10187*10^7 :                 if (firstrec) {
    5092  3411027589 :                         cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
    5093  3411030846 :                         firstrec = false;
    5094  3411030846 :                         if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
    5095  2631140896 :                                 goto advloop;
    5096             :                 }
    5097             : 
    5098             :                 /* Stop if low_key(rec) > high_key. */
    5099 99240563007 :                 cur->bc_ops->init_key_from_rec(&rec_key, recp);
    5100 97864164311 :                 if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
    5101             :                         break;
    5102             : 
    5103             :                 /* Callback */
    5104 95785073280 :                 error = fn(cur, recp, priv);
    5105 95901717653 :                 if (error)
    5106             :                         break;
    5107             : 
    5108 95901715420 : advloop:
    5109             :                 /* Move on to the next record. */
    5110 98532856316 :                 error = xfs_btree_increment(cur, 0, &stat);
    5111 98982402807 :                 if (error)
    5112             :                         break;
    5113             :         }
    5114             : 
    5115  4400859398 : out:
    5116  4400859402 :         return error;
    5117             : }
    5118             : 
    5119             : /*
    5120             :  * Query an overlapped interval btree for all records overlapping a given
    5121             :  * interval.  This function roughly follows the algorithm given in
    5122             :  * "Interval Trees" of _Introduction to Algorithms_, which is section
    5123             :  * 14.3 in the 2nd and 3rd editions.
    5124             :  *
    5125             :  * First, generate keys for the low and high records passed in.
    5126             :  *
    5127             :  * For any leaf node, generate the high and low keys for the record.
    5128             :  * If the record keys overlap with the query low/high keys, pass the
    5129             :  * record to the function iterator.
    5130             :  *
    5131             :  * For any internal node, compare the low and high keys of each
    5132             :  * pointer against the query low/high keys.  If there's an overlap,
    5133             :  * follow the pointer.
    5134             :  *
    5135             :  * As an optimization, we stop scanning a block when we find a low key
    5136             :  * that is greater than the query's high key.
    5137             :  */
    5138             : STATIC int
    5139  1576493168 : xfs_btree_overlapped_query_range(
    5140             :         struct xfs_btree_cur            *cur,
    5141             :         const union xfs_btree_key       *low_key,
    5142             :         const union xfs_btree_key       *high_key,
    5143             :         xfs_btree_query_range_fn        fn,
    5144             :         void                            *priv)
    5145             : {
    5146  1576493168 :         union xfs_btree_ptr             ptr;
    5147  1576493168 :         union xfs_btree_ptr             *pp;
    5148  1576493168 :         union xfs_btree_key             rec_key;
    5149  1576493168 :         union xfs_btree_key             rec_hkey;
    5150  1576493168 :         union xfs_btree_key             *lkp;
    5151  1576493168 :         union xfs_btree_key             *hkp;
    5152  1576493168 :         union xfs_btree_rec             *recp;
    5153  1576493168 :         struct xfs_btree_block          *block;
    5154  1576493168 :         int                             level;
    5155  1576493168 :         struct xfs_buf                  *bp;
    5156  1576493168 :         int                             i;
    5157  1576493168 :         int                             error;
    5158             : 
    5159             :         /* Load the root of the btree. */
    5160  1576493168 :         level = cur->bc_nlevels - 1;
    5161  1576493168 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    5162  1576499933 :         error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
    5163  1576497934 :         if (error)
    5164             :                 return error;
    5165  1576498924 :         xfs_btree_get_block(cur, level, &bp);
    5166  1576495840 :         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    5167             : #ifdef DEBUG
    5168  1576495081 :         error = xfs_btree_check_block(cur, block, level, bp);
    5169  1576490830 :         if (error)
    5170           0 :                 goto out;
    5171             : #endif
    5172  1576490830 :         cur->bc_levels[level].ptr = 1;
    5173             : 
    5174 >20264*10^7 :         while (level < cur->bc_nlevels) {
    5175 >20106*10^7 :                 block = xfs_btree_get_block(cur, level, &bp);
    5176             : 
    5177             :                 /* End of node, pop back towards the root. */
    5178 >20106*10^7 :                 if (cur->bc_levels[level].ptr >
    5179 >20106*10^7 :                                         be16_to_cpu(block->bb_numrecs)) {
    5180   566207000 : pop_up:
    5181  4329855348 :                         if (level < cur->bc_nlevels - 1)
    5182  2754909575 :                                 cur->bc_levels[level + 1].ptr++;
    5183  4329855348 :                         level++;
    5184  4329855348 :                         continue;
    5185             :                 }
    5186             : 
    5187 >20049*10^7 :                 if (level == 0) {
    5188             :                         /* Handle a leaf node. */
    5189 >14600*10^7 :                         recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
    5190             :                                         block);
    5191             : 
    5192 >14600*10^7 :                         cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
    5193 >14598*10^7 :                         cur->bc_ops->init_key_from_rec(&rec_key, recp);
    5194             : 
    5195             :                         /*
    5196             :                          * If (query's high key < record's low key), then there
    5197             :                          * are no more interesting records in this block.  Pop
    5198             :                          * up to the leaf level to find more record blocks.
    5199             :                          *
    5200             :                          * If (record's high key >= query's low key) and
    5201             :                          *    (query's high key >= record's low key), then
    5202             :                          * this record overlaps the query range; callback.
    5203             :                          */
    5204 >14599*10^7 :                         if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
    5205  1555439833 :                                 goto pop_up;
    5206 >14445*10^7 :                         if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
    5207 12325337818 :                                 error = fn(cur, recp, priv);
    5208 12325475310 :                                 if (error)
    5209             :                                         break;
    5210             :                         }
    5211 >14444*10^7 :                         cur->bc_levels[level].ptr++;
    5212 >14444*10^7 :                         continue;
    5213             :                 }
    5214             : 
    5215             :                 /* Handle an internal node. */
    5216 54494087219 :                 lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
    5217 54494087219 :                 hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
    5218             :                                 block);
    5219 54494087219 :                 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
    5220             : 
    5221             :                 /*
    5222             :                  * If (query's high key < pointer's low key), then there are no
    5223             :                  * more interesting keys in this block.  Pop up one leaf level
    5224             :                  * to continue looking for records.
    5225             :                  *
    5226             :                  * If (pointer's high key >= query's low key) and
    5227             :                  *    (query's high key >= pointer's low key), then
    5228             :                  * this record overlaps the query range; follow pointer.
    5229             :                  */
    5230 54485628166 :                 if (xfs_btree_keycmp_lt(cur, high_key, lkp))
    5231  2208208515 :                         goto pop_up;
    5232 52283026434 :                 if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
    5233  2757934374 :                         level--;
    5234  2757934374 :                         error = xfs_btree_lookup_get_block(cur, level, pp,
    5235             :                                         &block);
    5236  2757946145 :                         if (error)
    5237           0 :                                 goto out;
    5238  2757946145 :                         xfs_btree_get_block(cur, level, &bp);
    5239  2757934366 :                         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    5240             : #ifdef DEBUG
    5241  2757932465 :                         error = xfs_btree_check_block(cur, block, level, bp);
    5242  2757938301 :                         if (error)
    5243           0 :                                 goto out;
    5244             : #endif
    5245  2757938301 :                         cur->bc_levels[level].ptr = 1;
    5246  2757938301 :                         continue;
    5247             :                 }
    5248 49536804887 :                 cur->bc_levels[level].ptr++;
    5249             :         }
    5250             : 
    5251  1576485350 : out:
    5252             :         /*
    5253             :          * If we don't end this function with the cursor pointing at a record
    5254             :          * block, a subsequent non-error cursor deletion will not release
    5255             :          * node-level buffers, causing a buffer leak.  This is quite possible
    5256             :          * with a zero-results range query, so release the buffers if we
    5257             :          * failed to return any results.
    5258             :          */
    5259  1576485350 :         if (cur->bc_levels[0].bp == NULL) {
    5260      608205 :                 for (i = 0; i < cur->bc_nlevels; i++) {
    5261      322861 :                         if (cur->bc_levels[i].bp) {
    5262       26084 :                                 xfs_trans_brelse(cur->bc_tp,
    5263             :                                                 cur->bc_levels[i].bp);
    5264       26083 :                                 cur->bc_levels[i].bp = NULL;
    5265       26083 :                                 cur->bc_levels[i].ptr = 0;
    5266       26083 :                                 cur->bc_levels[i].ra = 0;
    5267             :                         }
    5268             :                 }
    5269             :         }
    5270             : 
    5271             :         return error;
    5272             : }
    5273             : 
    5274             : static inline void
    5275 21524464193 : xfs_btree_key_from_irec(
    5276             :         struct xfs_btree_cur            *cur,
    5277             :         union xfs_btree_key             *key,
    5278             :         const union xfs_btree_irec      *irec)
    5279             : {
    5280 21524464193 :         union xfs_btree_rec             rec;
    5281             : 
    5282 21524464193 :         cur->bc_rec = *irec;
    5283 21524464193 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    5284 21524315414 :         cur->bc_ops->init_key_from_rec(key, &rec);
    5285 21524087905 : }
    5286             : 
    5287             : /*
    5288             :  * Query a btree for all records overlapping a given interval of keys.  The
    5289             :  * supplied function will be called with each record found; return one of the
    5290             :  * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
    5291             :  * code.  This function returns -ECANCELED, zero, or a negative error code.
    5292             :  */
    5293             : int
    5294  5972090878 : xfs_btree_query_range(
    5295             :         struct xfs_btree_cur            *cur,
    5296             :         const union xfs_btree_irec      *low_rec,
    5297             :         const union xfs_btree_irec      *high_rec,
    5298             :         xfs_btree_query_range_fn        fn,
    5299             :         void                            *priv)
    5300             : {
    5301  5972090878 :         union xfs_btree_key             low_key;
    5302  5972090878 :         union xfs_btree_key             high_key;
    5303             : 
    5304             :         /* Find the keys of both ends of the interval. */
    5305  5972090878 :         xfs_btree_key_from_irec(cur, &high_key, high_rec);
    5306  5972187427 :         xfs_btree_key_from_irec(cur, &low_key, low_rec);
    5307             : 
    5308             :         /* Enforce low key <= high key. */
    5309  5972182340 :         if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
    5310             :                 return -EINVAL;
    5311             : 
    5312  5972391010 :         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5313  4395894095 :                 return xfs_btree_simple_query_range(cur, &low_key,
    5314             :                                 &high_key, fn, priv);
    5315  1576496915 :         return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
    5316             :                         fn, priv);
    5317             : }
    5318             : 
    5319             : /* Query a btree for all records. */
    5320             : int
    5321     4904048 : xfs_btree_query_all(
    5322             :         struct xfs_btree_cur            *cur,
    5323             :         xfs_btree_query_range_fn        fn,
    5324             :         void                            *priv)
    5325             : {
    5326     4904048 :         union xfs_btree_key             low_key;
    5327     4904048 :         union xfs_btree_key             high_key;
    5328             : 
    5329     4904048 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5330     4904048 :         memset(&low_key, 0, sizeof(low_key));
    5331     4904048 :         memset(&high_key, 0xFF, sizeof(high_key));
    5332             : 
    5333     4904048 :         return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
    5334             : }
    5335             : 
    5336             : static int
    5337   106143492 : xfs_btree_count_blocks_helper(
    5338             :         struct xfs_btree_cur    *cur,
    5339             :         int                     level,
    5340             :         void                    *data)
    5341             : {
    5342   106143492 :         xfs_extlen_t            *blocks = data;
    5343   106143492 :         (*blocks)++;
    5344             : 
    5345   106143492 :         return 0;
    5346             : }
    5347             : 
    5348             : /* Count the blocks in a btree and return the result in *blocks. */
    5349             : int
    5350     5926329 : xfs_btree_count_blocks(
    5351             :         struct xfs_btree_cur    *cur,
    5352             :         xfs_extlen_t            *blocks)
    5353             : {
    5354     5926329 :         *blocks = 0;
    5355     5926329 :         return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
    5356             :                         XFS_BTREE_VISIT_ALL, blocks);
    5357             : }
    5358             : 
    5359             : /* Compare two btree pointers. */
    5360             : int64_t
    5361    28543099 : xfs_btree_diff_two_ptrs(
    5362             :         struct xfs_btree_cur            *cur,
    5363             :         const union xfs_btree_ptr       *a,
    5364             :         const union xfs_btree_ptr       *b)
    5365             : {
    5366    28543099 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5367    13724397 :                 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
    5368    14818702 :         return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
    5369             : }
    5370             : 
    5371             : struct xfs_btree_has_records {
    5372             :         /* Keys for the start and end of the range we want to know about. */
    5373             :         union xfs_btree_key             start_key;
    5374             :         union xfs_btree_key             end_key;
    5375             : 
    5376             :         /* Mask for key comparisons, if desired. */
    5377             :         const union xfs_btree_key       *key_mask;
    5378             : 
    5379             :         /* Highest record key we've seen so far. */
    5380             :         union xfs_btree_key             high_key;
    5381             : 
    5382             :         enum xbtree_recpacking          outcome;
    5383             : };
    5384             : 
    5385             : STATIC int
    5386  5305180735 : xfs_btree_has_records_helper(
    5387             :         struct xfs_btree_cur            *cur,
    5388             :         const union xfs_btree_rec       *rec,
    5389             :         void                            *priv)
    5390             : {
    5391  5305180735 :         union xfs_btree_key             rec_key;
    5392  5305180735 :         union xfs_btree_key             rec_high_key;
    5393  5305180735 :         struct xfs_btree_has_records    *info = priv;
    5394  5305180735 :         enum xbtree_key_contig          key_contig;
    5395             : 
    5396  5305180735 :         cur->bc_ops->init_key_from_rec(&rec_key, rec);
    5397             : 
    5398  5305268768 :         if (info->outcome == XBTREE_RECPACKING_EMPTY) {
    5399     3916080 :                 info->outcome = XBTREE_RECPACKING_SPARSE;
    5400             : 
    5401             :                 /*
    5402             :                  * If the first record we find does not overlap the start key,
    5403             :                  * then there is a hole at the start of the search range.
    5404             :                  * Classify this as sparse and stop immediately.
    5405             :                  */
    5406     3916080 :                 if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
    5407             :                                         info->key_mask))
    5408             :                         return -ECANCELED;
    5409             :         } else {
    5410             :                 /*
    5411             :                  * If a subsequent record does not overlap with the any record
    5412             :                  * we've seen so far, there is a hole in the middle of the
    5413             :                  * search range.  Classify this as sparse and stop.
    5414             :                  * If the keys overlap and this btree does not allow overlap,
    5415             :                  * signal corruption.
    5416             :                  */
    5417  5301352688 :                 key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
    5418             :                                         &rec_key, info->key_mask);
    5419  5301642109 :                 if (key_contig == XBTREE_KEY_OVERLAP &&
    5420   758678093 :                                 !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5421             :                         return -EFSCORRUPTED;
    5422  5301642109 :                 if (key_contig == XBTREE_KEY_GAP)
    5423             :                         return -ECANCELED;
    5424             :         }
    5425             : 
    5426             :         /*
    5427             :          * If high_key(rec) is larger than any other high key we've seen,
    5428             :          * remember it for later.
    5429             :          */
    5430  5305558226 :         cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
    5431  5305192178 :         if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
    5432             :                                 info->key_mask))
    5433  4559551663 :                 info->high_key = rec_high_key; /* struct copy */
    5434             : 
    5435             :         return 0;
    5436             : }
    5437             : 
    5438             : /*
    5439             :  * Scan part of the keyspace of a btree and tell us if that keyspace does not
    5440             :  * map to any records; is fully mapped to records; or is partially mapped to
    5441             :  * records.  This is the btree record equivalent to determining if a file is
    5442             :  * sparse.
    5443             :  *
    5444             :  * For most btree types, the record scan should use all available btree key
    5445             :  * fields to compare the keys encountered.  These callers should pass NULL for
    5446             :  * @mask.  However, some callers (e.g.  scanning physical space in the rmapbt)
    5447             :  * want to ignore some part of the btree record keyspace when performing the
    5448             :  * comparison.  These callers should pass in a union xfs_btree_key object with
    5449             :  * the fields that *should* be a part of the comparison set to any nonzero
    5450             :  * value, and the rest zeroed.
    5451             :  */
    5452             : int
    5453  4790863338 : xfs_btree_has_records(
    5454             :         struct xfs_btree_cur            *cur,
    5455             :         const union xfs_btree_irec      *low,
    5456             :         const union xfs_btree_irec      *high,
    5457             :         const union xfs_btree_key       *mask,
    5458             :         enum xbtree_recpacking          *outcome)
    5459             : {
    5460  4790863338 :         struct xfs_btree_has_records    info = {
    5461             :                 .outcome                = XBTREE_RECPACKING_EMPTY,
    5462             :                 .key_mask               = mask,
    5463             :         };
    5464  4790863338 :         int                             error;
    5465             : 
    5466             :         /* Not all btrees support this operation. */
    5467  4790863338 :         if (!cur->bc_ops->keys_contiguous) {
    5468           0 :                 ASSERT(0);
    5469           0 :                 return -EOPNOTSUPP;
    5470             :         }
    5471             : 
    5472  4790863338 :         xfs_btree_key_from_irec(cur, &info.start_key, low);
    5473  4790878043 :         xfs_btree_key_from_irec(cur, &info.end_key, high);
    5474             : 
    5475  4790854462 :         error = xfs_btree_query_range(cur, low, high,
    5476             :                         xfs_btree_has_records_helper, &info);
    5477  4790907482 :         if (error == -ECANCELED)
    5478           0 :                 goto out;
    5479  4790907482 :         if (error)
    5480             :                 return error;
    5481             : 
    5482  4790907482 :         if (info.outcome == XBTREE_RECPACKING_EMPTY)
    5483  4786991335 :                 goto out;
    5484             : 
    5485             :         /*
    5486             :          * If the largest high_key(rec) we saw during the walk is greater than
    5487             :          * the end of the search range, classify this as full.  Otherwise,
    5488             :          * there is a hole at the end of the search range.
    5489             :          */
    5490     3916147 :         if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
    5491             :                                 mask))
    5492     3916154 :                 info.outcome = XBTREE_RECPACKING_FULL;
    5493             : 
    5494           0 : out:
    5495  4790907489 :         *outcome = info.outcome;
    5496  4790907489 :         return 0;
    5497             : }
    5498             : 
    5499             : /* Are there more records in this btree? */
    5500             : bool
    5501   135655613 : xfs_btree_has_more_records(
    5502             :         struct xfs_btree_cur    *cur)
    5503             : {
    5504   135655613 :         struct xfs_btree_block  *block;
    5505   135655613 :         struct xfs_buf          *bp;
    5506             : 
    5507   135655613 :         block = xfs_btree_get_block(cur, 0, &bp);
    5508             : 
    5509             :         /* There are still records in this block. */
    5510   271311226 :         if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
    5511             :                 return true;
    5512             : 
    5513             :         /* There are more record blocks. */
    5514      992024 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5515      315845 :                 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
    5516             :         else
    5517      676179 :                 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
    5518             : }
    5519             : 
    5520             : /* Set up all the btree cursor caches. */
    5521             : int __init
    5522          12 : xfs_btree_init_cur_caches(void)
    5523             : {
    5524          12 :         int             error;
    5525             : 
    5526          12 :         error = xfs_allocbt_init_cur_cache();
    5527          12 :         if (error)
    5528             :                 return error;
    5529          12 :         error = xfs_inobt_init_cur_cache();
    5530          12 :         if (error)
    5531           0 :                 goto err;
    5532          12 :         error = xfs_bmbt_init_cur_cache();
    5533          12 :         if (error)
    5534           0 :                 goto err;
    5535          12 :         error = xfs_rmapbt_init_cur_cache();
    5536          12 :         if (error)
    5537           0 :                 goto err;
    5538          12 :         error = xfs_refcountbt_init_cur_cache();
    5539          12 :         if (error)
    5540           0 :                 goto err;
    5541          12 :         error = xfs_rtrmapbt_init_cur_cache();
    5542          12 :         if (error)
    5543           0 :                 goto err;
    5544          12 :         error = xfs_rtrefcountbt_init_cur_cache();
    5545          12 :         if (error)
    5546           0 :                 goto err;
    5547             : 
    5548             :         return 0;
    5549           0 : err:
    5550           0 :         xfs_btree_destroy_cur_caches();
    5551           0 :         return error;
    5552             : }
    5553             : 
    5554             : /* Destroy all the btree cursor caches, if they've been allocated. */
    5555             : void
    5556          12 : xfs_btree_destroy_cur_caches(void)
    5557             : {
    5558          12 :         xfs_allocbt_destroy_cur_cache();
    5559          12 :         xfs_inobt_destroy_cur_cache();
    5560          12 :         xfs_bmbt_destroy_cur_cache();
    5561          12 :         xfs_rmapbt_destroy_cur_cache();
    5562          12 :         xfs_refcountbt_destroy_cur_cache();
    5563          12 :         xfs_rtrmapbt_destroy_cur_cache();
    5564          12 :         xfs_rtrefcountbt_destroy_cur_cache();
    5565          12 : }
    5566             : 
    5567             : /* Move the btree cursor before the first record. */
    5568             : int
    5569   331924518 : xfs_btree_goto_left_edge(
    5570             :         struct xfs_btree_cur    *cur)
    5571             : {
    5572   331924518 :         int                     stat = 0;
    5573   331924518 :         int                     error;
    5574             : 
    5575   331924518 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5576   331924518 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    5577   331924517 :         if (error)
    5578             :                 return error;
    5579   331924517 :         if (!stat)
    5580             :                 return 0;
    5581             : 
    5582           0 :         error = xfs_btree_decrement(cur, 0, &stat);
    5583           0 :         if (error)
    5584             :                 return error;
    5585           0 :         if (stat != 0) {
    5586           0 :                 ASSERT(0);
    5587           0 :                 xfs_btree_mark_sick(cur);
    5588           0 :                 return -EFSCORRUPTED;
    5589             :         }
    5590             : 
    5591             :         return 0;
    5592             : }
    5593             : 
    5594             : /* Allocate a block for an inode-rooted metadata btree. */
    5595             : int
    5596      162793 : xfs_btree_alloc_imeta_block(
    5597             :         struct xfs_btree_cur            *cur,
    5598             :         const union xfs_btree_ptr       *start,
    5599             :         union xfs_btree_ptr             *new,
    5600             :         int                             *stat)
    5601             : {
    5602      162793 :         struct xfs_alloc_arg            args = {
    5603      162793 :                 .mp                     = cur->bc_mp,
    5604      162793 :                 .tp                     = cur->bc_tp,
    5605             :                 .resv                   = XFS_AG_RESV_IMETA,
    5606             :                 .minlen                 = 1,
    5607             :                 .maxlen                 = 1,
    5608             :                 .prod                   = 1,
    5609             :         };
    5610      162793 :         struct xfs_inode                *ip = cur->bc_ino.ip;
    5611      162793 :         int                             error;
    5612             : 
    5613      162793 :         ASSERT(xfs_is_metadir_inode(ip));
    5614      162793 :         ASSERT(XFS_IS_DQDETACHED(cur->bc_mp, ip));
    5615             : 
    5616      162793 :         xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, cur->bc_ino.whichfork);
    5617      162793 :         error = xfs_alloc_vextent_start_ag(&args,
    5618      162793 :                         XFS_INO_TO_FSB(cur->bc_mp, ip->i_ino));
    5619      162793 :         if (error)
    5620             :                 return error;
    5621      162793 :         if (args.fsbno == NULLFSBLOCK) {
    5622           0 :                 *stat = 0;
    5623           0 :                 return 0;
    5624             :         }
    5625      162793 :         ASSERT(args.len == 1);
    5626             : 
    5627      162793 :         xfs_imeta_resv_alloc_extent(ip, &args);
    5628      162793 :         cur->bc_ino.allocated++;
    5629             : 
    5630      162793 :         new->l = cpu_to_be64(args.fsbno);
    5631      162793 :         *stat = 1;
    5632      162793 :         return 0;
    5633             : }
    5634             : 
    5635             : /* Free a block from an inode-rooted metadata btree. */
    5636             : int
    5637       16485 : xfs_btree_free_imeta_block(
    5638             :         struct xfs_btree_cur    *cur,
    5639             :         struct xfs_buf          *bp)
    5640             : {
    5641       16485 :         struct xfs_owner_info   oinfo;
    5642       16485 :         struct xfs_mount        *mp = cur->bc_mp;
    5643       16485 :         struct xfs_inode        *ip = cur->bc_ino.ip;
    5644       16485 :         struct xfs_trans        *tp = cur->bc_tp;
    5645       16485 :         xfs_fsblock_t           fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
    5646       16485 :         int                     error;
    5647             : 
    5648       16485 :         ASSERT(xfs_is_metadir_inode(ip));
    5649       16485 :         ASSERT(XFS_IS_DQDETACHED(cur->bc_mp, ip));
    5650             : 
    5651       16485 :         xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
    5652       16485 :         error = xfs_free_extent_later(tp, fsbno, 1, &oinfo, XFS_AG_RESV_IMETA,
    5653             :                         0);
    5654       16485 :         if (error)
    5655             :                 return error;
    5656             : 
    5657       16485 :         xfs_imeta_resv_free_extent(ip, tp, 1);
    5658       16485 :         return 0;
    5659             : }

Generated by: LCOV version 1.14