LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_btree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-acha @ Mon Jul 31 20:08:06 PDT 2023 Lines: 1988 2263 87.8 %
Date: 2023-07-31 20:08:07 Functions: 119 122 97.5 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_bit.h"
      13             : #include "xfs_mount.h"
      14             : #include "xfs_inode.h"
      15             : #include "xfs_trans.h"
      16             : #include "xfs_buf_item.h"
      17             : #include "xfs_btree.h"
      18             : #include "xfs_errortag.h"
      19             : #include "xfs_error.h"
      20             : #include "xfs_trace.h"
      21             : #include "xfs_alloc.h"
      22             : #include "xfs_log.h"
      23             : #include "xfs_btree_staging.h"
      24             : #include "xfs_ag.h"
      25             : #include "xfs_alloc_btree.h"
      26             : #include "xfs_ialloc_btree.h"
      27             : #include "xfs_bmap_btree.h"
      28             : #include "xfs_rmap_btree.h"
      29             : #include "xfs_refcount_btree.h"
      30             : #include "xfs_health.h"
      31             : #include "scrub/xfile.h"
      32             : #include "scrub/xfbtree.h"
      33             : #include "xfs_btree_mem.h"
      34             : 
      35             : /*
      36             :  * Btree magic numbers.
      37             :  */
      38             : uint32_t
      39 >15547*10^7 : xfs_btree_magic(
      40             :         struct xfs_mount                *mp,
      41             :         const struct xfs_btree_ops      *ops)
      42             : {
      43 >15547*10^7 :         int                             idx = xfs_has_crc(mp) ? 1 : 0;
      44 >15547*10^7 :         __be32                          magic = ops->buf_ops->magic[idx];
      45             : 
      46             :         /* Ensure we asked for crc for crc-only magics. */
      47 >15547*10^7 :         ASSERT(magic != 0);
      48 >15547*10^7 :         return be32_to_cpu(magic);
      49             : }
      50             : 
      51             : /*
      52             :  * These sibling pointer checks are optimised for null sibling pointers. This
      53             :  * happens a lot, and we don't need to byte swap at runtime if the sibling
      54             :  * pointer is NULL.
      55             :  *
      56             :  * These are explicitly marked at inline because the cost of calling them as
      57             :  * functions instead of inlining them is about 36 bytes extra code per call site
      58             :  * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
      59             :  * two sibling check functions reduces the compiled code size by over 300
      60             :  * bytes.
      61             :  */
      62             : static inline xfs_failaddr_t
      63  3654529052 : xfs_btree_check_lblock_siblings(
      64             :         struct xfs_mount        *mp,
      65             :         struct xfs_btree_cur    *cur,
      66             :         int                     level,
      67             :         xfs_fsblock_t           fsb,
      68             :         __be64                  dsibling)
      69             : {
      70  3654529052 :         xfs_fsblock_t           sibling;
      71             : 
      72  3654529052 :         if (dsibling == cpu_to_be64(NULLFSBLOCK))
      73             :                 return NULL;
      74             : 
      75   521879926 :         sibling = be64_to_cpu(dsibling);
      76   521879926 :         if (sibling == fsb)
      77           0 :                 return __this_address;
      78   521879926 :         if (level >= 0) {
      79   508857333 :                 if (!xfs_btree_check_lptr(cur, sibling, level + 1))
      80           0 :                         return __this_address;
      81    13022593 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
      82           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
      83           0 :                         return __this_address;
      84             :         } else {
      85    13022593 :                 if (!xfs_verify_fsbno(mp, sibling))
      86           0 :                         return __this_address;
      87             :         }
      88             : 
      89             :         return NULL;
      90             : }
      91             : 
      92             : static inline xfs_failaddr_t
      93 >30764*10^7 : xfs_btree_check_sblock_siblings(
      94             :         struct xfs_perag        *pag,
      95             :         struct xfs_btree_cur    *cur,
      96             :         int                     level,
      97             :         xfs_agblock_t           agbno,
      98             :         __be32                  dsibling)
      99             : {
     100 >30764*10^7 :         xfs_agblock_t           sibling;
     101             : 
     102 >30764*10^7 :         if (dsibling == cpu_to_be32(NULLAGBLOCK))
     103             :                 return NULL;
     104             : 
     105 >20917*10^7 :         sibling = be32_to_cpu(dsibling);
     106 >20917*10^7 :         if (sibling == agbno)
     107           0 :                 return __this_address;
     108 >20917*10^7 :         if (level >= 0) {
     109 >20911*10^7 :                 if (!xfs_btree_check_sptr(cur, sibling, level + 1))
     110           0 :                         return __this_address;
     111    57498716 :         } else if (cur && (cur->bc_flags & XFS_BTREE_IN_XFILE)) {
     112           0 :                 if (!xfbtree_verify_xfileoff(cur, sibling))
     113           0 :                         return __this_address;
     114             :         } else {
     115    57498716 :                 if (!xfs_verify_agbno(pag, sibling))
     116           0 :                         return __this_address;
     117             :         }
     118             :         return NULL;
     119             : }
     120             : 
     121             : /*
     122             :  * Check a long btree block header.  Return the address of the failing check,
     123             :  * or NULL if everything is ok.
     124             :  */
     125             : xfs_failaddr_t
     126  1812444744 : __xfs_btree_check_lblock(
     127             :         struct xfs_btree_cur    *cur,
     128             :         struct xfs_btree_block  *block,
     129             :         int                     level,
     130             :         struct xfs_buf          *bp)
     131             : {
     132  1812444744 :         struct xfs_mount        *mp = cur->bc_mp;
     133  1812444744 :         int                     crc = xfs_has_crc(mp);
     134  1812444744 :         xfs_failaddr_t          fa;
     135  1812444744 :         xfs_fsblock_t           fsb = NULLFSBLOCK;
     136             : 
     137  1812444744 :         if (crc) {
     138  1812440612 :                 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
     139           0 :                         return __this_address;
     140  3624873414 :                 if (block->bb_u.l.bb_blkno !=
     141  1812436707 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     142           0 :                         return __this_address;
     143  1812436707 :                 if (block->bb_u.l.bb_pad != cpu_to_be32(0))
     144           0 :                         return __this_address;
     145             :         }
     146             : 
     147  3624881678 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     148           0 :                 return __this_address;
     149  1812441142 :         if (be16_to_cpu(block->bb_level) != level)
     150          38 :                 return __this_address;
     151  3624887164 :         if (be16_to_cpu(block->bb_numrecs) >
     152  1812441343 :             cur->bc_ops->get_maxrecs(cur, level))
     153           0 :                 return __this_address;
     154             : 
     155  1812445821 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp)
     156  1328473686 :                 fsb = xfbtree_buf_to_xfoff(cur, bp);
     157   483972135 :         else if (bp)
     158   469375185 :                 fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
     159             : 
     160  1812445811 :         fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     161             :                         block->bb_u.l.bb_leftsib);
     162  1812441170 :         if (!fa)
     163  1812441362 :                 fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
     164             :                                 block->bb_u.l.bb_rightsib);
     165             :         return fa;
     166             : }
     167             : 
     168             : /* Check a long btree block header. */
     169             : static int
     170  1808800568 : xfs_btree_check_lblock(
     171             :         struct xfs_btree_cur    *cur,
     172             :         struct xfs_btree_block  *block,
     173             :         int                     level,
     174             :         struct xfs_buf          *bp)
     175             : {
     176  1808800568 :         struct xfs_mount        *mp = cur->bc_mp;
     177  1808800568 :         xfs_failaddr_t          fa;
     178             : 
     179  1808800568 :         fa = __xfs_btree_check_lblock(cur, block, level, bp);
     180  3617601860 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     181  1808799393 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
     182           0 :                 if (bp)
     183           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     184           0 :                 xfs_btree_mark_sick(cur);
     185           0 :                 return -EFSCORRUPTED;
     186             :         }
     187             :         return 0;
     188             : }
     189             : 
     190             : /*
     191             :  * Check a short btree block header.  Return the address of the failing check,
     192             :  * or NULL if everything is ok.
     193             :  */
     194             : xfs_failaddr_t
     195 >15353*10^7 : __xfs_btree_check_sblock(
     196             :         struct xfs_btree_cur    *cur,
     197             :         struct xfs_btree_block  *block,
     198             :         int                     level,
     199             :         struct xfs_buf          *bp)
     200             : {
     201 >15353*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     202 >15353*10^7 :         struct xfs_perag        *pag = cur->bc_ag.pag;
     203 >15353*10^7 :         int                     crc = xfs_has_crc(mp);
     204 >15353*10^7 :         xfs_failaddr_t          fa;
     205 >15353*10^7 :         xfs_agblock_t           agbno = NULLAGBLOCK;
     206             : 
     207 >15353*10^7 :         if (crc) {
     208 >15352*10^7 :                 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
     209           0 :                         return __this_address;
     210 >30735*10^7 :                 if (block->bb_u.s.bb_blkno !=
     211 >15367*10^7 :                     cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
     212           0 :                         return __this_address;
     213             :         }
     214             : 
     215 >30739*10^7 :         if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops))
     216           0 :                 return __this_address;
     217 >15377*10^7 :         if (be16_to_cpu(block->bb_level) != level)
     218           0 :                 return __this_address;
     219 >30762*10^7 :         if (be16_to_cpu(block->bb_numrecs) >
     220 >15388*10^7 :             cur->bc_ops->get_maxrecs(cur, level))
     221           0 :                 return __this_address;
     222             : 
     223 >15373*10^7 :         if ((cur->bc_flags & XFS_BTREE_IN_XFILE) && bp) {
     224   157822390 :                 pag = NULL;
     225   157822390 :                 agbno = xfbtree_buf_to_xfoff(cur, bp);
     226 >15357*10^7 :         } else if (bp) {
     227 >15357*10^7 :                 agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
     228             :         }
     229             : 
     230 >15373*10^7 :         fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     231             :                         block->bb_u.s.bb_leftsib);
     232 >15385*10^7 :         if (!fa)
     233 >15388*10^7 :                 fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
     234             :                                 block->bb_u.s.bb_rightsib);
     235             :         return fa;
     236             : }
     237             : 
     238             : /* Check a short btree block header. */
     239             : STATIC int
     240 >15354*10^7 : xfs_btree_check_sblock(
     241             :         struct xfs_btree_cur    *cur,
     242             :         struct xfs_btree_block  *block,
     243             :         int                     level,
     244             :         struct xfs_buf          *bp)
     245             : {
     246 >15354*10^7 :         struct xfs_mount        *mp = cur->bc_mp;
     247 >15354*10^7 :         xfs_failaddr_t          fa;
     248             : 
     249 >15354*10^7 :         fa = __xfs_btree_check_sblock(cur, block, level, bp);
     250 >30879*10^7 :         if (XFS_IS_CORRUPT(mp, fa != NULL) ||
     251 >15377*10^7 :             XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
     252           0 :                 if (bp)
     253           0 :                         trace_xfs_btree_corrupt(bp, _RET_IP_);
     254           0 :                 xfs_btree_mark_sick(cur);
     255           0 :                 return -EFSCORRUPTED;
     256             :         }
     257             :         return 0;
     258             : }
     259             : 
     260             : /*
     261             :  * Debug routine: check that block header is ok.
     262             :  */
     263             : int
     264 >15540*10^7 : xfs_btree_check_block(
     265             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     266             :         struct xfs_btree_block  *block, /* generic btree block pointer */
     267             :         int                     level,  /* level of the btree block */
     268             :         struct xfs_buf          *bp)    /* buffer containing block, if any */
     269             : {
     270 >15540*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
     271  1808798850 :                 return xfs_btree_check_lblock(cur, block, level, bp);
     272             :         else
     273 >15359*10^7 :                 return xfs_btree_check_sblock(cur, block, level, bp);
     274             : }
     275             : 
     276             : /* Check that this long pointer is valid and points within the fs. */
     277             : bool
     278  1284892818 : xfs_btree_check_lptr(
     279             :         struct xfs_btree_cur    *cur,
     280             :         xfs_fsblock_t           fsbno,
     281             :         int                     level)
     282             : {
     283  1284892818 :         if (level <= 0)
     284             :                 return false;
     285  1284892818 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     286           0 :                 return xfbtree_verify_xfileoff(cur, fsbno);
     287  1284892818 :         return xfs_verify_fsbno(cur->bc_mp, fsbno);
     288             : }
     289             : 
     290             : /* Check that this short pointer is valid and points within the AG. */
     291             : bool
     292 >24242*10^7 : xfs_btree_check_sptr(
     293             :         struct xfs_btree_cur    *cur,
     294             :         xfs_agblock_t           agbno,
     295             :         int                     level)
     296             : {
     297 >24242*10^7 :         if (level <= 0)
     298             :                 return false;
     299 >24242*10^7 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     300   208897228 :                 return xfbtree_verify_xfileoff(cur, agbno);
     301 >24221*10^7 :         return xfs_verify_agbno(cur->bc_ag.pag, agbno);
     302             : }
     303             : 
     304             : /*
     305             :  * Check that a given (indexed) btree pointer at a certain level of a
     306             :  * btree is valid and doesn't point past where it should.
     307             :  */
     308             : static int
     309 35333832563 : xfs_btree_check_ptr(
     310             :         struct xfs_btree_cur            *cur,
     311             :         const union xfs_btree_ptr       *ptr,
     312             :         int                             index,
     313             :         int                             level)
     314             : {
     315 35333832563 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     316  1107152226 :                 return xfbtree_check_ptr(cur, ptr, index, level);
     317             : 
     318 34226680337 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     319   772323881 :                 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
     320             :                                 level))
     321             :                         return 0;
     322           0 :                 xfs_err(cur->bc_mp,
     323             : "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
     324             :                                 cur->bc_ino.ip->i_ino,
     325             :                                 cur->bc_ino.whichfork, cur->bc_btnum,
     326             :                                 level, index);
     327             :         } else {
     328 66908712912 :                 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
     329             :                                 level))
     330             :                         return 0;
     331           0 :                 xfs_err(cur->bc_mp,
     332             : "AG %u: Corrupt btree %d pointer at level %d index %d.",
     333             :                                 cur->bc_ag.pag->pag_agno, cur->bc_btnum,
     334             :                                 level, index);
     335             :         }
     336             : 
     337           0 :         xfs_btree_mark_sick(cur);
     338           0 :         return -EFSCORRUPTED;
     339             : }
     340             : 
     341             : #ifdef DEBUG
     342             : # define xfs_btree_debug_check_ptr      xfs_btree_check_ptr
     343             : #else
     344             : # define xfs_btree_debug_check_ptr(...) (0)
     345             : #endif
     346             : 
     347             : /*
     348             :  * Calculate CRC on the whole btree block and stuff it into the
     349             :  * long-form btree header.
     350             :  *
     351             :  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
     352             :  * it into the buffer so recovery knows what the last modification was that made
     353             :  * it to disk.
     354             :  */
     355             : void
     356     5033163 : xfs_btree_lblock_calc_crc(
     357             :         struct xfs_buf          *bp)
     358             : {
     359     5033163 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     360     5033163 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     361             : 
     362     5033163 :         if (!xfs_has_crc(bp->b_mount))
     363             :                 return;
     364     5033163 :         if (bip)
     365     5027823 :                 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     366     5033163 :         xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     367             : }
     368             : 
     369             : bool
     370     6631688 : xfs_btree_lblock_verify_crc(
     371             :         struct xfs_buf          *bp)
     372             : {
     373     6631688 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     374     6631688 :         struct xfs_mount        *mp = bp->b_mount;
     375             : 
     376     6631688 :         if (xfs_has_crc(mp)) {
     377     6631688 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
     378             :                         return false;
     379     6631688 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
     380             :         }
     381             : 
     382             :         return true;
     383             : }
     384             : 
     385             : /*
     386             :  * Calculate CRC on the whole btree block and stuff it into the
     387             :  * short-form btree header.
     388             :  *
     389             :  * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
     390             :  * it into the buffer so recovery knows what the last modification was that made
     391             :  * it to disk.
     392             :  */
     393             : void
     394    17123713 : xfs_btree_sblock_calc_crc(
     395             :         struct xfs_buf          *bp)
     396             : {
     397    17123713 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     398    17123713 :         struct xfs_buf_log_item *bip = bp->b_log_item;
     399             : 
     400    17123713 :         if (!xfs_has_crc(bp->b_mount))
     401             :                 return;
     402    17116829 :         if (bip)
     403    16654205 :                 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
     404    17116829 :         xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     405             : }
     406             : 
     407             : bool
     408     2393030 : xfs_btree_sblock_verify_crc(
     409             :         struct xfs_buf          *bp)
     410             : {
     411     2393030 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     412     2393030 :         struct xfs_mount        *mp = bp->b_mount;
     413             : 
     414     2393030 :         if (xfs_has_crc(mp)) {
     415     2392878 :                 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
     416             :                         return false;
     417     2392878 :                 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
     418             :         }
     419             : 
     420             :         return true;
     421             : }
     422             : 
     423             : static int
     424      406346 : xfs_btree_free_block(
     425             :         struct xfs_btree_cur    *cur,
     426             :         struct xfs_buf          *bp)
     427             : {
     428      406346 :         int                     error;
     429             : 
     430      406346 :         trace_xfs_btree_free_block(cur, bp);
     431             : 
     432      406346 :         error = cur->bc_ops->free_block(cur, bp);
     433      406346 :         if (!error) {
     434      406346 :                 xfs_trans_binval(cur->bc_tp, bp);
     435      406346 :                 XFS_BTREE_STATS_INC(cur, free);
     436             :         }
     437      406346 :         return error;
     438             : }
     439             : 
     440             : /*
     441             :  * Delete the btree cursor.
     442             :  */
     443             : void
     444  4537031743 : xfs_btree_del_cursor(
     445             :         struct xfs_btree_cur    *cur,           /* btree cursor */
     446             :         int                     error)          /* del because of error */
     447             : {
     448  4537031743 :         int                     i;              /* btree level */
     449             : 
     450             :         /*
     451             :          * Clear the buffer pointers and release the buffers. If we're doing
     452             :          * this because of an error, inspect all of the entries in the bc_bufs
     453             :          * array for buffers to be unlocked. This is because some of the btree
     454             :          * code works from level n down to 0, and if we get an error along the
     455             :          * way we won't have initialized all the entries down to 0.
     456             :          */
     457 11548233907 :         for (i = 0; i < cur->bc_nlevels; i++) {
     458  7192292527 :                 if (cur->bc_levels[i].bp)
     459  6403425036 :                         xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
     460   788867491 :                 else if (!error)
     461             :                         break;
     462             :         }
     463             : 
     464             :         /*
     465             :          * If we are doing a BMBT update, the number of unaccounted blocks
     466             :          * allocated during this cursor life time should be zero. If it's not
     467             :          * zero, then we should be shut down or on our way to shutdown due to
     468             :          * cancelling a dirty transaction on error.
     469             :          */
     470  4537729305 :         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
     471             :                xfs_is_shutdown(cur->bc_mp) || error != 0);
     472  4537729305 :         if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
     473           0 :                 kmem_free(cur->bc_ops);
     474  4537729305 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
     475  3861475857 :             !(cur->bc_flags & XFS_BTREE_IN_XFILE) && cur->bc_ag.pag)
     476  3861475857 :                 xfs_perag_put(cur->bc_ag.pag);
     477  4537901984 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
     478   512441385 :                 if (cur->bc_mem.pag)
     479    14450845 :                         xfs_perag_put(cur->bc_mem.pag);
     480             :         }
     481  4537901993 :         kmem_cache_free(cur->bc_cache, cur);
     482  4537925537 : }
     483             : 
     484             : /* Return the buffer target for this btree's buffer. */
     485             : static inline struct xfs_buftarg *
     486  7450555782 : xfs_btree_buftarg(
     487             :         struct xfs_btree_cur    *cur)
     488             : {
     489  7450555782 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     490   531870425 :                 return xfbtree_target(cur->bc_mem.xfbtree);
     491  6918685357 :         return cur->bc_mp->m_ddev_targp;
     492             : }
     493             : 
     494             : /* Return the block size (in units of 512b sectors) for this btree. */
     495             : static inline unsigned int
     496  7450990955 : xfs_btree_bbsize(
     497             :         struct xfs_btree_cur    *cur)
     498             : {
     499  7450990955 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     500   531870425 :                 return xfbtree_bbsize();
     501  6919120530 :         return cur->bc_mp->m_bsize;
     502             : }
     503             : 
     504             : /*
     505             :  * Duplicate the btree cursor.
     506             :  * Allocate a new one, copy the record, re-get the buffers.
     507             :  */
     508             : int                                     /* error */
     509   175922617 : xfs_btree_dup_cursor(
     510             :         struct xfs_btree_cur *cur,              /* input cursor */
     511             :         struct xfs_btree_cur **ncur)            /* output cursor */
     512             : {
     513   175922617 :         struct xfs_buf  *bp;            /* btree block's buffer pointer */
     514   175922617 :         int             error;          /* error return value */
     515   175922617 :         int             i;              /* level number of btree block */
     516   175922617 :         xfs_mount_t     *mp;            /* mount structure for filesystem */
     517   175922617 :         struct xfs_btree_cur *new;              /* new cursor value */
     518   175922617 :         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
     519             : 
     520   175922617 :         tp = cur->bc_tp;
     521   175922617 :         mp = cur->bc_mp;
     522             : 
     523             :         /*
     524             :          * Allocate a new cursor like the old one.
     525             :          */
     526   175922617 :         new = cur->bc_ops->dup_cursor(cur);
     527             : 
     528             :         /*
     529             :          * Copy the record currently in the cursor.
     530             :          */
     531   175924137 :         new->bc_rec = cur->bc_rec;
     532             : 
     533             :         /*
     534             :          * For each level current, re-get the buffer and copy the ptr value.
     535             :          */
     536   541278083 :         for (i = 0; i < new->bc_nlevels; i++) {
     537   365351906 :                 new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
     538   365351906 :                 new->bc_levels[i].ra = cur->bc_levels[i].ra;
     539   365351906 :                 bp = cur->bc_levels[i].bp;
     540   365351906 :                 if (bp) {
     541   321615872 :                         error = xfs_trans_read_buf(mp, tp,
     542             :                                         xfs_btree_buftarg(cur),
     543             :                                         xfs_buf_daddr(bp),
     544   321615930 :                                         xfs_btree_bbsize(cur), 0, &bp,
     545   321617892 :                                         cur->bc_ops->buf_ops);
     546   321617919 :                         if (xfs_metadata_is_sick(error))
     547           0 :                                 xfs_btree_mark_sick(new);
     548   321617919 :                         if (error) {
     549           7 :                                 xfs_btree_del_cursor(new, error);
     550           7 :                                 *ncur = NULL;
     551           7 :                                 return error;
     552             :                         }
     553             :                 }
     554   365353946 :                 new->bc_levels[i].bp = bp;
     555             :         }
     556   175926177 :         *ncur = new;
     557   175926177 :         return 0;
     558             : }
     559             : 
     560             : /*
     561             :  * XFS btree block layout and addressing:
     562             :  *
     563             :  * There are two types of blocks in the btree: leaf and non-leaf blocks.
     564             :  *
     565             :  * The leaf record start with a header then followed by records containing
     566             :  * the values.  A non-leaf block also starts with the same header, and
     567             :  * then first contains lookup keys followed by an equal number of pointers
     568             :  * to the btree blocks at the previous level.
     569             :  *
     570             :  *              +--------+-------+-------+-------+-------+-------+-------+
     571             :  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
     572             :  *              +--------+-------+-------+-------+-------+-------+-------+
     573             :  *
     574             :  *              +--------+-------+-------+-------+-------+-------+-------+
     575             :  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
     576             :  *              +--------+-------+-------+-------+-------+-------+-------+
     577             :  *
     578             :  * The header is called struct xfs_btree_block for reasons better left unknown
     579             :  * and comes in different versions for short (32bit) and long (64bit) block
     580             :  * pointers.  The record and key structures are defined by the btree instances
     581             :  * and opaque to the btree core.  The block pointers are simple disk endian
     582             :  * integers, available in a short (32bit) and long (64bit) variant.
     583             :  *
     584             :  * The helpers below calculate the offset of a given record, key or pointer
     585             :  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
     586             :  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
     587             :  * inside the btree block is done using indices starting at one, not zero!
     588             :  *
     589             :  * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
     590             :  * overlapping intervals.  In such a tree, records are still sorted lowest to
     591             :  * highest and indexed by the smallest key value that refers to the record.
     592             :  * However, nodes are different: each pointer has two associated keys -- one
     593             :  * indexing the lowest key available in the block(s) below (the same behavior
     594             :  * as the key in a regular btree) and another indexing the highest key
     595             :  * available in the block(s) below.  Because records are /not/ sorted by the
     596             :  * highest key, all leaf block updates require us to compute the highest key
     597             :  * that matches any record in the leaf and to recursively update the high keys
     598             :  * in the nodes going further up in the tree, if necessary.  Nodes look like
     599             :  * this:
     600             :  *
     601             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     602             :  * Non-Leaf:    | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
     603             :  *              +--------+-----+-----+-----+-----+-----+-------+-------+-----+
     604             :  *
     605             :  * To perform an interval query on an overlapped tree, perform the usual
     606             :  * depth-first search and use the low and high keys to decide if we can skip
     607             :  * that particular node.  If a leaf node is reached, return the records that
     608             :  * intersect the interval.  Note that an interval query may return numerous
     609             :  * entries.  For a non-overlapped tree, simply search for the record associated
     610             :  * with the lowest key and iterate forward until a non-matching record is
     611             :  * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
     612             :  * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
     613             :  * more detail.
     614             :  *
     615             :  * Why do we care about overlapping intervals?  Let's say you have a bunch of
     616             :  * reverse mapping records on a reflink filesystem:
     617             :  *
     618             :  * 1: +- file A startblock B offset C length D -----------+
     619             :  * 2:      +- file E startblock F offset G length H --------------+
     620             :  * 3:      +- file I startblock F offset J length K --+
     621             :  * 4:                                                        +- file L... --+
     622             :  *
     623             :  * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
     624             :  * we'd simply increment the length of record 1.  But how do we find the record
     625             :  * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
     626             :  * record 3 because the keys are ordered first by startblock.  An interval
     627             :  * query would return records 1 and 2 because they both overlap (B+D-1), and
     628             :  * from that we can pick out record 1 as the appropriate left neighbor.
     629             :  *
     630             :  * In the non-overlapped case you can do a LE lookup and decrement the cursor
     631             :  * because a record's interval must end before the next record.
     632             :  */
     633             : 
     634             : /*
     635             :  * Return size of the btree block header for this btree instance.
     636             :  */
     637 >44839*10^7 : static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
     638             : {
     639 >44839*10^7 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
     640  4715206330 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     641             :                         return XFS_BTREE_LBLOCK_CRC_LEN;
     642           0 :                 return XFS_BTREE_LBLOCK_LEN;
     643             :         }
     644 >44367*10^7 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
     645 >44372*10^7 :                 return XFS_BTREE_SBLOCK_CRC_LEN;
     646             :         return XFS_BTREE_SBLOCK_LEN;
     647             : }
     648             : 
     649             : /*
     650             :  * Return size of btree block pointers for this btree instance.
     651             :  */
     652             : static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
     653             : {
     654 39391868669 :         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
     655 39391868669 :                 sizeof(__be64) : sizeof(__be32);
     656             : }
     657             : 
     658             : /*
     659             :  * Calculate offset of the n-th record in a btree block.
     660             :  */
     661             : STATIC size_t
     662 >31449*10^7 : xfs_btree_rec_offset(
     663             :         struct xfs_btree_cur    *cur,
     664             :         int                     n)
     665             : {
     666 >31449*10^7 :         return xfs_btree_block_len(cur) +
     667 >31449*10^7 :                 (n - 1) * cur->bc_ops->rec_len;
     668             : }
     669             : 
     670             : /*
     671             :  * Calculate offset of the n-th key in a btree block.
     672             :  */
     673             : STATIC size_t
     674 55610048775 : xfs_btree_key_offset(
     675             :         struct xfs_btree_cur    *cur,
     676             :         int                     n)
     677             : {
     678 55610048775 :         return xfs_btree_block_len(cur) +
     679 55610048775 :                 (n - 1) * cur->bc_ops->key_len;
     680             : }
     681             : 
     682             : /*
     683             :  * Calculate offset of the n-th high key in a btree block.
     684             :  */
     685             : STATIC size_t
     686 40428255662 : xfs_btree_high_key_offset(
     687             :         struct xfs_btree_cur    *cur,
     688             :         int                     n)
     689             : {
     690 40428255662 :         return xfs_btree_block_len(cur) +
     691 40428255662 :                 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
     692             : }
     693             : 
     694             : /*
     695             :  * Calculate offset of the n-th block pointer in a btree block.
     696             :  */
     697             : STATIC size_t
     698 39384152755 : xfs_btree_ptr_offset(
     699             :         struct xfs_btree_cur    *cur,
     700             :         int                     n,
     701             :         int                     level)
     702             : {
     703 39384152755 :         return xfs_btree_block_len(cur) +
     704 78768611045 :                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
     705 78491362062 :                 (n - 1) * xfs_btree_ptr_len(cur);
     706             : }
     707             : 
     708             : /*
     709             :  * Return a pointer to the n-th record in the btree block.
     710             :  */
     711             : union xfs_btree_rec *
     712  2708405854 : xfs_btree_rec_addr(
     713             :         struct xfs_btree_cur    *cur,
     714             :         int                     n,
     715             :         struct xfs_btree_block  *block)
     716             : {
     717 >31346*10^7 :         return (union xfs_btree_rec *)
     718 >31346*10^7 :                 ((char *)block + xfs_btree_rec_offset(cur, n));
     719             : }
     720             : 
     721             : /*
     722             :  * Return a pointer to the n-th key in the btree block.
     723             :  */
     724             : union xfs_btree_key *
     725  2548892420 : xfs_btree_key_addr(
     726             :         struct xfs_btree_cur    *cur,
     727             :         int                     n,
     728             :         struct xfs_btree_block  *block)
     729             : {
     730 55352287743 :         return (union xfs_btree_key *)
     731 55352287743 :                 ((char *)block + xfs_btree_key_offset(cur, n));
     732             : }
     733             : 
     734             : /*
     735             :  * Return a pointer to the n-th high key in the btree block.
     736             :  */
     737             : union xfs_btree_key *
     738  1933729782 : xfs_btree_high_key_addr(
     739             :         struct xfs_btree_cur    *cur,
     740             :         int                     n,
     741             :         struct xfs_btree_block  *block)
     742             : {
     743 40437391843 :         return (union xfs_btree_key *)
     744 40437391843 :                 ((char *)block + xfs_btree_high_key_offset(cur, n));
     745             : }
     746             : 
     747             : /*
     748             :  * Return a pointer to the n-th block pointer in the btree block.
     749             :  */
     750             : union xfs_btree_ptr *
     751 39381441046 : xfs_btree_ptr_addr(
     752             :         struct xfs_btree_cur    *cur,
     753             :         int                     n,
     754             :         struct xfs_btree_block  *block)
     755             : {
     756 39381441046 :         int                     level = xfs_btree_get_level(block);
     757             : 
     758 39381441046 :         ASSERT(block->bb_level != 0);
     759             : 
     760 78764289291 :         return (union xfs_btree_ptr *)
     761 39381441046 :                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
     762             : }
     763             : 
     764             : struct xfs_ifork *
     765   388288568 : xfs_btree_ifork_ptr(
     766             :         struct xfs_btree_cur    *cur)
     767             : {
     768   388288568 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
     769             : 
     770   388288568 :         if (cur->bc_flags & XFS_BTREE_STAGING)
     771       10371 :                 return cur->bc_ino.ifake->if_fork;
     772   388278197 :         return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
     773             : }
     774             : 
     775             : /*
     776             :  * Get the root block which is stored in the inode.
     777             :  *
     778             :  * For now this btree implementation assumes the btree root is always
     779             :  * stored in the if_broot field of an inode fork.
     780             :  */
     781             : STATIC struct xfs_btree_block *
     782   225220689 : xfs_btree_get_iroot(
     783             :         struct xfs_btree_cur    *cur)
     784             : {
     785   225220689 :         struct xfs_ifork        *ifp = xfs_btree_ifork_ptr(cur);
     786             : 
     787   225233756 :         return (struct xfs_btree_block *)ifp->if_broot;
     788             : }
     789             : 
     790             : /*
     791             :  * Retrieve the block pointer from the cursor at the given level.
     792             :  * This may be an inode btree root or from a buffer.
     793             :  */
     794             : struct xfs_btree_block *                /* generic btree block pointer */
     795 >26847*10^7 : xfs_btree_get_block(
     796             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     797             :         int                     level,  /* level in btree */
     798             :         struct xfs_buf          **bpp)  /* buffer containing the block */
     799             : {
     800 >26847*10^7 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
     801  1105622688 :             (level == cur->bc_nlevels - 1)) {
     802    85377008 :                 *bpp = NULL;
     803    85377008 :                 return xfs_btree_get_iroot(cur);
     804             :         }
     805             : 
     806 >26839*10^7 :         *bpp = cur->bc_levels[level].bp;
     807 >26839*10^7 :         return XFS_BUF_TO_BLOCK(*bpp);
     808             : }
     809             : 
     810             : /*
     811             :  * Change the cursor to point to the first record at the given level.
     812             :  * Other levels are unaffected.
     813             :  */
     814             : STATIC int                              /* success=1, failure=0 */
     815    69123188 : xfs_btree_firstrec(
     816             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     817             :         int                     level)  /* level to change */
     818             : {
     819    69123188 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     820    69123188 :         struct xfs_buf          *bp;    /* buffer containing block */
     821             : 
     822             :         /*
     823             :          * Get the block pointer for this level.
     824             :          */
     825    69123188 :         block = xfs_btree_get_block(cur, level, &bp);
     826    69123183 :         if (xfs_btree_check_block(cur, block, level, bp))
     827             :                 return 0;
     828             :         /*
     829             :          * It's empty, there is no such record.
     830             :          */
     831    69123212 :         if (!block->bb_numrecs)
     832             :                 return 0;
     833             :         /*
     834             :          * Set the ptr value to 1, that's the first record/key.
     835             :          */
     836    69123212 :         cur->bc_levels[level].ptr = 1;
     837    69123212 :         return 1;
     838             : }
     839             : 
     840             : /*
     841             :  * Change the cursor to point to the last record in the current block
     842             :  * at the given level.  Other levels are unaffected.
     843             :  */
     844             : STATIC int                              /* success=1, failure=0 */
     845    71398852 : xfs_btree_lastrec(
     846             :         struct xfs_btree_cur    *cur,   /* btree cursor */
     847             :         int                     level)  /* level to change */
     848             : {
     849    71398852 :         struct xfs_btree_block  *block; /* generic btree block pointer */
     850    71398852 :         struct xfs_buf          *bp;    /* buffer containing block */
     851             : 
     852             :         /*
     853             :          * Get the block pointer for this level.
     854             :          */
     855    71398852 :         block = xfs_btree_get_block(cur, level, &bp);
     856    71399041 :         if (xfs_btree_check_block(cur, block, level, bp))
     857             :                 return 0;
     858             :         /*
     859             :          * It's empty, there is no such record.
     860             :          */
     861    71399836 :         if (!block->bb_numrecs)
     862             :                 return 0;
     863             :         /*
     864             :          * Set the ptr value to numrecs, that's the last record/key.
     865             :          */
     866    71399836 :         cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
     867    71399836 :         return 1;
     868             : }
     869             : 
     870             : /*
     871             :  * Compute first and last byte offsets for the fields given.
     872             :  * Interprets the offsets table, which contains struct field offsets.
     873             :  */
     874             : void
     875  1495629157 : xfs_btree_offsets(
     876             :         uint32_t        fields,         /* bitmask of fields */
     877             :         const short     *offsets,       /* table of field offsets */
     878             :         int             nbits,          /* number of bits to inspect */
     879             :         int             *first,         /* output: first byte offset */
     880             :         int             *last)          /* output: last byte offset */
     881             : {
     882  1495629157 :         int             i;              /* current bit number */
     883  1495629157 :         uint32_t        imask;          /* mask for current bit number */
     884             : 
     885  1495629157 :         ASSERT(fields != 0);
     886             :         /*
     887             :          * Find the lowest bit, so the first byte offset.
     888             :          */
     889  6054540460 :         for (i = 0, imask = 1u; ; i++, imask <<= 1) {
     890  6054540460 :                 if (imask & fields) {
     891  1495629157 :                         *first = offsets[i];
     892  1495629157 :                         break;
     893             :                 }
     894             :         }
     895             :         /*
     896             :          * Find the highest bit, so the last byte offset.
     897             :          */
     898  1495629157 :         for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
     899 10355034251 :                 if (imask & fields) {
     900  1495629157 :                         *last = offsets[i + 1] - 1;
     901  1495629157 :                         break;
     902             :                 }
     903             :         }
     904  1495629157 : }
     905             : 
     906             : /*
     907             :  * Get a buffer for the block, return it read in.
     908             :  * Long-form addressing.
     909             :  */
     910             : int
     911   391020451 : xfs_btree_read_bufl(
     912             :         struct xfs_mount        *mp,            /* file system mount point */
     913             :         struct xfs_trans        *tp,            /* transaction pointer */
     914             :         xfs_fsblock_t           fsbno,          /* file system block number */
     915             :         struct xfs_buf          **bpp,          /* buffer for fsbno */
     916             :         int                     refval,         /* ref count value for buffer */
     917             :         const struct xfs_buf_ops *ops)
     918             : {
     919   391020451 :         struct xfs_buf          *bp;            /* return value */
     920   391020451 :         xfs_daddr_t             d;              /* real disk block address */
     921   391020451 :         int                     error;
     922             : 
     923   391020451 :         if (!xfs_verify_fsbno(mp, fsbno))
     924             :                 return -EFSCORRUPTED;
     925   391023006 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     926   391023006 :         error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
     927             :                                    mp->m_bsize, 0, &bp, ops);
     928   391023578 :         if (error)
     929             :                 return error;
     930   391023531 :         if (bp)
     931   391023531 :                 xfs_buf_set_ref(bp, refval);
     932   391023576 :         *bpp = bp;
     933   391023576 :         return 0;
     934             : }
     935             : 
     936             : /*
     937             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     938             :  * Long-form addressing.
     939             :  */
     940             : /* ARGSUSED */
     941             : void
     942    45259659 : xfs_btree_reada_bufl(
     943             :         struct xfs_mount        *mp,            /* file system mount point */
     944             :         xfs_fsblock_t           fsbno,          /* file system block number */
     945             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     946             :         const struct xfs_buf_ops *ops)
     947             : {
     948    45259659 :         xfs_daddr_t             d;
     949             : 
     950    45259659 :         ASSERT(fsbno != NULLFSBLOCK);
     951    45259659 :         d = XFS_FSB_TO_DADDR(mp, fsbno);
     952    45259659 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     953    45259660 : }
     954             : 
     955             : /*
     956             :  * Read-ahead the block, don't wait for it, don't return a buffer.
     957             :  * Short-form addressing.
     958             :  */
     959             : /* ARGSUSED */
     960             : void
     961  4202240281 : xfs_btree_reada_bufs(
     962             :         struct xfs_mount        *mp,            /* file system mount point */
     963             :         xfs_agnumber_t          agno,           /* allocation group number */
     964             :         xfs_agblock_t           agbno,          /* allocation group block number */
     965             :         xfs_extlen_t            count,          /* count of filesystem blocks */
     966             :         const struct xfs_buf_ops *ops)
     967             : {
     968  4202240281 :         xfs_daddr_t             d;
     969             : 
     970  4202240281 :         ASSERT(agno != NULLAGNUMBER);
     971  4202240281 :         ASSERT(agbno != NULLAGBLOCK);
     972  4202240281 :         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
     973  4202240281 :         xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
     974  4203900566 : }
     975             : 
     976             : STATIC int
     977    45259719 : xfs_btree_readahead_lblock(
     978             :         struct xfs_btree_cur    *cur,
     979             :         int                     lr,
     980             :         struct xfs_btree_block  *block)
     981             : {
     982    45259719 :         int                     rval = 0;
     983    45259719 :         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
     984    45259719 :         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
     985             : 
     986    45259719 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
     987             :                 return 0;
     988             : 
     989    45259719 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
     990    19612268 :                 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
     991    19612268 :                                      cur->bc_ops->buf_ops);
     992    19612268 :                 rval++;
     993             :         }
     994             : 
     995    45259719 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
     996    25647391 :                 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
     997    25647391 :                                      cur->bc_ops->buf_ops);
     998    25647391 :                 rval++;
     999             :         }
    1000             : 
    1001             :         return rval;
    1002             : }
    1003             : 
    1004             : STATIC int
    1005  1169326707 : xfs_btree_readahead_sblock(
    1006             :         struct xfs_btree_cur    *cur,
    1007             :         int                     lr,
    1008             :         struct xfs_btree_block *block)
    1009             : {
    1010  1169326707 :         int                     rval = 0;
    1011  1169326707 :         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
    1012  1169326707 :         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
    1013             : 
    1014  1169326707 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1015             :                 return 0;
    1016             : 
    1017  1164851855 :         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
    1018    73868786 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1019    73868786 :                                      left, 1, cur->bc_ops->buf_ops);
    1020    73868786 :                 rval++;
    1021             :         }
    1022             : 
    1023  1164852208 :         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
    1024  1090989346 :                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1025  1090989346 :                                      right, 1, cur->bc_ops->buf_ops);
    1026  1091066971 :                 rval++;
    1027             :         }
    1028             : 
    1029             :         return rval;
    1030             : }
    1031             : 
    1032             : /*
    1033             :  * Read-ahead btree blocks, at the given level.
    1034             :  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
    1035             :  */
    1036             : STATIC int
    1037 74183174249 : xfs_btree_readahead(
    1038             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    1039             :         int                     lev,            /* level in btree */
    1040             :         int                     lr)             /* left/right bits */
    1041             : {
    1042 74183174249 :         struct xfs_btree_block  *block;
    1043             : 
    1044             :         /*
    1045             :          * No readahead needed if we are at the root level and the
    1046             :          * btree root is stored in the inode.
    1047             :          */
    1048 74183174249 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1049    62605400 :             (lev == cur->bc_nlevels - 1))
    1050             :                 return 0;
    1051             : 
    1052 74177608326 :         if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
    1053             :                 return 0;
    1054             : 
    1055  1214611462 :         cur->bc_levels[lev].ra |= lr;
    1056  1214611462 :         block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
    1057             : 
    1058  1214611462 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1059    45259718 :                 return xfs_btree_readahead_lblock(cur, lr, block);
    1060  1169351744 :         return xfs_btree_readahead_sblock(cur, lr, block);
    1061             : }
    1062             : 
    1063             : STATIC int
    1064 29495716203 : xfs_btree_ptr_to_daddr(
    1065             :         struct xfs_btree_cur            *cur,
    1066             :         const union xfs_btree_ptr       *ptr,
    1067             :         xfs_daddr_t                     *daddr)
    1068             : {
    1069 29495716203 :         xfs_fsblock_t           fsbno;
    1070 29495716203 :         xfs_agblock_t           agbno;
    1071 29495716203 :         int                     error;
    1072             : 
    1073 29495716203 :         error = xfs_btree_check_ptr(cur, ptr, 0, 1);
    1074 29505369212 :         if (error)
    1075             :                 return error;
    1076             : 
    1077 29505369212 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1078  1085569913 :                 *daddr = xfbtree_ptr_to_daddr(cur, ptr);
    1079  1085569791 :                 return 0;
    1080             :         }
    1081             : 
    1082 28419799299 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1083   535702725 :                 fsbno = be64_to_cpu(ptr->l);
    1084   535702725 :                 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
    1085             :         } else {
    1086 27884096574 :                 agbno = be32_to_cpu(ptr->s);
    1087 27884096574 :                 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
    1088             :                                 agbno);
    1089             :         }
    1090             : 
    1091             :         return 0;
    1092             : }
    1093             : 
    1094             : /*
    1095             :  * Readahead @count btree blocks at the given @ptr location.
    1096             :  *
    1097             :  * We don't need to care about long or short form btrees here as we have a
    1098             :  * method of converting the ptr directly to a daddr available to us.
    1099             :  */
    1100             : STATIC void
    1101     5025771 : xfs_btree_readahead_ptr(
    1102             :         struct xfs_btree_cur    *cur,
    1103             :         union xfs_btree_ptr     *ptr,
    1104             :         xfs_extlen_t            count)
    1105             : {
    1106     5025771 :         xfs_daddr_t             daddr;
    1107             : 
    1108     5025771 :         if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
    1109           0 :                 return;
    1110     5025764 :         xfs_buf_readahead(xfs_btree_buftarg(cur), daddr,
    1111     5025765 :                         xfs_btree_bbsize(cur) * count,
    1112     5025760 :                         cur->bc_ops->buf_ops);
    1113             : }
    1114             : 
    1115             : /*
    1116             :  * Set the buffer for level "lev" in the cursor to bp, releasing
    1117             :  * any previous buffer.
    1118             :  */
    1119             : STATIC void
    1120  7010957157 : xfs_btree_setbuf(
    1121             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1122             :         int                     lev,    /* level in btree */
    1123             :         struct xfs_buf          *bp)    /* new buffer to set */
    1124             : {
    1125  7010957157 :         struct xfs_btree_block  *b;     /* btree block */
    1126             : 
    1127  7010957157 :         if (cur->bc_levels[lev].bp)
    1128   927330787 :                 xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
    1129  7009167989 :         cur->bc_levels[lev].bp = bp;
    1130  7009167989 :         cur->bc_levels[lev].ra = 0;
    1131             : 
    1132  7009167989 :         b = XFS_BUF_TO_BLOCK(bp);
    1133  7009167989 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1134   741406483 :                 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
    1135   588094253 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1136   741406483 :                 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
    1137   693050522 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1138             :         } else {
    1139  6267761506 :                 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
    1140  4006630286 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
    1141  6267761506 :                 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
    1142  4091201361 :                         cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
    1143             :         }
    1144  7009167989 : }
    1145             : 
    1146             : bool
    1147    22271900 : xfs_btree_ptr_is_null(
    1148             :         struct xfs_btree_cur            *cur,
    1149             :         const union xfs_btree_ptr       *ptr)
    1150             : {
    1151   268553409 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1152  1213626490 :                 return ptr->l == cpu_to_be64(NULLFSBLOCK);
    1153             :         else
    1154  3399516725 :                 return ptr->s == cpu_to_be32(NULLAGBLOCK);
    1155             : }
    1156             : 
    1157             : void
    1158      732234 : xfs_btree_set_ptr_null(
    1159             :         struct xfs_btree_cur    *cur,
    1160             :         union xfs_btree_ptr     *ptr)
    1161             : {
    1162      732234 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1163   459296181 :                 ptr->l = cpu_to_be64(NULLFSBLOCK);
    1164             :         else
    1165   648189869 :                 ptr->s = cpu_to_be32(NULLAGBLOCK);
    1166      732234 : }
    1167             : 
    1168             : /*
    1169             :  * Get/set/init sibling pointers
    1170             :  */
    1171             : void
    1172  3844844077 : xfs_btree_get_sibling(
    1173             :         struct xfs_btree_cur    *cur,
    1174             :         struct xfs_btree_block  *block,
    1175             :         union xfs_btree_ptr     *ptr,
    1176             :         int                     lr)
    1177             : {
    1178  3844844077 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1179             : 
    1180  3844844077 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1181   751640778 :                 if (lr == XFS_BB_RIGHTSIB)
    1182   511589186 :                         ptr->l = block->bb_u.l.bb_rightsib;
    1183             :                 else
    1184   240051592 :                         ptr->l = block->bb_u.l.bb_leftsib;
    1185             :         } else {
    1186  3093203299 :                 if (lr == XFS_BB_RIGHTSIB)
    1187  2991452008 :                         ptr->s = block->bb_u.s.bb_rightsib;
    1188             :                 else
    1189   101751291 :                         ptr->s = block->bb_u.s.bb_leftsib;
    1190             :         }
    1191  3844844077 : }
    1192             : 
    1193             : void
    1194     3651981 : xfs_btree_set_sibling(
    1195             :         struct xfs_btree_cur            *cur,
    1196             :         struct xfs_btree_block          *block,
    1197             :         const union xfs_btree_ptr       *ptr,
    1198             :         int                             lr)
    1199             : {
    1200     3651981 :         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
    1201             : 
    1202     3651981 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    1203      820966 :                 if (lr == XFS_BB_RIGHTSIB)
    1204      585511 :                         block->bb_u.l.bb_rightsib = ptr->l;
    1205             :                 else
    1206      235455 :                         block->bb_u.l.bb_leftsib = ptr->l;
    1207             :         } else {
    1208     2831015 :                 if (lr == XFS_BB_RIGHTSIB)
    1209     1428241 :                         block->bb_u.s.bb_rightsib = ptr->s;
    1210             :                 else
    1211     1402774 :                         block->bb_u.s.bb_leftsib = ptr->s;
    1212             :         }
    1213     3651981 : }
    1214             : 
    1215             : static void
    1216    13924210 : __xfs_btree_init_block(
    1217             :         struct xfs_mount        *mp,
    1218             :         struct xfs_btree_block  *buf,
    1219             :         const struct xfs_btree_ops *ops,
    1220             :         xfs_daddr_t             blkno,
    1221             :         __u16                   level,
    1222             :         __u16                   numrecs,
    1223             :         __u64                   owner)
    1224             : {
    1225    13924210 :         int                     crc = xfs_has_crc(mp);
    1226    13924210 :         __u32                   magic = xfs_btree_magic(mp, ops);
    1227             : 
    1228    13924204 :         buf->bb_magic = cpu_to_be32(magic);
    1229    13924204 :         buf->bb_level = cpu_to_be16(level);
    1230    13924204 :         buf->bb_numrecs = cpu_to_be16(numrecs);
    1231             : 
    1232    13924204 :         if (ops->geom_flags & XFS_BTREE_LONG_PTRS) {
    1233    12965683 :                 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
    1234    12965683 :                 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
    1235    12965683 :                 if (crc) {
    1236    12965683 :                         buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
    1237    12965683 :                         buf->bb_u.l.bb_owner = cpu_to_be64(owner);
    1238    12965683 :                         uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1239    12965680 :                         buf->bb_u.l.bb_pad = 0;
    1240    12965680 :                         buf->bb_u.l.bb_lsn = 0;
    1241             :                 }
    1242             :         } else {
    1243             :                 /* owner is a 32 bit value on short blocks */
    1244      958521 :                 __u32 __owner = (__u32)owner;
    1245             : 
    1246      958521 :                 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
    1247      958521 :                 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
    1248      958521 :                 if (crc) {
    1249      951645 :                         buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
    1250      951645 :                         buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
    1251      951645 :                         uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
    1252      951634 :                         buf->bb_u.s.bb_lsn = 0;
    1253             :                 }
    1254             :         }
    1255    13924190 : }
    1256             : 
    1257             : void
    1258    11869454 : xfs_btree_init_block(
    1259             :         struct xfs_mount        *mp,
    1260             :         struct xfs_btree_block  *block,
    1261             :         const struct xfs_btree_ops *ops,
    1262             :         __u16                   level,
    1263             :         __u16                   numrecs,
    1264             :         __u64                   owner)
    1265             : {
    1266    11869454 :         __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level,
    1267             :                         numrecs, owner);
    1268    11869447 : }
    1269             : 
    1270             : void
    1271     2054724 : xfs_btree_init_buf(
    1272             :         struct xfs_mount                *mp,
    1273             :         struct xfs_buf                  *bp,
    1274             :         const struct xfs_btree_ops      *ops,
    1275             :         __u16                           level,
    1276             :         __u16                           numrecs,
    1277             :         __u64                           owner)
    1278             : {
    1279     2054724 :         __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops,
    1280             :                         xfs_buf_daddr(bp), level, numrecs, owner);
    1281     2054687 :         bp->b_ops = ops->buf_ops;
    1282     2054687 : }
    1283             : 
    1284             : void
    1285     1136014 : xfs_btree_init_block_cur(
    1286             :         struct xfs_btree_cur    *cur,
    1287             :         struct xfs_buf          *bp,
    1288             :         int                     level,
    1289             :         int                     numrecs)
    1290             : {
    1291     1136014 :         __u64                   owner;
    1292             : 
    1293             :         /*
    1294             :          * we can pull the owner from the cursor right now as the different
    1295             :          * owners align directly with the pointer size of the btree. This may
    1296             :          * change in future, but is safe for current users of the generic btree
    1297             :          * code.
    1298             :          */
    1299     1136014 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1300        4743 :                 owner = xfbtree_owner(cur);
    1301     1131271 :         else if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1302      221297 :                 owner = cur->bc_ino.ip->i_ino;
    1303             :         else
    1304      909974 :                 owner = cur->bc_ag.pag->pag_agno;
    1305             : 
    1306     1136014 :         xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, owner);
    1307     1135988 : }
    1308             : 
    1309             : /*
    1310             :  * Return true if ptr is the last record in the btree and
    1311             :  * we need to track updates to this record.  The decision
    1312             :  * will be further refined in the update_lastrec method.
    1313             :  */
    1314             : STATIC int
    1315  1437416873 : xfs_btree_is_lastrec(
    1316             :         struct xfs_btree_cur    *cur,
    1317             :         struct xfs_btree_block  *block,
    1318             :         int                     level)
    1319             : {
    1320  1437416873 :         union xfs_btree_ptr     ptr;
    1321             : 
    1322  1437416873 :         if (level > 0)
    1323             :                 return 0;
    1324  1436379958 :         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
    1325             :                 return 0;
    1326             : 
    1327   187171524 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1328   374341930 :         if (!xfs_btree_ptr_is_null(cur, &ptr))
    1329    24945663 :                 return 0;
    1330             :         return 1;
    1331             : }
    1332             : 
    1333             : STATIC void
    1334    83945028 : xfs_btree_buf_to_ptr(
    1335             :         struct xfs_btree_cur    *cur,
    1336             :         struct xfs_buf          *bp,
    1337             :         union xfs_btree_ptr     *ptr)
    1338             : {
    1339    83945028 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE) {
    1340        4743 :                 xfbtree_buf_to_ptr(cur, bp, ptr);
    1341        4743 :                 return;
    1342             :         }
    1343             : 
    1344    83940285 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    1345     5342995 :                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
    1346             :                                         xfs_buf_daddr(bp)));
    1347             :         else {
    1348    78597290 :                 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
    1349             :                                         xfs_buf_daddr(bp)));
    1350             :         }
    1351             : }
    1352             : 
    1353             : static inline void
    1354             : xfs_btree_set_refs(
    1355             :         struct xfs_btree_cur    *cur,
    1356             :         struct xfs_buf          *bp)
    1357             : {
    1358  7124782691 :         xfs_buf_set_ref(bp, cur->bc_ops->lru_refs);
    1359             : }
    1360             : 
    1361             : int
    1362     1138527 : xfs_btree_get_buf_block(
    1363             :         struct xfs_btree_cur            *cur,
    1364             :         const union xfs_btree_ptr       *ptr,
    1365             :         struct xfs_btree_block          **block,
    1366             :         struct xfs_buf                  **bpp)
    1367             : {
    1368     1138527 :         xfs_daddr_t                     d;
    1369     1138527 :         int                             error;
    1370             : 
    1371     1138527 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1372     1138532 :         if (error)
    1373             :                 return error;
    1374     2277036 :         error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d,
    1375     1138511 :                         xfs_btree_bbsize(cur), 0, bpp);
    1376     1138541 :         if (error)
    1377             :                 return error;
    1378             : 
    1379     1138541 :         (*bpp)->b_ops = cur->bc_ops->buf_ops;
    1380     1138541 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1381     1138541 :         return 0;
    1382             : }
    1383             : 
    1384             : /*
    1385             :  * Read in the buffer at the given ptr and return the buffer and
    1386             :  * the block pointer within the buffer.
    1387             :  */
    1388             : int
    1389  7122804882 : xfs_btree_read_buf_block(
    1390             :         struct xfs_btree_cur            *cur,
    1391             :         const union xfs_btree_ptr       *ptr,
    1392             :         int                             flags,
    1393             :         struct xfs_btree_block          **block,
    1394             :         struct xfs_buf                  **bpp)
    1395             : {
    1396  7122804882 :         struct xfs_mount        *mp = cur->bc_mp;
    1397  7122804882 :         xfs_daddr_t             d;
    1398  7122804882 :         int                     error;
    1399             : 
    1400             :         /* need to sort out how callers deal with failures first */
    1401  7122804882 :         ASSERT(!(flags & XBF_TRYLOCK));
    1402             : 
    1403  7122804882 :         error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
    1404  7123866353 :         if (error)
    1405             :                 return error;
    1406  7123745139 :         error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d,
    1407  7123669396 :                         xfs_btree_bbsize(cur), flags, bpp,
    1408  7123314834 :                         cur->bc_ops->buf_ops);
    1409  7124765471 :         if (xfs_metadata_is_sick(error))
    1410         270 :                 xfs_btree_mark_sick(cur);
    1411  7124765471 :         if (error)
    1412             :                 return error;
    1413             : 
    1414  7124782691 :         xfs_btree_set_refs(cur, *bpp);
    1415  7125127016 :         *block = XFS_BUF_TO_BLOCK(*bpp);
    1416  7125127016 :         return 0;
    1417             : }
    1418             : 
    1419             : /*
    1420             :  * Copy keys from one btree block to another.
    1421             :  */
    1422             : void
    1423   169238681 : xfs_btree_copy_keys(
    1424             :         struct xfs_btree_cur            *cur,
    1425             :         union xfs_btree_key             *dst_key,
    1426             :         const union xfs_btree_key       *src_key,
    1427             :         int                             numkeys)
    1428             : {
    1429   169238681 :         ASSERT(numkeys >= 0);
    1430   338477362 :         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
    1431   169238681 : }
    1432             : 
    1433             : /*
    1434             :  * Copy records from one btree block to another.
    1435             :  */
    1436             : STATIC void
    1437  1074490103 : xfs_btree_copy_recs(
    1438             :         struct xfs_btree_cur    *cur,
    1439             :         union xfs_btree_rec     *dst_rec,
    1440             :         union xfs_btree_rec     *src_rec,
    1441             :         int                     numrecs)
    1442             : {
    1443  1074490103 :         ASSERT(numrecs >= 0);
    1444  2148980206 :         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
    1445  1074490103 : }
    1446             : 
    1447             : /*
    1448             :  * Copy block pointers from one btree block to another.
    1449             :  */
    1450             : void
    1451     6783133 : xfs_btree_copy_ptrs(
    1452             :         struct xfs_btree_cur    *cur,
    1453             :         union xfs_btree_ptr     *dst_ptr,
    1454             :         const union xfs_btree_ptr *src_ptr,
    1455             :         int                     numptrs)
    1456             : {
    1457     6783133 :         ASSERT(numptrs >= 0);
    1458    16425810 :         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
    1459     6783133 : }
    1460             : 
    1461             : /*
    1462             :  * Shift keys one index left/right inside a single btree block.
    1463             :  */
    1464             : STATIC void
    1465      932783 : xfs_btree_shift_keys(
    1466             :         struct xfs_btree_cur    *cur,
    1467             :         union xfs_btree_key     *key,
    1468             :         int                     dir,
    1469             :         int                     numkeys)
    1470             : {
    1471      932783 :         char                    *dst_key;
    1472             : 
    1473      932783 :         ASSERT(numkeys >= 0);
    1474      932783 :         ASSERT(dir == 1 || dir == -1);
    1475             : 
    1476      932783 :         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
    1477     1865566 :         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
    1478      932783 : }
    1479             : 
    1480             : /*
    1481             :  * Shift records one index left/right inside a single btree block.
    1482             :  */
    1483             : STATIC void
    1484   908245953 : xfs_btree_shift_recs(
    1485             :         struct xfs_btree_cur    *cur,
    1486             :         union xfs_btree_rec     *rec,
    1487             :         int                     dir,
    1488             :         int                     numrecs)
    1489             : {
    1490   908245953 :         char                    *dst_rec;
    1491             : 
    1492   908245953 :         ASSERT(numrecs >= 0);
    1493   908245953 :         ASSERT(dir == 1 || dir == -1);
    1494             : 
    1495   908245953 :         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
    1496  1816491906 :         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
    1497   908245953 : }
    1498             : 
    1499             : /*
    1500             :  * Shift block pointers one index left/right inside a single btree block.
    1501             :  */
    1502             : STATIC void
    1503      932781 : xfs_btree_shift_ptrs(
    1504             :         struct xfs_btree_cur    *cur,
    1505             :         union xfs_btree_ptr     *ptr,
    1506             :         int                     dir,
    1507             :         int                     numptrs)
    1508             : {
    1509      932781 :         char                    *dst_ptr;
    1510             : 
    1511      932781 :         ASSERT(numptrs >= 0);
    1512      932781 :         ASSERT(dir == 1 || dir == -1);
    1513             : 
    1514      932781 :         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
    1515     1865562 :         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
    1516      932781 : }
    1517             : 
    1518             : /*
    1519             :  * Log key values from the btree block.
    1520             :  */
    1521             : STATIC void
    1522   168534039 : xfs_btree_log_keys(
    1523             :         struct xfs_btree_cur    *cur,
    1524             :         struct xfs_buf          *bp,
    1525             :         int                     first,
    1526             :         int                     last)
    1527             : {
    1528             : 
    1529   168534039 :         if (bp) {
    1530   161766204 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1531   161766254 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1532   161766254 :                                   xfs_btree_key_offset(cur, first),
    1533   161766254 :                                   xfs_btree_key_offset(cur, last + 1) - 1);
    1534             :         } else {
    1535     6767835 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1536     6767835 :                                 xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1537             :         }
    1538   168534219 : }
    1539             : 
    1540             : /*
    1541             :  * Log record values from the btree block.
    1542             :  */
    1543             : void
    1544  1385330980 : xfs_btree_log_recs(
    1545             :         struct xfs_btree_cur    *cur,
    1546             :         struct xfs_buf          *bp,
    1547             :         int                     first,
    1548             :         int                     last)
    1549             : {
    1550             : 
    1551  1385330980 :         xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1552  1385367201 :         xfs_trans_log_buf(cur->bc_tp, bp,
    1553  1385367201 :                           xfs_btree_rec_offset(cur, first),
    1554  1385367201 :                           xfs_btree_rec_offset(cur, last + 1) - 1);
    1555             : 
    1556  1385451267 : }
    1557             : 
    1558             : /*
    1559             :  * Log block pointer fields from a btree block (nonleaf).
    1560             :  */
    1561             : STATIC void
    1562     1041395 : xfs_btree_log_ptrs(
    1563             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1564             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1565             :         int                     first,  /* index of first pointer to log */
    1566             :         int                     last)   /* index of last pointer to log */
    1567             : {
    1568             : 
    1569     1041395 :         if (bp) {
    1570     1012022 :                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    1571     1012022 :                 int                     level = xfs_btree_get_level(block);
    1572             : 
    1573     1012022 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1574     1012023 :                 xfs_trans_log_buf(cur->bc_tp, bp,
    1575     1012022 :                                 xfs_btree_ptr_offset(cur, first, level),
    1576     1012023 :                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
    1577             :         } else {
    1578       29373 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1579       29373 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1580             :         }
    1581             : 
    1582     1041395 : }
    1583             : 
    1584             : /*
    1585             :  * Log fields from a btree block header.
    1586             :  */
    1587             : void
    1588  1240185880 : xfs_btree_log_block(
    1589             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1590             :         struct xfs_buf          *bp,    /* buffer containing btree block */
    1591             :         uint32_t                fields) /* mask of fields: XFS_BB_... */
    1592             : {
    1593  1240185880 :         int                     first;  /* first byte offset logged */
    1594  1240185880 :         int                     last;   /* last byte offset logged */
    1595  1240185880 :         static const short      soffsets[] = {  /* table of offsets (short) */
    1596             :                 offsetof(struct xfs_btree_block, bb_magic),
    1597             :                 offsetof(struct xfs_btree_block, bb_level),
    1598             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1599             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
    1600             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
    1601             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
    1602             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
    1603             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
    1604             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
    1605             :                 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
    1606             :                 XFS_BTREE_SBLOCK_CRC_LEN
    1607             :         };
    1608  1240185880 :         static const short      loffsets[] = {  /* table of offsets (long) */
    1609             :                 offsetof(struct xfs_btree_block, bb_magic),
    1610             :                 offsetof(struct xfs_btree_block, bb_level),
    1611             :                 offsetof(struct xfs_btree_block, bb_numrecs),
    1612             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
    1613             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
    1614             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
    1615             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
    1616             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
    1617             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
    1618             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
    1619             :                 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
    1620             :                 XFS_BTREE_LBLOCK_CRC_LEN
    1621             :         };
    1622             : 
    1623  1240185880 :         if (bp) {
    1624  1240132870 :                 int nbits;
    1625             : 
    1626  1240132870 :                 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    1627             :                         /*
    1628             :                          * We don't log the CRC when updating a btree
    1629             :                          * block but instead recreate it during log
    1630             :                          * recovery.  As the log buffers have checksums
    1631             :                          * of their own this is safe and avoids logging a crc
    1632             :                          * update in a lot of places.
    1633             :                          */
    1634  1240130978 :                         if (fields == XFS_BB_ALL_BITS)
    1635     1561268 :                                 fields = XFS_BB_ALL_BITS_CRC;
    1636             :                         nbits = XFS_BB_NUM_BITS_CRC;
    1637             :                 } else {
    1638             :                         nbits = XFS_BB_NUM_BITS;
    1639             :                 }
    1640  1240132870 :                 xfs_btree_offsets(fields,
    1641  1240132870 :                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
    1642             :                                         loffsets : soffsets,
    1643             :                                   nbits, &first, &last);
    1644  1240158018 :                 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
    1645  1240175174 :                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
    1646             :         } else {
    1647       53010 :                 xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
    1648       53010 :                         xfs_ilog_fbroot(cur->bc_ino.whichfork));
    1649             :         }
    1650  1240141697 : }
    1651             : 
    1652             : /*
    1653             :  * Increment cursor by one record at the level.
    1654             :  * For nonzero levels the leaf-ward information is untouched.
    1655             :  */
    1656             : int                                             /* error */
    1657 72560051954 : xfs_btree_increment(
    1658             :         struct xfs_btree_cur    *cur,
    1659             :         int                     level,
    1660             :         int                     *stat)          /* success/failure */
    1661             : {
    1662 72560051954 :         struct xfs_btree_block  *block;
    1663 72560051954 :         union xfs_btree_ptr     ptr;
    1664 72560051954 :         struct xfs_buf          *bp;
    1665 72560051954 :         int                     error;          /* error return value */
    1666 72560051954 :         int                     lev;
    1667             : 
    1668 72560051954 :         ASSERT(level < cur->bc_nlevels);
    1669             : 
    1670             :         /* Read-ahead to the right at this level. */
    1671 72560051954 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    1672             : 
    1673             :         /* Get a pointer to the btree block. */
    1674 72703199415 :         block = xfs_btree_get_block(cur, level, &bp);
    1675             : 
    1676             : #ifdef DEBUG
    1677 72773950888 :         error = xfs_btree_check_block(cur, block, level, bp);
    1678 72275484087 :         if (error)
    1679           0 :                 goto error0;
    1680             : #endif
    1681             : 
    1682             :         /* We're done if we remain in the block after the increment. */
    1683 >14455*10^7 :         if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
    1684 69816870596 :                 goto out1;
    1685             : 
    1686             :         /* Fail if we just went off the right edge of the tree. */
    1687  2458613491 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    1688  4917470776 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1689  2043744903 :                 goto out0;
    1690             : 
    1691   414990485 :         XFS_BTREE_STATS_INC(cur, increment);
    1692             : 
    1693             :         /*
    1694             :          * March up the tree incrementing pointers.
    1695             :          * Stop when we don't go off the right edge of a block.
    1696             :          */
    1697   418916115 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1698   418914120 :                 block = xfs_btree_get_block(cur, lev, &bp);
    1699             : 
    1700             : #ifdef DEBUG
    1701   418938919 :                 error = xfs_btree_check_block(cur, block, lev, bp);
    1702   418921404 :                 if (error)
    1703           0 :                         goto error0;
    1704             : #endif
    1705             : 
    1706   837842808 :                 if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
    1707             :                         break;
    1708             : 
    1709             :                 /* Read-ahead the right block for the next loop. */
    1710     3917725 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
    1711             :         }
    1712             : 
    1713             :         /*
    1714             :          * If we went off the root then we are either seriously
    1715             :          * confused or have the tree root in an inode.
    1716             :          */
    1717   415005674 :         if (lev == cur->bc_nlevels) {
    1718           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1719           0 :                         goto out0;
    1720           0 :                 ASSERT(0);
    1721           0 :                 xfs_btree_mark_sick(cur);
    1722           0 :                 error = -EFSCORRUPTED;
    1723           0 :                 goto error0;
    1724             :         }
    1725   415005674 :         ASSERT(lev < cur->bc_nlevels);
    1726             : 
    1727             :         /*
    1728             :          * Now walk back down the tree, fixing up the cursor's buffer
    1729             :          * pointers and key numbers.
    1730             :          */
    1731   833906368 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1732   418926789 :                 union xfs_btree_ptr     *ptrp;
    1733             : 
    1734   418926789 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1735   418937897 :                 --lev;
    1736   418937897 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1737   418944485 :                 if (error)
    1738           9 :                         goto error0;
    1739             : 
    1740   418944476 :                 xfs_btree_setbuf(cur, lev, bp);
    1741   418900694 :                 cur->bc_levels[lev].ptr = 1;
    1742             :         }
    1743   414976597 : out1:
    1744 70231847193 :         *stat = 1;
    1745 70231847193 :         return 0;
    1746             : 
    1747  2043744903 : out0:
    1748  2043744903 :         *stat = 0;
    1749  2043744903 :         return 0;
    1750             : 
    1751             : error0:
    1752             :         return error;
    1753             : }
    1754             : 
    1755             : /*
    1756             :  * Decrement cursor by one record at the level.
    1757             :  * For nonzero levels the leaf-ward information is untouched.
    1758             :  */
    1759             : int                                             /* error */
    1760  1343974554 : xfs_btree_decrement(
    1761             :         struct xfs_btree_cur    *cur,
    1762             :         int                     level,
    1763             :         int                     *stat)          /* success/failure */
    1764             : {
    1765  1343974554 :         struct xfs_btree_block  *block;
    1766  1343974554 :         struct xfs_buf          *bp;
    1767  1343974554 :         int                     error;          /* error return value */
    1768  1343974554 :         int                     lev;
    1769  1343974554 :         union xfs_btree_ptr     ptr;
    1770             : 
    1771  1343974554 :         ASSERT(level < cur->bc_nlevels);
    1772             : 
    1773             :         /* Read-ahead to the left at this level. */
    1774  1343974554 :         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
    1775             : 
    1776             :         /* We're done if we remain in the block after the decrement. */
    1777  1343933279 :         if (--cur->bc_levels[level].ptr > 0)
    1778  1125082839 :                 goto out1;
    1779             : 
    1780             :         /* Get a pointer to the btree block. */
    1781   218850440 :         block = xfs_btree_get_block(cur, level, &bp);
    1782             : 
    1783             : #ifdef DEBUG
    1784   218850507 :         error = xfs_btree_check_block(cur, block, level, bp);
    1785   218850446 :         if (error)
    1786           0 :                 goto error0;
    1787             : #endif
    1788             : 
    1789             :         /* Fail if we just went off the left edge of the tree. */
    1790   218850446 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    1791   437701042 :         if (xfs_btree_ptr_is_null(cur, &ptr))
    1792   170662904 :                 goto out0;
    1793             : 
    1794    48187617 :         XFS_BTREE_STATS_INC(cur, decrement);
    1795             : 
    1796             :         /*
    1797             :          * March up the tree decrementing pointers.
    1798             :          * Stop when we don't go off the left edge of a block.
    1799             :          */
    1800    48296201 :         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
    1801    48296190 :                 if (--cur->bc_levels[lev].ptr > 0)
    1802             :                         break;
    1803             :                 /* Read-ahead the left block for the next loop. */
    1804      108493 :                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
    1805             :         }
    1806             : 
    1807             :         /*
    1808             :          * If we went off the root then we are seriously confused.
    1809             :          * or the root of the tree is in an inode.
    1810             :          */
    1811    48187708 :         if (lev == cur->bc_nlevels) {
    1812           0 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
    1813           0 :                         goto out0;
    1814           0 :                 ASSERT(0);
    1815           0 :                 xfs_btree_mark_sick(cur);
    1816           0 :                 error = -EFSCORRUPTED;
    1817           0 :                 goto error0;
    1818             :         }
    1819    48187708 :         ASSERT(lev < cur->bc_nlevels);
    1820             : 
    1821             :         /*
    1822             :          * Now walk back down the tree, fixing up the cursor's buffer
    1823             :          * pointers and key numbers.
    1824             :          */
    1825    96483816 :         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
    1826    48296175 :                 union xfs_btree_ptr     *ptrp;
    1827             : 
    1828    48296175 :                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
    1829    48296188 :                 --lev;
    1830    48296188 :                 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
    1831    48296147 :                 if (error)
    1832           7 :                         goto error0;
    1833    48296140 :                 xfs_btree_setbuf(cur, lev, bp);
    1834    96592216 :                 cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
    1835             :         }
    1836    48187610 : out1:
    1837  1173270449 :         *stat = 1;
    1838  1173270449 :         return 0;
    1839             : 
    1840   170662904 : out0:
    1841   170662904 :         *stat = 0;
    1842   170662904 :         return 0;
    1843             : 
    1844             : error0:
    1845             :         return error;
    1846             : }
    1847             : 
    1848             : /*
    1849             :  * Check the btree block owner now that we have the context to know who the
    1850             :  * real owner is.
    1851             :  */
    1852             : static inline xfs_failaddr_t
    1853  6541789975 : xfs_btree_check_block_owner(
    1854             :         struct xfs_btree_cur    *cur,
    1855             :         struct xfs_btree_block  *block)
    1856             : {
    1857  6541789975 :         if (!xfs_has_crc(cur->bc_mp))
    1858             :                 return NULL;
    1859             : 
    1860  6541789316 :         if (cur->bc_flags & XFS_BTREE_IN_XFILE)
    1861   517932659 :                 return xfbtree_check_block_owner(cur, block);
    1862             : 
    1863  6023856657 :         if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS)) {
    1864  5820154599 :                 if (be32_to_cpu(block->bb_u.s.bb_owner) !=
    1865  5820154599 :                                                 cur->bc_ag.pag->pag_agno)
    1866           0 :                         return __this_address;
    1867             :                 return NULL;
    1868             :         }
    1869             : 
    1870   203702058 :         if (cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER)
    1871             :                 return NULL;
    1872             : 
    1873   203702058 :         if (be64_to_cpu(block->bb_u.l.bb_owner) != cur->bc_ino.ip->i_ino)
    1874           0 :                 return __this_address;
    1875             : 
    1876             :         return NULL;
    1877             : }
    1878             : 
    1879             : int
    1880 22517278017 : xfs_btree_lookup_get_block(
    1881             :         struct xfs_btree_cur            *cur,   /* btree cursor */
    1882             :         int                             level,  /* level in the btree */
    1883             :         const union xfs_btree_ptr       *pp,    /* ptr to btree block */
    1884             :         struct xfs_btree_block          **blkp) /* return btree block */
    1885             : {
    1886 22517278017 :         struct xfs_buf          *bp;    /* buffer pointer for btree block */
    1887 22517278017 :         xfs_daddr_t             daddr;
    1888 22517278017 :         int                     error = 0;
    1889             : 
    1890             :         /* special case the root block if in an inode */
    1891 22517278017 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    1892   380462668 :             (level == cur->bc_nlevels - 1)) {
    1893   139707981 :                 *blkp = xfs_btree_get_iroot(cur);
    1894   139707958 :                 return 0;
    1895             :         }
    1896             : 
    1897             :         /*
    1898             :          * If the old buffer at this level for the disk address we are
    1899             :          * looking for re-use it.
    1900             :          *
    1901             :          * Otherwise throw it away and get a new one.
    1902             :          */
    1903 22377570036 :         bp = cur->bc_levels[level].bp;
    1904 22377570036 :         error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
    1905 22387359174 :         if (error)
    1906             :                 return error;
    1907 22387359174 :         if (bp && xfs_buf_daddr(bp) == daddr) {
    1908 15847217110 :                 *blkp = XFS_BUF_TO_BLOCK(bp);
    1909 15847217110 :                 return 0;
    1910             :         }
    1911             : 
    1912  6540142064 :         error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
    1913  6543475522 :         if (error)
    1914             :                 return error;
    1915             : 
    1916             :         /* Check the inode owner since the verifiers don't. */
    1917  6543478836 :         if (xfs_btree_check_block_owner(cur, *blkp) != NULL)
    1918           0 :                 goto out_bad;
    1919             : 
    1920             :         /* Did we get the level we were looking for? */
    1921  6542987060 :         if (be16_to_cpu((*blkp)->bb_level) != level)
    1922           0 :                 goto out_bad;
    1923             : 
    1924             :         /* Check that internal nodes have at least one record. */
    1925  6542987060 :         if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
    1926           0 :                 goto out_bad;
    1927             : 
    1928  6542987060 :         xfs_btree_setbuf(cur, level, bp);
    1929  6542987060 :         return 0;
    1930             : 
    1931           0 : out_bad:
    1932           0 :         *blkp = NULL;
    1933           0 :         xfs_buf_mark_corrupt(bp);
    1934           0 :         xfs_trans_brelse(cur->bc_tp, bp);
    1935           0 :         xfs_btree_mark_sick(cur);
    1936           0 :         return -EFSCORRUPTED;
    1937             : }
    1938             : 
    1939             : /*
    1940             :  * Get current search key.  For level 0 we don't actually have a key
    1941             :  * structure so we make one up from the record.  For all other levels
    1942             :  * we just return the right key.
    1943             :  */
    1944             : STATIC union xfs_btree_key *
    1945 88676765276 : xfs_lookup_get_search_key(
    1946             :         struct xfs_btree_cur    *cur,
    1947             :         int                     level,
    1948             :         int                     keyno,
    1949             :         struct xfs_btree_block  *block,
    1950             :         union xfs_btree_key     *kp)
    1951             : {
    1952 88676765276 :         if (level == 0) {
    1953 69493324707 :                 cur->bc_ops->init_key_from_rec(kp,
    1954             :                                 xfs_btree_rec_addr(cur, keyno, block));
    1955 69493324707 :                 return kp;
    1956             :         }
    1957             : 
    1958 19183440569 :         return xfs_btree_key_addr(cur, keyno, block);
    1959             : }
    1960             : 
    1961             : /*
    1962             :  * Lookup the record.  The cursor is made to point to it, based on dir.
    1963             :  * stat is set to 0 if can't find any such record, 1 for success.
    1964             :  */
    1965             : int                                     /* error */
    1966 14224203938 : xfs_btree_lookup(
    1967             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    1968             :         xfs_lookup_t            dir,    /* <=, ==, or >= */
    1969             :         int                     *stat)  /* success/failure */
    1970             : {
    1971 14224203938 :         struct xfs_btree_block  *block; /* current btree block */
    1972 14224203938 :         int64_t                 diff;   /* difference for the current key */
    1973 14224203938 :         int                     error;  /* error return value */
    1974 14224203938 :         int                     keyno;  /* current key number */
    1975 14224203938 :         int                     level;  /* level in the btree */
    1976 14224203938 :         union xfs_btree_ptr     *pp;    /* ptr to btree block */
    1977 14224203938 :         union xfs_btree_ptr     ptr;    /* ptr to btree block */
    1978             : 
    1979 14224203938 :         XFS_BTREE_STATS_INC(cur, lookup);
    1980             : 
    1981             :         /* No such thing as a zero-level tree. */
    1982 14224203938 :         if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
    1983           0 :                 xfs_btree_mark_sick(cur);
    1984           0 :                 return -EFSCORRUPTED;
    1985             :         }
    1986             : 
    1987 14224203938 :         block = NULL;
    1988 14224203938 :         keyno = 0;
    1989             : 
    1990             :         /* initialise start pointer from cursor */
    1991 14224203938 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    1992 14225374003 :         pp = &ptr;
    1993             : 
    1994             :         /*
    1995             :          * Iterate over each level in the btree, starting at the root.
    1996             :          * For each level above the leaves, find the key we need, based
    1997             :          * on the lookup record, then follow the corresponding block
    1998             :          * pointer down to the next level.
    1999             :          */
    2000 33652276764 :         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
    2001             :                 /* Get the block we need to do the lookup on. */
    2002 20030947462 :                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
    2003 20032337920 :                 if (error)
    2004        4546 :                         goto error0;
    2005             : 
    2006 20032333374 :                 if (diff == 0) {
    2007             :                         /*
    2008             :                          * If we already had a key match at a higher level, we
    2009             :                          * know we need to use the first entry in this block.
    2010             :                          */
    2011             :                         keyno = 1;
    2012             :                 } else {
    2013             :                         /* Otherwise search this block. Do a binary search. */
    2014             : 
    2015 20017637441 :                         int     high;   /* high entry number */
    2016 20017637441 :                         int     low;    /* low entry number */
    2017             : 
    2018             :                         /* Set low and high entry numbers, 1-based. */
    2019 20017637441 :                         low = 1;
    2020 20017637441 :                         high = xfs_btree_get_numrecs(block);
    2021 20017637441 :                         if (!high) {
    2022             :                                 /* Block is empty, must be an empty leaf. */
    2023   608026904 :                                 if (level != 0 || cur->bc_nlevels != 1) {
    2024           0 :                                         XFS_CORRUPTION_ERROR(__func__,
    2025             :                                                         XFS_ERRLEVEL_LOW,
    2026             :                                                         cur->bc_mp, block,
    2027             :                                                         sizeof(*block));
    2028           0 :                                         xfs_btree_mark_sick(cur);
    2029           0 :                                         return -EFSCORRUPTED;
    2030             :                                 }
    2031             : 
    2032   608026904 :                                 cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
    2033   608026904 :                                 *stat = 0;
    2034   608026904 :                                 return 0;
    2035             :                         }
    2036             : 
    2037             :                         /* Binary search the block. */
    2038 >10676*10^7 :                         while (low <= high) {
    2039 88694912445 :                                 union xfs_btree_key     key;
    2040 88694912445 :                                 union xfs_btree_key     *kp;
    2041             : 
    2042 88694912445 :                                 XFS_BTREE_STATS_INC(cur, compare);
    2043             : 
    2044             :                                 /* keyno is average of low and high. */
    2045 88694912445 :                                 keyno = (low + high) >> 1;
    2046             : 
    2047             :                                 /* Get current search key */
    2048 88694912445 :                                 kp = xfs_lookup_get_search_key(cur, level,
    2049             :                                                 keyno, block, &key);
    2050             : 
    2051             :                                 /*
    2052             :                                  * Compute difference to get next direction:
    2053             :                                  *  - less than, move right
    2054             :                                  *  - greater than, move left
    2055             :                                  *  - equal, we're done
    2056             :                                  */
    2057 88633231141 :                                 diff = cur->bc_ops->key_diff(cur, kp);
    2058 88697566633 :                                 if (diff < 0)
    2059 49045215372 :                                         low = keyno + 1;
    2060 39652351261 :                                 else if (diff > 0)
    2061 38312353382 :                                         high = keyno - 1;
    2062             :                                 else
    2063             :                                         break;
    2064             :                         }
    2065             :                 }
    2066             : 
    2067             :                 /*
    2068             :                  * If there are more levels, set up for the next level
    2069             :                  * by getting the block number and filling in the cursor.
    2070             :                  */
    2071 19426960658 :                 if (level > 0) {
    2072             :                         /*
    2073             :                          * If we moved left, need the previous key number,
    2074             :                          * unless there isn't one.
    2075             :                          */
    2076  5806596213 :                         if (diff > 0 && --keyno < 1)
    2077             :                                 keyno = 1;
    2078  5806596213 :                         pp = xfs_btree_ptr_addr(cur, keyno, block);
    2079             : 
    2080  5806375598 :                         error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
    2081  5806538316 :                         if (error)
    2082           0 :                                 goto error0;
    2083             : 
    2084  5806538316 :                         cur->bc_levels[level].ptr = keyno;
    2085             :                 }
    2086             :         }
    2087             : 
    2088             :         /* Done with the search. See if we need to adjust the results. */
    2089 13621329302 :         if (dir != XFS_LOOKUP_LE && diff < 0) {
    2090   592810276 :                 keyno++;
    2091             :                 /*
    2092             :                  * If ge search and we went off the end of the block, but it's
    2093             :                  * not the last block, we're in the wrong block.
    2094             :                  */
    2095   592810276 :                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    2096  1005927297 :                 if (dir == XFS_LOOKUP_GE &&
    2097   202806326 :                     keyno > xfs_btree_get_numrecs(block) &&
    2098             :                     !xfs_btree_ptr_is_null(cur, &ptr)) {
    2099     1044505 :                         int     i;
    2100             : 
    2101     1044505 :                         cur->bc_levels[0].ptr = keyno;
    2102     1044505 :                         error = xfs_btree_increment(cur, 0, &i);
    2103     1044505 :                         if (error)
    2104           0 :                                 goto error0;
    2105     1044505 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    2106           0 :                                 xfs_btree_mark_sick(cur);
    2107     1044505 :                                 return -EFSCORRUPTED;
    2108             :                         }
    2109     1044505 :                         *stat = 1;
    2110     1044505 :                         return 0;
    2111             :                 }
    2112 13028519026 :         } else if (dir == XFS_LOOKUP_LE && diff > 0)
    2113  6985527368 :                 keyno--;
    2114 13620273824 :         cur->bc_levels[0].ptr = keyno;
    2115             : 
    2116             :         /* Return if we succeeded or not. */
    2117 23857436181 :         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
    2118  3664680689 :                 *stat = 0;
    2119  9955593135 :         else if (dir != XFS_LOOKUP_EQ || diff == 0)
    2120  9690966722 :                 *stat = 1;
    2121             :         else
    2122   264626413 :                 *stat = 0;
    2123             :         return 0;
    2124             : 
    2125             : error0:
    2126             :         return error;
    2127             : }
    2128             : 
    2129             : /* Find the high key storage area from a regular key. */
    2130             : union xfs_btree_key *
    2131   711728139 : xfs_btree_high_key_from_key(
    2132             :         struct xfs_btree_cur    *cur,
    2133             :         union xfs_btree_key     *key)
    2134             : {
    2135   711728139 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2136   711728139 :         return (union xfs_btree_key *)((char *)key +
    2137   711728139 :                         (cur->bc_ops->key_len / 2));
    2138             : }
    2139             : 
    2140             : /* Determine the low (and high if overlapped) keys of a leaf block */
    2141             : STATIC void
    2142   670279221 : xfs_btree_get_leaf_keys(
    2143             :         struct xfs_btree_cur    *cur,
    2144             :         struct xfs_btree_block  *block,
    2145             :         union xfs_btree_key     *key)
    2146             : {
    2147   670279221 :         union xfs_btree_key     max_hkey;
    2148   670279221 :         union xfs_btree_key     hkey;
    2149   670279221 :         union xfs_btree_rec     *rec;
    2150   670279221 :         union xfs_btree_key     *high;
    2151   670279221 :         int                     n;
    2152             : 
    2153   670279221 :         rec = xfs_btree_rec_addr(cur, 1, block);
    2154   670279221 :         cur->bc_ops->init_key_from_rec(key, rec);
    2155             : 
    2156   670272013 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2157             : 
    2158   325123187 :                 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
    2159 >16517*10^7 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2160 82101500785 :                         rec = xfs_btree_rec_addr(cur, n, block);
    2161 82101500785 :                         cur->bc_ops->init_high_key_from_rec(&hkey, rec);
    2162 82107338000 :                         if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
    2163 81806087668 :                                 max_hkey = hkey;
    2164             :                 }
    2165             : 
    2166   325125162 :                 high = xfs_btree_high_key_from_key(cur, key);
    2167   650247928 :                 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
    2168             :         }
    2169   670272790 : }
    2170             : 
    2171             : /* Determine the low (and high if overlapped) keys of a node block */
    2172             : STATIC void
    2173    61560644 : xfs_btree_get_node_keys(
    2174             :         struct xfs_btree_cur    *cur,
    2175             :         struct xfs_btree_block  *block,
    2176             :         union xfs_btree_key     *key)
    2177             : {
    2178    61560644 :         union xfs_btree_key     *hkey;
    2179    61560644 :         union xfs_btree_key     *max_hkey;
    2180    61560644 :         union xfs_btree_key     *high;
    2181    61560644 :         int                     n;
    2182             : 
    2183    61560644 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2184   123025690 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2185             :                                 cur->bc_ops->key_len / 2);
    2186             : 
    2187    61512845 :                 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
    2188 10055273724 :                 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
    2189  4966124070 :                         hkey = xfs_btree_high_key_addr(cur, n, block);
    2190  4966124070 :                         if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
    2191  4954088921 :                                 max_hkey = hkey;
    2192             :                 }
    2193             : 
    2194    61512792 :                 high = xfs_btree_high_key_from_key(cur, key);
    2195   123025612 :                 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
    2196             :         } else {
    2197       95598 :                 memcpy(key, xfs_btree_key_addr(cur, 1, block),
    2198             :                                 cur->bc_ops->key_len);
    2199             :         }
    2200    61560605 : }
    2201             : 
    2202             : /* Derive the keys for any btree block. */
    2203             : void
    2204   670261368 : xfs_btree_get_keys(
    2205             :         struct xfs_btree_cur    *cur,
    2206             :         struct xfs_btree_block  *block,
    2207             :         union xfs_btree_key     *key)
    2208             : {
    2209   670261368 :         if (be16_to_cpu(block->bb_level) == 0)
    2210   669569630 :                 xfs_btree_get_leaf_keys(cur, block, key);
    2211             :         else
    2212      692456 :                 xfs_btree_get_node_keys(cur, block, key);
    2213   670257053 : }
    2214             : 
    2215             : /*
    2216             :  * Decide if we need to update the parent keys of a btree block.  For
    2217             :  * a standard btree this is only necessary if we're updating the first
    2218             :  * record/key.  For an overlapping btree, we must always update the
    2219             :  * keys because the highest key can be in any of the records or keys
    2220             :  * in the block.
    2221             :  */
    2222             : static inline bool
    2223             : xfs_btree_needs_key_update(
    2224             :         struct xfs_btree_cur    *cur,
    2225             :         int                     ptr)
    2226             : {
    2227   919889974 :         return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
    2228             : }
    2229             : 
    2230             : /*
    2231             :  * Update the low and high parent keys of the given level, progressing
    2232             :  * towards the root.  If force_all is false, stop if the keys for a given
    2233             :  * level do not need updating.
    2234             :  */
    2235             : STATIC int
    2236   328669538 : __xfs_btree_updkeys(
    2237             :         struct xfs_btree_cur    *cur,
    2238             :         int                     level,
    2239             :         struct xfs_btree_block  *block,
    2240             :         struct xfs_buf          *bp0,
    2241             :         bool                    force_all)
    2242             : {
    2243   328669538 :         union xfs_btree_key     key;    /* keys from current level */
    2244   328669538 :         union xfs_btree_key     *lkey;  /* keys from the next level up */
    2245   328669538 :         union xfs_btree_key     *hkey;
    2246   328669538 :         union xfs_btree_key     *nlkey; /* keys from the next level up */
    2247   328669538 :         union xfs_btree_key     *nhkey;
    2248   328669538 :         struct xfs_buf          *bp;
    2249   328669538 :         int                     ptr;
    2250             : 
    2251   328669538 :         ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
    2252             : 
    2253             :         /* Exit if there aren't any parent levels to update. */
    2254   328669538 :         if (level + 1 >= cur->bc_nlevels)
    2255             :                 return 0;
    2256             : 
    2257   315248673 :         trace_xfs_btree_updkeys(cur, level, bp0);
    2258             : 
    2259   315249612 :         lkey = &key;
    2260   315249612 :         hkey = xfs_btree_high_key_from_key(cur, lkey);
    2261   315250247 :         xfs_btree_get_keys(cur, block, lkey);
    2262   691360721 :         for (level++; level < cur->bc_nlevels; level++) {
    2263             : #ifdef DEBUG
    2264   376110474 :                 int             error;
    2265             : #endif
    2266   376110474 :                 block = xfs_btree_get_block(cur, level, &bp);
    2267   376110516 :                 trace_xfs_btree_updkeys(cur, level, bp);
    2268             : #ifdef DEBUG
    2269   376110696 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2270   376108701 :                 if (error)
    2271           0 :                         return error;
    2272             : #endif
    2273   376108701 :                 ptr = cur->bc_levels[level].ptr;
    2274   376108701 :                 nlkey = xfs_btree_key_addr(cur, ptr, block);
    2275   376108701 :                 nhkey = xfs_btree_high_key_addr(cur, ptr, block);
    2276   751975197 :                 if (!force_all &&
    2277   340748146 :                     xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
    2278             :                     xfs_btree_keycmp_eq(cur, nhkey, hkey))
    2279             :                         break;
    2280    87159008 :                 xfs_btree_copy_keys(cur, nlkey, lkey, 1);
    2281    87159522 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2282    87159537 :                 if (level + 1 >= cur->bc_nlevels)
    2283             :                         break;
    2284    60860046 :                 xfs_btree_get_node_keys(cur, block, lkey);
    2285             :         }
    2286             : 
    2287             :         return 0;
    2288             : }
    2289             : 
    2290             : /* Update all the keys from some level in cursor back to the root. */
    2291             : STATIC int
    2292      135991 : xfs_btree_updkeys_force(
    2293             :         struct xfs_btree_cur    *cur,
    2294             :         int                     level)
    2295             : {
    2296      135991 :         struct xfs_buf          *bp;
    2297      135991 :         struct xfs_btree_block  *block;
    2298             : 
    2299      135991 :         block = xfs_btree_get_block(cur, level, &bp);
    2300      135991 :         return __xfs_btree_updkeys(cur, level, block, bp, true);
    2301             : }
    2302             : 
    2303             : /*
    2304             :  * Update the parent keys of the given level, progressing towards the root.
    2305             :  */
    2306             : STATIC int
    2307   666062429 : xfs_btree_update_keys(
    2308             :         struct xfs_btree_cur    *cur,
    2309             :         int                     level)
    2310             : {
    2311   666062429 :         struct xfs_btree_block  *block;
    2312   666062429 :         struct xfs_buf          *bp;
    2313   666062429 :         union xfs_btree_key     *kp;
    2314   666062429 :         union xfs_btree_key     key;
    2315   666062429 :         int                     ptr;
    2316             : 
    2317   666062429 :         ASSERT(level >= 0);
    2318             : 
    2319   666062429 :         block = xfs_btree_get_block(cur, level, &bp);
    2320   666080756 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
    2321   328535588 :                 return __xfs_btree_updkeys(cur, level, block, bp, false);
    2322             : 
    2323             :         /*
    2324             :          * Go up the tree from this level toward the root.
    2325             :          * At each level, update the key value to the value input.
    2326             :          * Stop when we reach a level where the cursor isn't pointing
    2327             :          * at the first entry in the block.
    2328             :          */
    2329   337545168 :         xfs_btree_get_keys(cur, block, &key);
    2330   417868090 :         for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
    2331             : #ifdef DEBUG
    2332    80324412 :                 int             error;
    2333             : #endif
    2334    80324412 :                 block = xfs_btree_get_block(cur, level, &bp);
    2335             : #ifdef DEBUG
    2336    80332989 :                 error = xfs_btree_check_block(cur, block, level, bp);
    2337    80333316 :                 if (error)
    2338           0 :                         return error;
    2339             : #endif
    2340    80333316 :                 ptr = cur->bc_levels[level].ptr;
    2341    80333316 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    2342    80333316 :                 xfs_btree_copy_keys(cur, kp, &key, 1);
    2343    80333404 :                 xfs_btree_log_keys(cur, bp, ptr, ptr);
    2344             :         }
    2345             : 
    2346             :         return 0;
    2347             : }
    2348             : 
    2349             : /*
    2350             :  * Update the record referred to by cur to the value in the
    2351             :  * given record. This either works (return 0) or gets an
    2352             :  * EFSCORRUPTED error.
    2353             :  */
    2354             : int
    2355   413987069 : xfs_btree_update(
    2356             :         struct xfs_btree_cur    *cur,
    2357             :         union xfs_btree_rec     *rec)
    2358             : {
    2359   413987069 :         struct xfs_btree_block  *block;
    2360   413987069 :         struct xfs_buf          *bp;
    2361   413987069 :         int                     error;
    2362   413987069 :         int                     ptr;
    2363   413987069 :         union xfs_btree_rec     *rp;
    2364             : 
    2365             :         /* Pick up the current block. */
    2366   413987069 :         block = xfs_btree_get_block(cur, 0, &bp);
    2367             : 
    2368             : #ifdef DEBUG
    2369   414004373 :         error = xfs_btree_check_block(cur, block, 0, bp);
    2370   413973449 :         if (error)
    2371           0 :                 goto error0;
    2372             : #endif
    2373             :         /* Get the address of the rec to be updated. */
    2374   413973449 :         ptr = cur->bc_levels[0].ptr;
    2375   413973449 :         rp = xfs_btree_rec_addr(cur, ptr, block);
    2376             : 
    2377             :         /* Fill in the new contents and log them. */
    2378   413973449 :         xfs_btree_copy_recs(cur, rp, rec, 1);
    2379   414002765 :         xfs_btree_log_recs(cur, bp, ptr, ptr);
    2380             : 
    2381             :         /*
    2382             :          * If we are tracking the last record in the tree and
    2383             :          * we are at the far right edge of the tree, update it.
    2384             :          */
    2385   414003467 :         if (xfs_btree_is_lastrec(cur, block, 0)) {
    2386           0 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    2387             :                                             ptr, LASTREC_UPDATE);
    2388             :         }
    2389             : 
    2390             :         /* Pass new key value up to our parent. */
    2391   795882358 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    2392   114214142 :                 error = xfs_btree_update_keys(cur, 0);
    2393   114207079 :                 if (error)
    2394           0 :                         goto error0;
    2395             :         }
    2396             : 
    2397             :         return 0;
    2398             : 
    2399             : error0:
    2400             :         return error;
    2401             : }
    2402             : 
    2403             : /*
    2404             :  * Move 1 record left from cur/level if possible.
    2405             :  * Update cur to reflect the new path.
    2406             :  */
    2407             : STATIC int                                      /* error */
    2408    62220545 : xfs_btree_lshift(
    2409             :         struct xfs_btree_cur    *cur,
    2410             :         int                     level,
    2411             :         int                     *stat)          /* success/failure */
    2412             : {
    2413    62220545 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2414    62220545 :         struct xfs_btree_block  *left;          /* left btree block */
    2415    62220545 :         int                     lrecs;          /* left record count */
    2416    62220545 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2417    62220545 :         struct xfs_btree_block  *right;         /* right btree block */
    2418    62220545 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2419    62220545 :         int                     rrecs;          /* right record count */
    2420    62220545 :         union xfs_btree_ptr     lptr;           /* left btree pointer */
    2421    62220545 :         union xfs_btree_key     *rkp = NULL;    /* right btree key */
    2422    62220545 :         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
    2423    62220545 :         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
    2424    62220545 :         int                     error;          /* error return value */
    2425    62220545 :         int                     i;
    2426             : 
    2427    62220545 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    2428    27459798 :             level == cur->bc_nlevels - 1)
    2429           0 :                 goto out0;
    2430             : 
    2431             :         /* Set up variables for this block as "right". */
    2432    62220545 :         right = xfs_btree_get_block(cur, level, &rbp);
    2433             : 
    2434             : #ifdef DEBUG
    2435    62220532 :         error = xfs_btree_check_block(cur, right, level, rbp);
    2436    62220506 :         if (error)
    2437           0 :                 goto error0;
    2438             : #endif
    2439             : 
    2440             :         /* If we've got no left sibling then we can't shift an entry left. */
    2441    62220506 :         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2442   124441066 :         if (xfs_btree_ptr_is_null(cur, &lptr))
    2443       34699 :                 goto out0;
    2444             : 
    2445             :         /*
    2446             :          * If the cursor entry is the one that would be moved, don't
    2447             :          * do it... it's too complicated.
    2448             :          */
    2449    62185834 :         if (cur->bc_levels[level].ptr <= 1)
    2450        9582 :                 goto out0;
    2451             : 
    2452             :         /* Set up the left neighbor as "left". */
    2453    62176252 :         error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    2454    62176161 :         if (error)
    2455          45 :                 goto error0;
    2456             : 
    2457             :         /* If it's full, it can't take another entry. */
    2458    62176116 :         lrecs = xfs_btree_get_numrecs(left);
    2459    62176116 :         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
    2460      632622 :                 goto out0;
    2461             : 
    2462    61543579 :         rrecs = xfs_btree_get_numrecs(right);
    2463             : 
    2464             :         /*
    2465             :          * We add one entry to the left side and remove one for the right side.
    2466             :          * Account for it here, the changes will be updated on disk and logged
    2467             :          * later.
    2468             :          */
    2469    61543579 :         lrecs++;
    2470    61543579 :         rrecs--;
    2471             : 
    2472    61543579 :         XFS_BTREE_STATS_INC(cur, lshift);
    2473    61543579 :         XFS_BTREE_STATS_ADD(cur, moves, 1);
    2474             : 
    2475             :         /*
    2476             :          * If non-leaf, copy a key and a ptr to the left block.
    2477             :          * Log the changes to the left block.
    2478             :          */
    2479    61543579 :         if (level > 0) {
    2480             :                 /* It's a non-leaf.  Move keys and pointers. */
    2481       79602 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2482       79602 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2483             : 
    2484       79602 :                 lkp = xfs_btree_key_addr(cur, lrecs, left);
    2485       79602 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2486             : 
    2487       79602 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2488       79592 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2489             : 
    2490       79592 :                 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
    2491       79592 :                 if (error)
    2492           0 :                         goto error0;
    2493             : 
    2494       79592 :                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
    2495       79592 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
    2496             : 
    2497       79592 :                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
    2498       79592 :                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
    2499             : 
    2500       79592 :                 ASSERT(cur->bc_ops->keys_inorder(cur,
    2501             :                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
    2502             :         } else {
    2503             :                 /* It's a leaf.  Move records.  */
    2504    61463977 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2505             : 
    2506    61463977 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2507    61463977 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2508             : 
    2509    61463977 :                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
    2510    61463991 :                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
    2511             : 
    2512    61463989 :                 ASSERT(cur->bc_ops->recs_inorder(cur,
    2513             :                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
    2514             :         }
    2515             : 
    2516    61543591 :         xfs_btree_set_numrecs(left, lrecs);
    2517    61543591 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2518             : 
    2519    61543585 :         xfs_btree_set_numrecs(right, rrecs);
    2520    61543585 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2521             : 
    2522             :         /*
    2523             :          * Slide the contents of right down one entry.
    2524             :          */
    2525    61543553 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
    2526    61543553 :         if (level > 0) {
    2527             :                 /* It's a nonleaf. operate on keys and ptrs */
    2528    14070212 :                 for (i = 0; i < rrecs; i++) {
    2529    13990620 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
    2530    13990620 :                         if (error)
    2531           0 :                                 goto error0;
    2532             :                 }
    2533             : 
    2534       79592 :                 xfs_btree_shift_keys(cur,
    2535             :                                 xfs_btree_key_addr(cur, 2, right),
    2536             :                                 -1, rrecs);
    2537       79592 :                 xfs_btree_shift_ptrs(cur,
    2538             :                                 xfs_btree_ptr_addr(cur, 2, right),
    2539             :                                 -1, rrecs);
    2540             : 
    2541       79592 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2542       79592 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2543             :         } else {
    2544             :                 /* It's a leaf. operate on records */
    2545    61463961 :                 xfs_btree_shift_recs(cur,
    2546             :                         xfs_btree_rec_addr(cur, 2, right),
    2547             :                         -1, rrecs);
    2548    61463988 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2549             :         }
    2550             : 
    2551             :         /*
    2552             :          * Using a temporary cursor, update the parent key values of the
    2553             :          * block on the left.
    2554             :          */
    2555    61543597 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2556    18585551 :                 error = xfs_btree_dup_cursor(cur, &tcur);
    2557    18585560 :                 if (error)
    2558           0 :                         goto error0;
    2559    18585560 :                 i = xfs_btree_firstrec(tcur, level);
    2560    18585556 :                 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2561           0 :                         xfs_btree_mark_sick(cur);
    2562           0 :                         error = -EFSCORRUPTED;
    2563           0 :                         goto error0;
    2564             :                 }
    2565             : 
    2566    18585556 :                 error = xfs_btree_decrement(tcur, level, &i);
    2567    18585544 :                 if (error)
    2568           0 :                         goto error1;
    2569             : 
    2570             :                 /* Update the parent high keys of the left block, if needed. */
    2571    18585544 :                 error = xfs_btree_update_keys(tcur, level);
    2572    18585557 :                 if (error)
    2573           0 :                         goto error1;
    2574             : 
    2575    18585557 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2576             :         }
    2577             : 
    2578             :         /* Update the parent keys of the right block. */
    2579    61543597 :         error = xfs_btree_update_keys(cur, level);
    2580    61543586 :         if (error)
    2581           0 :                 goto error0;
    2582             : 
    2583             :         /* Slide the cursor value left one. */
    2584    61543586 :         cur->bc_levels[level].ptr--;
    2585             : 
    2586    61543586 :         *stat = 1;
    2587    61543586 :         return 0;
    2588             : 
    2589      676903 : out0:
    2590      676903 :         *stat = 0;
    2591      676903 :         return 0;
    2592             : 
    2593             : error0:
    2594             :         return error;
    2595             : 
    2596           0 : error1:
    2597           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2598           0 :         return error;
    2599             : }
    2600             : 
    2601             : /*
    2602             :  * Move 1 record right from cur/level if possible.
    2603             :  * Update cur to reflect the new path.
    2604             :  */
    2605             : STATIC int                                      /* error */
    2606    98138190 : xfs_btree_rshift(
    2607             :         struct xfs_btree_cur    *cur,
    2608             :         int                     level,
    2609             :         int                     *stat)          /* success/failure */
    2610             : {
    2611    98138190 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2612    98138190 :         struct xfs_btree_block  *left;          /* left btree block */
    2613    98138190 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2614    98138190 :         struct xfs_btree_block  *right;         /* right btree block */
    2615    98138190 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    2616    98138190 :         union xfs_btree_ptr     rptr;           /* right block pointer */
    2617    98138190 :         union xfs_btree_key     *rkp;           /* right btree key */
    2618    98138190 :         int                     rrecs;          /* right record count */
    2619    98138190 :         int                     lrecs;          /* left record count */
    2620    98138190 :         int                     error;          /* error return value */
    2621    98138190 :         int                     i;              /* loop counter */
    2622             : 
    2623    98138190 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    2624    47195752 :             (level == cur->bc_nlevels - 1))
    2625           0 :                 goto out0;
    2626             : 
    2627             :         /* Set up variables for this block as "left". */
    2628    98138190 :         left = xfs_btree_get_block(cur, level, &lbp);
    2629             : 
    2630             : #ifdef DEBUG
    2631    98137296 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2632    98137159 :         if (error)
    2633           0 :                 goto error0;
    2634             : #endif
    2635             : 
    2636             :         /* If we've got no right sibling then we can't shift an entry right. */
    2637    98137159 :         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2638   196274782 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    2639    39807571 :                 goto out0;
    2640             : 
    2641             :         /*
    2642             :          * If the cursor entry is the one that would be moved, don't
    2643             :          * do it... it's too complicated.
    2644             :          */
    2645    58329820 :         lrecs = xfs_btree_get_numrecs(left);
    2646    58329820 :         if (cur->bc_levels[level].ptr >= lrecs)
    2647     6936569 :                 goto out0;
    2648             : 
    2649             :         /* Set up the right neighbor as "right". */
    2650    51393251 :         error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    2651    51386671 :         if (error)
    2652         116 :                 goto error0;
    2653             : 
    2654             :         /* If it's full, it can't take another entry. */
    2655    51386555 :         rrecs = xfs_btree_get_numrecs(right);
    2656    51386555 :         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
    2657     6162272 :                 goto out0;
    2658             : 
    2659    45227101 :         XFS_BTREE_STATS_INC(cur, rshift);
    2660    45227101 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2661             : 
    2662             :         /*
    2663             :          * Make a hole at the start of the right neighbor block, then
    2664             :          * copy the last left block entry to the hole.
    2665             :          */
    2666    45227101 :         if (level > 0) {
    2667             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    2668       15759 :                 union xfs_btree_key     *lkp;
    2669       15759 :                 union xfs_btree_ptr     *lpp;
    2670       15759 :                 union xfs_btree_ptr     *rpp;
    2671             : 
    2672       15759 :                 lkp = xfs_btree_key_addr(cur, lrecs, left);
    2673       15759 :                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
    2674       15759 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2675       15759 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2676             : 
    2677     1037803 :                 for (i = rrecs - 1; i >= 0; i--) {
    2678     1022044 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    2679     1022044 :                         if (error)
    2680           0 :                                 goto error0;
    2681             :                 }
    2682             : 
    2683       15759 :                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
    2684       15759 :                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
    2685             : 
    2686       15759 :                 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
    2687       15759 :                 if (error)
    2688           0 :                         goto error0;
    2689             : 
    2690             :                 /* Now put the new data in, and log it. */
    2691       15759 :                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
    2692       15759 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
    2693             : 
    2694       15759 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
    2695       15759 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
    2696             : 
    2697       15759 :                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
    2698             :                         xfs_btree_key_addr(cur, 2, right)));
    2699             :         } else {
    2700             :                 /* It's a leaf. make a hole in the records */
    2701    45211342 :                 union xfs_btree_rec     *lrp;
    2702    45211342 :                 union xfs_btree_rec     *rrp;
    2703             : 
    2704    45211342 :                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
    2705    45211342 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2706             : 
    2707    45211342 :                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
    2708             : 
    2709             :                 /* Now put the new data in, and log it. */
    2710    45212233 :                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
    2711    45212219 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
    2712             :         }
    2713             : 
    2714             :         /*
    2715             :          * Decrement and log left's numrecs, bump and log right's numrecs.
    2716             :          */
    2717    45229192 :         xfs_btree_set_numrecs(left, --lrecs);
    2718    45229192 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
    2719             : 
    2720    45228548 :         xfs_btree_set_numrecs(right, ++rrecs);
    2721    45228548 :         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
    2722             : 
    2723             :         /*
    2724             :          * Using a temporary cursor, update the parent key values of the
    2725             :          * block on the right.
    2726             :          */
    2727    45228368 :         error = xfs_btree_dup_cursor(cur, &tcur);
    2728    45230435 :         if (error)
    2729           0 :                 goto error0;
    2730    45230435 :         i = xfs_btree_lastrec(tcur, level);
    2731    45230688 :         if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
    2732           0 :                 xfs_btree_mark_sick(cur);
    2733           0 :                 error = -EFSCORRUPTED;
    2734           0 :                 goto error0;
    2735             :         }
    2736             : 
    2737    45230688 :         error = xfs_btree_increment(tcur, level, &i);
    2738    45231666 :         if (error)
    2739           0 :                 goto error1;
    2740             : 
    2741             :         /* Update the parent high keys of the left block, if needed. */
    2742    45231666 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2743    15539348 :                 error = xfs_btree_update_keys(cur, level);
    2744    15539347 :                 if (error)
    2745           0 :                         goto error1;
    2746             :         }
    2747             : 
    2748             :         /* Update the parent keys of the right block. */
    2749    45231665 :         error = xfs_btree_update_keys(tcur, level);
    2750    45231644 :         if (error)
    2751           0 :                 goto error1;
    2752             : 
    2753    45231644 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    2754             : 
    2755    45231657 :         *stat = 1;
    2756    45231657 :         return 0;
    2757             : 
    2758    52906412 : out0:
    2759    52906412 :         *stat = 0;
    2760    52906412 :         return 0;
    2761             : 
    2762             : error0:
    2763             :         return error;
    2764             : 
    2765           0 : error1:
    2766           0 :         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    2767           0 :         return error;
    2768             : }
    2769             : 
    2770             : static inline int
    2771      703023 : xfs_btree_alloc_block(
    2772             :         struct xfs_btree_cur            *cur,
    2773             :         const union xfs_btree_ptr       *hint_block,
    2774             :         union xfs_btree_ptr             *new_block,
    2775             :         int                             *stat)
    2776             : {
    2777      703023 :         int                             error;
    2778             : 
    2779      703023 :         error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat);
    2780      703023 :         trace_xfs_btree_alloc_block(cur, new_block, *stat, error);
    2781      703023 :         return error;
    2782             : }
    2783             : 
    2784             : /*
    2785             :  * Split cur/level block in half.
    2786             :  * Return new block number and the key to its first
    2787             :  * record (to be inserted into parent).
    2788             :  */
    2789             : STATIC int                                      /* error */
    2790      676903 : __xfs_btree_split(
    2791             :         struct xfs_btree_cur    *cur,
    2792             :         int                     level,
    2793             :         union xfs_btree_ptr     *ptrp,
    2794             :         union xfs_btree_key     *key,
    2795             :         struct xfs_btree_cur    **curp,
    2796             :         int                     *stat)          /* success/failure */
    2797             : {
    2798      676903 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    2799      676903 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    2800      676903 :         struct xfs_btree_block  *left;          /* left btree block */
    2801      676903 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    2802      676903 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    2803      676903 :         struct xfs_btree_block  *right;         /* right btree block */
    2804      676903 :         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
    2805      676903 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    2806      676903 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    2807      676903 :         int                     lrecs;
    2808      676903 :         int                     rrecs;
    2809      676903 :         int                     src_index;
    2810      676903 :         int                     error;          /* error return value */
    2811      676903 :         int                     i;
    2812             : 
    2813      676903 :         XFS_BTREE_STATS_INC(cur, split);
    2814             : 
    2815             :         /* Set up left block (current one). */
    2816      676903 :         left = xfs_btree_get_block(cur, level, &lbp);
    2817             : 
    2818             : #ifdef DEBUG
    2819      676903 :         error = xfs_btree_check_block(cur, left, level, lbp);
    2820      676903 :         if (error)
    2821           0 :                 goto error0;
    2822             : #endif
    2823             : 
    2824      676903 :         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    2825             : 
    2826             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    2827      676903 :         error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat);
    2828      676903 :         if (error)
    2829           1 :                 goto error0;
    2830      676902 :         if (*stat == 0)
    2831           0 :                 goto out0;
    2832      676902 :         XFS_BTREE_STATS_INC(cur, alloc);
    2833             : 
    2834             :         /* Set up the new block as "right". */
    2835      676902 :         error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
    2836      676902 :         if (error)
    2837           0 :                 goto error0;
    2838             : 
    2839             :         /* Fill in the btree header for the new right block. */
    2840     1353804 :         xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
    2841             : 
    2842             :         /*
    2843             :          * Split the entries between the old and the new block evenly.
    2844             :          * Make sure that if there's an odd number of entries now, that
    2845             :          * each new block will have the same number of entries.
    2846             :          */
    2847      676902 :         lrecs = xfs_btree_get_numrecs(left);
    2848      676902 :         rrecs = lrecs / 2;
    2849      676902 :         if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
    2850       28767 :                 rrecs++;
    2851      676902 :         src_index = (lrecs - rrecs + 1);
    2852             : 
    2853      676902 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    2854             : 
    2855             :         /* Adjust numrecs for the later get_*_keys() calls. */
    2856      676902 :         lrecs -= rrecs;
    2857      676902 :         xfs_btree_set_numrecs(left, lrecs);
    2858     1353804 :         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
    2859             : 
    2860             :         /*
    2861             :          * Copy btree block entries from the left block over to the
    2862             :          * new block, the right. Update the right block and log the
    2863             :          * changes.
    2864             :          */
    2865      676902 :         if (level > 0) {
    2866             :                 /* It's a non-leaf.  Move keys and pointers. */
    2867        2481 :                 union xfs_btree_key     *lkp;   /* left btree key */
    2868        2481 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    2869        2481 :                 union xfs_btree_key     *rkp;   /* right btree key */
    2870        2481 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    2871             : 
    2872        2481 :                 lkp = xfs_btree_key_addr(cur, src_index, left);
    2873        2481 :                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
    2874        2481 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    2875        2481 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    2876             : 
    2877        4962 :                 for (i = src_index; i < rrecs; i++) {
    2878           0 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    2879           0 :                         if (error)
    2880           0 :                                 goto error0;
    2881             :                 }
    2882             : 
    2883             :                 /* Copy the keys & pointers to the new block. */
    2884        2481 :                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
    2885        2481 :                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
    2886             : 
    2887        2481 :                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
    2888        2481 :                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
    2889             : 
    2890             :                 /* Stash the keys of the new block for later insertion. */
    2891        2481 :                 xfs_btree_get_node_keys(cur, right, key);
    2892             :         } else {
    2893             :                 /* It's a leaf.  Move records.  */
    2894      674421 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    2895      674421 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    2896             : 
    2897      674421 :                 lrp = xfs_btree_rec_addr(cur, src_index, left);
    2898      674421 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    2899             : 
    2900             :                 /* Copy records to the new block. */
    2901      674421 :                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
    2902      674421 :                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
    2903             : 
    2904             :                 /* Stash the keys of the new block for later insertion. */
    2905      674421 :                 xfs_btree_get_leaf_keys(cur, right, key);
    2906             :         }
    2907             : 
    2908             :         /*
    2909             :          * Find the left block number by looking in the buffer.
    2910             :          * Adjust sibling pointers.
    2911             :          */
    2912      676902 :         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
    2913      676902 :         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
    2914      676902 :         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    2915      676902 :         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
    2916             : 
    2917      676902 :         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
    2918      676902 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    2919             : 
    2920             :         /*
    2921             :          * If there's a block to the new block's right, make that block
    2922             :          * point back to right instead of to left.
    2923             :          */
    2924     1353804 :         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
    2925      340146 :                 error = xfs_btree_read_buf_block(cur, &rrptr,
    2926             :                                                         0, &rrblock, &rrbp);
    2927      340146 :                 if (error)
    2928         233 :                         goto error0;
    2929      339913 :                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
    2930      339913 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    2931             :         }
    2932             : 
    2933             :         /* Update the parent high keys of the left block, if needed. */
    2934      676669 :         if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
    2935      335514 :                 error = xfs_btree_update_keys(cur, level);
    2936      335514 :                 if (error)
    2937           0 :                         goto error0;
    2938             :         }
    2939             : 
    2940             :         /*
    2941             :          * If the cursor is really in the right block, move it there.
    2942             :          * If it's just pointing past the last entry in left, then we'll
    2943             :          * insert there, so don't change anything in that case.
    2944             :          */
    2945      676669 :         if (cur->bc_levels[level].ptr > lrecs + 1) {
    2946      518290 :                 xfs_btree_setbuf(cur, level, rbp);
    2947      518290 :                 cur->bc_levels[level].ptr -= lrecs;
    2948             :         }
    2949             :         /*
    2950             :          * If there are more levels, we'll need another cursor which refers
    2951             :          * the right block, no matter where this cursor was.
    2952             :          */
    2953      676669 :         if (level + 1 < cur->bc_nlevels) {
    2954      653003 :                 error = xfs_btree_dup_cursor(cur, curp);
    2955      653003 :                 if (error)
    2956           7 :                         goto error0;
    2957      652996 :                 (*curp)->bc_levels[level + 1].ptr++;
    2958             :         }
    2959      676662 :         *ptrp = rptr;
    2960      676662 :         *stat = 1;
    2961      676662 :         return 0;
    2962             : out0:
    2963           0 :         *stat = 0;
    2964           0 :         return 0;
    2965             : 
    2966             : error0:
    2967             :         return error;
    2968             : }
    2969             : 
    2970             : #ifdef __KERNEL__
    2971             : struct xfs_btree_split_args {
    2972             :         struct xfs_btree_cur    *cur;
    2973             :         int                     level;
    2974             :         union xfs_btree_ptr     *ptrp;
    2975             :         union xfs_btree_key     *key;
    2976             :         struct xfs_btree_cur    **curp;
    2977             :         int                     *stat;          /* success/failure */
    2978             :         int                     result;
    2979             :         bool                    kswapd; /* allocation in kswapd context */
    2980             :         struct completion       *done;
    2981             :         struct work_struct      work;
    2982             : };
    2983             : 
    2984             : /*
    2985             :  * Stack switching interfaces for allocation
    2986             :  */
    2987             : static void
    2988       12428 : xfs_btree_split_worker(
    2989             :         struct work_struct      *work)
    2990             : {
    2991       12428 :         struct xfs_btree_split_args     *args = container_of(work,
    2992             :                                                 struct xfs_btree_split_args, work);
    2993       12428 :         unsigned long           pflags;
    2994       12428 :         unsigned long           new_pflags = 0;
    2995             : 
    2996             :         /*
    2997             :          * we are in a transaction context here, but may also be doing work
    2998             :          * in kswapd context, and hence we may need to inherit that state
    2999             :          * temporarily to ensure that we don't block waiting for memory reclaim
    3000             :          * in any way.
    3001             :          */
    3002       12428 :         if (args->kswapd)
    3003           0 :                 new_pflags |= PF_MEMALLOC | PF_KSWAPD;
    3004             : 
    3005       12428 :         current_set_flags_nested(&pflags, new_pflags);
    3006       12428 :         xfs_trans_set_context(args->cur->bc_tp);
    3007             : 
    3008       12428 :         args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
    3009             :                                          args->key, args->curp, args->stat);
    3010             : 
    3011       12428 :         xfs_trans_clear_context(args->cur->bc_tp);
    3012       12428 :         current_restore_flags_nested(&pflags, new_pflags);
    3013             : 
    3014             :         /*
    3015             :          * Do not access args after complete() has run here. We don't own args
    3016             :          * and the owner may run and free args before we return here.
    3017             :          */
    3018       12428 :         complete(args->done);
    3019             : 
    3020       12428 : }
    3021             : 
    3022             : /*
    3023             :  * BMBT split requests often come in with little stack to work on so we push
    3024             :  * them off to a worker thread so there is lots of stack to use. For the other
    3025             :  * btree types, just call directly to avoid the context switch overhead here.
    3026             :  *
    3027             :  * Care must be taken here - the work queue rescuer thread introduces potential
    3028             :  * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
    3029             :  * AGFs to allocate blocks. A task being run by the rescuer could attempt to
    3030             :  * lock an AGF that is already locked by a task queued to run by the rescuer,
    3031             :  * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
    3032             :  * release it until the current thread it is running gains the lock.
    3033             :  *
    3034             :  * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
    3035             :  * already locked to allocate from. The only place that doesn't hold an AGF
    3036             :  * locked is unwritten extent conversion at IO completion, but that has already
    3037             :  * been offloaded to a worker thread and hence has no stack consumption issues
    3038             :  * we have to worry about.
    3039             :  */
    3040             : STATIC int                                      /* error */
    3041      676903 : xfs_btree_split(
    3042             :         struct xfs_btree_cur    *cur,
    3043             :         int                     level,
    3044             :         union xfs_btree_ptr     *ptrp,
    3045             :         union xfs_btree_key     *key,
    3046             :         struct xfs_btree_cur    **curp,
    3047             :         int                     *stat)          /* success/failure */
    3048             : {
    3049      676903 :         struct xfs_btree_split_args     args;
    3050      676903 :         DECLARE_COMPLETION_ONSTACK(done);
    3051             : 
    3052      676903 :         if (cur->bc_btnum != XFS_BTNUM_BMAP ||
    3053      215957 :             cur->bc_tp->t_highest_agno == NULLAGNUMBER)
    3054      664475 :                 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
    3055             : 
    3056       12428 :         args.cur = cur;
    3057       12428 :         args.level = level;
    3058       12428 :         args.ptrp = ptrp;
    3059       12428 :         args.key = key;
    3060       12428 :         args.curp = curp;
    3061       12428 :         args.stat = stat;
    3062       12428 :         args.done = &done;
    3063       12428 :         args.kswapd = current_is_kswapd();
    3064       12428 :         INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
    3065       12428 :         queue_work(xfs_alloc_wq, &args.work);
    3066       12428 :         wait_for_completion(&done);
    3067       12428 :         destroy_work_on_stack(&args.work);
    3068       12428 :         return args.result;
    3069             : }
    3070             : #else
    3071             : #define xfs_btree_split __xfs_btree_split
    3072             : #endif /* __KERNEL__ */
    3073             : 
    3074             : 
    3075             : /*
    3076             :  * Copy the old inode root contents into a real block and make the
    3077             :  * broot point to it.
    3078             :  */
    3079             : int                                             /* error */
    3080        2454 : xfs_btree_new_iroot(
    3081             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3082             :         int                     *logflags,      /* logging flags for inode */
    3083             :         int                     *stat)          /* return status - 0 fail */
    3084             : {
    3085        2454 :         struct xfs_buf          *cbp;           /* buffer for cblock */
    3086        2454 :         struct xfs_btree_block  *block;         /* btree block */
    3087        2454 :         struct xfs_btree_block  *cblock;        /* child btree block */
    3088        2454 :         union xfs_btree_key     *ckp;           /* child key pointer */
    3089        2454 :         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
    3090        2454 :         union xfs_btree_key     *kp;            /* pointer to btree key */
    3091        2454 :         union xfs_btree_ptr     *pp;            /* pointer to block addr */
    3092        2454 :         union xfs_btree_ptr     nptr;           /* new block addr */
    3093        2454 :         int                     level;          /* btree level */
    3094        2454 :         int                     error;          /* error return code */
    3095        2454 :         int                     i;              /* loop counter */
    3096             : 
    3097        2454 :         XFS_BTREE_STATS_INC(cur, newroot);
    3098             : 
    3099        2454 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3100             : 
    3101        2454 :         level = cur->bc_nlevels - 1;
    3102             : 
    3103        2454 :         block = xfs_btree_get_iroot(cur);
    3104        2454 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3105             : 
    3106             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3107        2454 :         error = xfs_btree_alloc_block(cur, pp, &nptr, stat);
    3108        2454 :         if (error)
    3109           0 :                 goto error0;
    3110        2454 :         if (*stat == 0)
    3111             :                 return 0;
    3112             : 
    3113        2454 :         XFS_BTREE_STATS_INC(cur, alloc);
    3114             : 
    3115             :         /* Copy the root into a real block. */
    3116        2454 :         error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
    3117        2454 :         if (error)
    3118           0 :                 goto error0;
    3119             : 
    3120             :         /*
    3121             :          * we can't just memcpy() the root in for CRC enabled btree blocks.
    3122             :          * In that case have to also ensure the blkno remains correct
    3123             :          */
    3124        4908 :         memcpy(cblock, block, xfs_btree_block_len(cur));
    3125        2454 :         if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
    3126        2454 :                 __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
    3127        2454 :                 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    3128        2454 :                         cblock->bb_u.l.bb_blkno = bno;
    3129             :                 else
    3130           0 :                         cblock->bb_u.s.bb_blkno = bno;
    3131             :         }
    3132             : 
    3133        2454 :         be16_add_cpu(&block->bb_level, 1);
    3134        2454 :         xfs_btree_set_numrecs(block, 1);
    3135        2454 :         cur->bc_nlevels++;
    3136        2454 :         ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
    3137        2454 :         cur->bc_levels[level + 1].ptr = 1;
    3138             : 
    3139        2454 :         kp = xfs_btree_key_addr(cur, 1, block);
    3140        2454 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3141        4908 :         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
    3142             : 
    3143        2454 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3144       32685 :         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
    3145       27777 :                 error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3146       27777 :                 if (error)
    3147           0 :                         goto error0;
    3148             :         }
    3149             : 
    3150        2454 :         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
    3151             : 
    3152        2454 :         error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
    3153        2454 :         if (error)
    3154           0 :                 goto error0;
    3155             : 
    3156        2454 :         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
    3157             : 
    3158        2454 :         xfs_iroot_realloc(cur->bc_ino.ip,
    3159             :                           1 - xfs_btree_get_numrecs(cblock),
    3160        2454 :                           cur->bc_ino.whichfork);
    3161             : 
    3162        2454 :         xfs_btree_setbuf(cur, level, cbp);
    3163             : 
    3164             :         /*
    3165             :          * Do all this logging at the end so that
    3166             :          * the root is at the right level.
    3167             :          */
    3168        2454 :         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
    3169        2454 :         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
    3170        2454 :         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
    3171             : 
    3172        4908 :         *logflags |=
    3173        2454 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
    3174        2454 :         *stat = 1;
    3175        2454 :         return 0;
    3176             : error0:
    3177             :         return error;
    3178             : }
    3179             : 
    3180             : /*
    3181             :  * Allocate a new root block, fill it in.
    3182             :  */
    3183             : STATIC int                              /* error */
    3184       23665 : xfs_btree_new_root(
    3185             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3186             :         int                     *stat)  /* success/failure */
    3187             : {
    3188       23665 :         struct xfs_btree_block  *block; /* one half of the old root block */
    3189       23665 :         struct xfs_buf          *bp;    /* buffer containing block */
    3190       23665 :         int                     error;  /* error return value */
    3191       23665 :         struct xfs_buf          *lbp;   /* left buffer pointer */
    3192       23665 :         struct xfs_btree_block  *left;  /* left btree block */
    3193       23665 :         struct xfs_buf          *nbp;   /* new (root) buffer */
    3194       23665 :         struct xfs_btree_block  *new;   /* new (root) btree block */
    3195       23665 :         int                     nptr;   /* new value for key index, 1 or 2 */
    3196       23665 :         struct xfs_buf          *rbp;   /* right buffer pointer */
    3197       23665 :         struct xfs_btree_block  *right; /* right btree block */
    3198       23665 :         union xfs_btree_ptr     rptr;
    3199       23665 :         union xfs_btree_ptr     lptr;
    3200             : 
    3201       23665 :         XFS_BTREE_STATS_INC(cur, newroot);
    3202             : 
    3203             :         /* initialise our start point from the cursor */
    3204       23665 :         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
    3205             : 
    3206             :         /* Allocate the new block. If we can't do it, we're toast. Give up. */
    3207       23666 :         error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat);
    3208       23666 :         if (error)
    3209           0 :                 goto error0;
    3210       23666 :         if (*stat == 0)
    3211           0 :                 goto out0;
    3212       23666 :         XFS_BTREE_STATS_INC(cur, alloc);
    3213             : 
    3214             :         /* Set up the new block. */
    3215       23666 :         error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
    3216       23666 :         if (error)
    3217           0 :                 goto error0;
    3218             : 
    3219             :         /* Set the root in the holding structure  increasing the level by 1. */
    3220       23666 :         cur->bc_ops->set_root(cur, &lptr, 1);
    3221             : 
    3222             :         /*
    3223             :          * At the previous root level there are now two blocks: the old root,
    3224             :          * and the new block generated when it was split.  We don't know which
    3225             :          * one the cursor is pointing at, so we set up variables "left" and
    3226             :          * "right" for each case.
    3227             :          */
    3228       23666 :         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
    3229             : 
    3230             : #ifdef DEBUG
    3231       23666 :         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
    3232       23666 :         if (error)
    3233           0 :                 goto error0;
    3234             : #endif
    3235             : 
    3236       23666 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    3237       47332 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    3238             :                 /* Our block is left, pick up the right block. */
    3239       14847 :                 lbp = bp;
    3240       14847 :                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
    3241       14847 :                 left = block;
    3242       14847 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    3243       14847 :                 if (error)
    3244           0 :                         goto error0;
    3245       14847 :                 bp = rbp;
    3246       14847 :                 nptr = 1;
    3247             :         } else {
    3248             :                 /* Our block is right, pick up the left block. */
    3249        8819 :                 rbp = bp;
    3250        8819 :                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
    3251        8819 :                 right = block;
    3252        8819 :                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
    3253        8819 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    3254        8819 :                 if (error)
    3255           0 :                         goto error0;
    3256        8819 :                 bp = lbp;
    3257        8819 :                 nptr = 2;
    3258             :         }
    3259             : 
    3260             :         /* Fill in the new block's btree header and log it. */
    3261       23666 :         xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
    3262       23666 :         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
    3263       70998 :         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
    3264             :                         !xfs_btree_ptr_is_null(cur, &rptr));
    3265             : 
    3266             :         /* Fill in the key data in the new root. */
    3267       47332 :         if (xfs_btree_get_level(left) > 0) {
    3268             :                 /*
    3269             :                  * Get the keys for the left block's keys and put them directly
    3270             :                  * in the parent block.  Do the same for the right block.
    3271             :                  */
    3272         305 :                 xfs_btree_get_node_keys(cur, left,
    3273             :                                 xfs_btree_key_addr(cur, 1, new));
    3274         305 :                 xfs_btree_get_node_keys(cur, right,
    3275             :                                 xfs_btree_key_addr(cur, 2, new));
    3276             :         } else {
    3277             :                 /*
    3278             :                  * Get the keys for the left block's records and put them
    3279             :                  * directly in the parent block.  Do the same for the right
    3280             :                  * block.
    3281             :                  */
    3282       23361 :                 xfs_btree_get_leaf_keys(cur, left,
    3283             :                         xfs_btree_key_addr(cur, 1, new));
    3284       23361 :                 xfs_btree_get_leaf_keys(cur, right,
    3285             :                         xfs_btree_key_addr(cur, 2, new));
    3286             :         }
    3287       23666 :         xfs_btree_log_keys(cur, nbp, 1, 2);
    3288             : 
    3289             :         /* Fill in the pointer data in the new root. */
    3290       23666 :         xfs_btree_copy_ptrs(cur,
    3291             :                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
    3292       23666 :         xfs_btree_copy_ptrs(cur,
    3293             :                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
    3294       23666 :         xfs_btree_log_ptrs(cur, nbp, 1, 2);
    3295             : 
    3296             :         /* Fix up the cursor. */
    3297       23666 :         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
    3298       23666 :         cur->bc_levels[cur->bc_nlevels].ptr = nptr;
    3299       23666 :         cur->bc_nlevels++;
    3300       23666 :         ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
    3301       23666 :         *stat = 1;
    3302       23666 :         return 0;
    3303             : error0:
    3304             :         return error;
    3305             : out0:
    3306           0 :         *stat = 0;
    3307           0 :         return 0;
    3308             : }
    3309             : 
    3310             : STATIC int
    3311    75134081 : xfs_btree_make_block_unfull(
    3312             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3313             :         int                     level,  /* btree level */
    3314             :         int                     numrecs,/* # of recs in block */
    3315             :         int                     *oindex,/* old tree index */
    3316             :         int                     *index, /* new tree index */
    3317             :         union xfs_btree_ptr     *nptr,  /* new btree ptr */
    3318             :         struct xfs_btree_cur    **ncur, /* new btree cursor */
    3319             :         union xfs_btree_key     *key,   /* key of new block */
    3320             :         int                     *stat)
    3321             : {
    3322    75134081 :         int                     error = 0;
    3323             : 
    3324    75134081 :         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3325    28264198 :             level == cur->bc_nlevels - 1) {
    3326       31667 :                 struct xfs_inode *ip = cur->bc_ino.ip;
    3327             : 
    3328       31667 :                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
    3329             :                         /* A root block that can be made bigger. */
    3330       29213 :                         xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
    3331       29213 :                         *stat = 1;
    3332             :                 } else {
    3333             :                         /* A root block that needs replacing */
    3334        2454 :                         int     logflags = 0;
    3335             : 
    3336        2454 :                         error = xfs_btree_new_iroot(cur, &logflags, stat);
    3337        2454 :                         if (error || *stat == 0)
    3338           0 :                                 return error;
    3339             : 
    3340        2454 :                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
    3341             :                 }
    3342             : 
    3343       31667 :                 return 0;
    3344             :         }
    3345             : 
    3346             :         /* First, try shifting an entry to the right neighbor. */
    3347    75102414 :         error = xfs_btree_rshift(cur, level, stat);
    3348    75102409 :         if (error || *stat)
    3349             :                 return error;
    3350             : 
    3351             :         /* Next, try shifting an entry to the left neighbor. */
    3352    52906459 :         error = xfs_btree_lshift(cur, level, stat);
    3353    52906477 :         if (error)
    3354             :                 return error;
    3355             : 
    3356    52906432 :         if (*stat) {
    3357    52229529 :                 *oindex = *index = cur->bc_levels[level].ptr;
    3358    52229529 :                 return 0;
    3359             :         }
    3360             : 
    3361             :         /*
    3362             :          * Next, try splitting the current block in half.
    3363             :          *
    3364             :          * If this works we have to re-set our variables because we
    3365             :          * could be in a different block now.
    3366             :          */
    3367      676903 :         error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
    3368      676903 :         if (error || *stat == 0)
    3369             :                 return error;
    3370             : 
    3371             : 
    3372      676662 :         *index = cur->bc_levels[level].ptr;
    3373      676662 :         return 0;
    3374             : }
    3375             : 
    3376             : /*
    3377             :  * Insert one record/level.  Return information to the caller
    3378             :  * allowing the next level up to proceed if necessary.
    3379             :  */
    3380             : STATIC int
    3381   553710573 : xfs_btree_insrec(
    3382             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    3383             :         int                     level,  /* level to insert record at */
    3384             :         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
    3385             :         union xfs_btree_rec     *rec,   /* record to insert */
    3386             :         union xfs_btree_key     *key,   /* i/o: block key for ptrp */
    3387             :         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
    3388             :         int                     *stat)  /* success/failure */
    3389             : {
    3390   553710573 :         struct xfs_btree_block  *block; /* btree block */
    3391   553710573 :         struct xfs_buf          *bp;    /* buffer for block */
    3392   553710573 :         union xfs_btree_ptr     nptr;   /* new block ptr */
    3393   553710573 :         struct xfs_btree_cur    *ncur = NULL;   /* new btree cursor */
    3394   553710573 :         union xfs_btree_key     nkey;   /* new block key */
    3395   553710573 :         union xfs_btree_key     *lkey;
    3396   553710573 :         int                     optr;   /* old key/record index */
    3397   553710573 :         int                     ptr;    /* key/record index */
    3398   553710573 :         int                     numrecs;/* number of records */
    3399   553710573 :         int                     error;  /* error return value */
    3400   553710573 :         int                     i;
    3401   553710573 :         xfs_daddr_t             old_bn;
    3402             : 
    3403   553710573 :         ncur = NULL;
    3404   553710573 :         lkey = &nkey;
    3405             : 
    3406             :         /*
    3407             :          * If we have an external root pointer, and we've made it to the
    3408             :          * root level, allocate a new root block and we're done.
    3409             :          */
    3410   553710573 :         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
    3411   488170775 :             (level >= cur->bc_nlevels)) {
    3412       23666 :                 error = xfs_btree_new_root(cur, stat);
    3413       23666 :                 xfs_btree_set_ptr_null(cur, ptrp);
    3414             : 
    3415       23666 :                 return error;
    3416             :         }
    3417             : 
    3418             :         /* If we're off the left edge, return failure. */
    3419   553686907 :         ptr = cur->bc_levels[level].ptr;
    3420   553686907 :         if (ptr == 0) {
    3421           0 :                 *stat = 0;
    3422           0 :                 return 0;
    3423             :         }
    3424             : 
    3425   553686907 :         optr = ptr;
    3426             : 
    3427   553686907 :         XFS_BTREE_STATS_INC(cur, insrec);
    3428             : 
    3429             :         /* Get pointers to the btree buffer and block. */
    3430   553686907 :         block = xfs_btree_get_block(cur, level, &bp);
    3431   553686056 :         old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
    3432   553686056 :         numrecs = xfs_btree_get_numrecs(block);
    3433             : 
    3434             : #ifdef DEBUG
    3435   553686056 :         error = xfs_btree_check_block(cur, block, level, bp);
    3436   553688293 :         if (error)
    3437           0 :                 goto error0;
    3438             : 
    3439             :         /* Check that the new entry is being inserted in the right place. */
    3440   553688293 :         if (ptr <= numrecs) {
    3441   308606535 :                 if (level == 0) {
    3442   308279105 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rec,
    3443             :                                 xfs_btree_rec_addr(cur, ptr, block)));
    3444             :                 } else {
    3445      327430 :                         ASSERT(cur->bc_ops->keys_inorder(cur, key,
    3446             :                                 xfs_btree_key_addr(cur, ptr, block)));
    3447             :                 }
    3448             :         }
    3449             : #endif
    3450             : 
    3451             :         /*
    3452             :          * If the block is full, we can't insert the new entry until we
    3453             :          * make the block un-full.
    3454             :          */
    3455   553692487 :         xfs_btree_set_ptr_null(cur, &nptr);
    3456   553692487 :         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
    3457    75134019 :                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
    3458             :                                         &optr, &ptr, &nptr, &ncur, lkey, stat);
    3459    75134166 :                 if (error || *stat == 0)
    3460         402 :                         goto error0;
    3461             :         }
    3462             : 
    3463             :         /*
    3464             :          * The current block may have changed if the block was
    3465             :          * previously full and we have just made space in it.
    3466             :          */
    3467   553685353 :         block = xfs_btree_get_block(cur, level, &bp);
    3468   553673490 :         numrecs = xfs_btree_get_numrecs(block);
    3469             : 
    3470             : #ifdef DEBUG
    3471   553673490 :         error = xfs_btree_check_block(cur, block, level, bp);
    3472   553677746 :         if (error)
    3473           0 :                 goto error0;
    3474             : #endif
    3475             : 
    3476             :         /*
    3477             :          * At this point we know there's room for our new entry in the block
    3478             :          * we're pointing at.
    3479             :          */
    3480   553677746 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
    3481             : 
    3482   553677746 :         if (level > 0) {
    3483             :                 /* It's a nonleaf. make a hole in the keys and ptrs */
    3484      652986 :                 union xfs_btree_key     *kp;
    3485      652986 :                 union xfs_btree_ptr     *pp;
    3486             : 
    3487      652986 :                 kp = xfs_btree_key_addr(cur, ptr, block);
    3488      652986 :                 pp = xfs_btree_ptr_addr(cur, ptr, block);
    3489             : 
    3490     9721306 :                 for (i = numrecs - ptr; i >= 0; i--) {
    3491     8415334 :                         error = xfs_btree_debug_check_ptr(cur, pp, i, level);
    3492     8415334 :                         if (error)
    3493           0 :                                 goto error0;
    3494             :                 }
    3495             : 
    3496      652986 :                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
    3497      652985 :                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
    3498             : 
    3499      652985 :                 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
    3500      652985 :                 if (error)
    3501           0 :                         goto error0;
    3502             : 
    3503             :                 /* Now put the new data in, bump numrecs and log it. */
    3504      652985 :                 xfs_btree_copy_keys(cur, kp, key, 1);
    3505      652985 :                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
    3506      652986 :                 numrecs++;
    3507      652986 :                 xfs_btree_set_numrecs(block, numrecs);
    3508      652986 :                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
    3509      652985 :                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
    3510             : #ifdef DEBUG
    3511      652985 :                 if (ptr < numrecs) {
    3512      327399 :                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
    3513             :                                 xfs_btree_key_addr(cur, ptr + 1, block)));
    3514             :                 }
    3515             : #endif
    3516             :         } else {
    3517             :                 /* It's a leaf. make a hole in the records */
    3518   553024760 :                 union xfs_btree_rec             *rp;
    3519             : 
    3520   553024760 :                 rp = xfs_btree_rec_addr(cur, ptr, block);
    3521             : 
    3522   553024760 :                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
    3523             : 
    3524             :                 /* Now put the new data in, bump numrecs and log it. */
    3525   553027826 :                 xfs_btree_copy_recs(cur, rp, rec, 1);
    3526   553031342 :                 xfs_btree_set_numrecs(block, ++numrecs);
    3527   553031342 :                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
    3528             : #ifdef DEBUG
    3529   553030896 :                 if (ptr < numrecs) {
    3530   308283020 :                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
    3531             :                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
    3532             :                 }
    3533             : #endif
    3534             :         }
    3535             : 
    3536             :         /* Log the new number of records in the btree header. */
    3537   553687520 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    3538             : 
    3539             :         /*
    3540             :          * If we just inserted into a new tree block, we have to
    3541             :          * recalculate nkey here because nkey is out of date.
    3542             :          *
    3543             :          * Otherwise we're just updating an existing block (having shoved
    3544             :          * some records into the new tree block), so use the regular key
    3545             :          * update mechanism.
    3546             :          */
    3547   553692342 :         if (bp && xfs_buf_daddr(bp) != old_bn) {
    3548      520741 :                 xfs_btree_get_keys(cur, block, lkey);
    3549   967719058 :         } else if (xfs_btree_needs_key_update(cur, optr)) {
    3550   317266222 :                 error = xfs_btree_update_keys(cur, level);
    3551   317260113 :                 if (error)
    3552           0 :                         goto error0;
    3553             :         }
    3554             : 
    3555             :         /*
    3556             :          * If we are tracking the last record in the tree and
    3557             :          * we are at the far right edge of the tree, update it.
    3558             :          */
    3559   553686232 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    3560    83512362 :                 cur->bc_ops->update_lastrec(cur, block, rec,
    3561             :                                             ptr, LASTREC_INSREC);
    3562             :         }
    3563             : 
    3564             :         /*
    3565             :          * Return the new block number, if any.
    3566             :          * If there is one, give back a record value and a cursor too.
    3567             :          */
    3568   553688813 :         *ptrp = nptr;
    3569  1107377626 :         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
    3570      676661 :                 xfs_btree_copy_keys(cur, key, lkey, 1);
    3571      676661 :                 *curp = ncur;
    3572             :         }
    3573             : 
    3574   553688813 :         *stat = 1;
    3575   553688813 :         return 0;
    3576             : 
    3577         402 : error0:
    3578         402 :         if (ncur)
    3579           0 :                 xfs_btree_del_cursor(ncur, error);
    3580             :         return error;
    3581             : }
    3582             : 
    3583             : /*
    3584             :  * Insert the record at the point referenced by cur.
    3585             :  *
    3586             :  * A multi-level split of the tree on insert will invalidate the original
    3587             :  * cursor.  All callers of this function should assume that the cursor is
    3588             :  * no longer valid and revalidate it.
    3589             :  */
    3590             : int
    3591   553037663 : xfs_btree_insert(
    3592             :         struct xfs_btree_cur    *cur,
    3593             :         int                     *stat)
    3594             : {
    3595   553037663 :         int                     error;  /* error return value */
    3596   553037663 :         int                     i;      /* result value, 0 for failure */
    3597   553037663 :         int                     level;  /* current level number in btree */
    3598   553037663 :         union xfs_btree_ptr     nptr;   /* new block number (split result) */
    3599   553037663 :         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
    3600   553037663 :         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
    3601   553037663 :         union xfs_btree_key     bkey;   /* key of block to insert */
    3602   553037663 :         union xfs_btree_key     *key;
    3603   553037663 :         union xfs_btree_rec     rec;    /* record to insert */
    3604             : 
    3605   553037663 :         level = 0;
    3606   553037663 :         ncur = NULL;
    3607   553037663 :         pcur = cur;
    3608   553037663 :         key = &bkey;
    3609             : 
    3610   553037663 :         xfs_btree_set_ptr_null(cur, &nptr);
    3611             : 
    3612             :         /* Make a key out of the record data to be inserted, and save it. */
    3613   553037663 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    3614   553031936 :         cur->bc_ops->init_key_from_rec(key, &rec);
    3615             : 
    3616             :         /*
    3617             :          * Loop going up the tree, starting at the leaf level.
    3618             :          * Stop when we don't get a split block, that must mean that
    3619             :          * the insert is finished with this level.
    3620             :          */
    3621   553717973 :         do {
    3622             :                 /*
    3623             :                  * Insert nrec/nptr into this level of the tree.
    3624             :                  * Note if we fail, nptr will be null.
    3625             :                  */
    3626   553717973 :                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
    3627             :                                 &ncur, &i);
    3628   553709372 :                 if (error) {
    3629         402 :                         if (pcur != cur)
    3630          10 :                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
    3631         402 :                         goto error0;
    3632             :                 }
    3633             : 
    3634   553708970 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    3635           0 :                         xfs_btree_mark_sick(cur);
    3636           0 :                         error = -EFSCORRUPTED;
    3637           0 :                         goto error0;
    3638             :                 }
    3639   553708970 :                 level++;
    3640             : 
    3641             :                 /*
    3642             :                  * See if the cursor we just used is trash.
    3643             :                  * Can't trash the caller's cursor, but otherwise we should
    3644             :                  * if ncur is a new cursor or we're about to be done.
    3645             :                  */
    3646   553708970 :                 if (pcur != cur &&
    3647     1304406 :                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
    3648             :                         /* Save the state from the cursor before we trash it */
    3649      652986 :                         if (cur->bc_ops->update_cursor)
    3650      215957 :                                 cur->bc_ops->update_cursor(pcur, cur);
    3651      652986 :                         cur->bc_nlevels = pcur->bc_nlevels;
    3652      652986 :                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
    3653             :                 }
    3654             :                 /* If we got a new cursor, switch to it. */
    3655   553715920 :                 if (ncur) {
    3656      660779 :                         pcur = ncur;
    3657      660779 :                         ncur = NULL;
    3658             :                 }
    3659  1107431840 :         } while (!xfs_btree_ptr_is_null(cur, &nptr));
    3660             : 
    3661   553028152 :         *stat = i;
    3662   553028152 :         return 0;
    3663             : error0:
    3664             :         return error;
    3665             : }
    3666             : 
    3667             : /*
    3668             :  * Try to merge a non-leaf block back into the inode root.
    3669             :  *
    3670             :  * Note: the killroot names comes from the fact that we're effectively
    3671             :  * killing the old root block.  But because we can't just delete the
    3672             :  * inode we have to copy the single block it was pointing to into the
    3673             :  * inode.
    3674             :  */
    3675             : STATIC int
    3676     4167975 : xfs_btree_kill_iroot(
    3677             :         struct xfs_btree_cur    *cur)
    3678             : {
    3679     4167975 :         int                     whichfork = cur->bc_ino.whichfork;
    3680     4167975 :         struct xfs_inode        *ip = cur->bc_ino.ip;
    3681     4167975 :         struct xfs_ifork        *ifp = xfs_ifork_ptr(ip, whichfork);
    3682     4167978 :         struct xfs_btree_block  *block;
    3683     4167978 :         struct xfs_btree_block  *cblock;
    3684     4167978 :         union xfs_btree_key     *kp;
    3685     4167978 :         union xfs_btree_key     *ckp;
    3686     4167978 :         union xfs_btree_ptr     *pp;
    3687     4167978 :         union xfs_btree_ptr     *cpp;
    3688     4167978 :         struct xfs_buf          *cbp;
    3689     4167978 :         int                     level;
    3690     4167978 :         int                     index;
    3691     4167978 :         int                     numrecs;
    3692     4167978 :         int                     error;
    3693             : #ifdef DEBUG
    3694     4167978 :         union xfs_btree_ptr     ptr;
    3695             : #endif
    3696     4167978 :         int                     i;
    3697             : 
    3698     4167978 :         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    3699     4167978 :         ASSERT(cur->bc_nlevels > 1);
    3700             : 
    3701             :         /*
    3702             :          * Don't deal with the root block needs to be a leaf case.
    3703             :          * We're just going to turn the thing back into extents anyway.
    3704             :          */
    3705     4167978 :         level = cur->bc_nlevels - 1;
    3706     4167978 :         if (level == 1)
    3707     4039730 :                 goto out0;
    3708             : 
    3709             :         /*
    3710             :          * Give up if the root has multiple children.
    3711             :          */
    3712      128248 :         block = xfs_btree_get_iroot(cur);
    3713      256496 :         if (xfs_btree_get_numrecs(block) != 1)
    3714           5 :                 goto out0;
    3715             : 
    3716      128243 :         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
    3717      128243 :         numrecs = xfs_btree_get_numrecs(cblock);
    3718             : 
    3719             :         /*
    3720             :          * Only do this if the next level will fit.
    3721             :          * Then the data must be copied up to the inode,
    3722             :          * instead of freeing the root you free the next level.
    3723             :          */
    3724      128243 :         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
    3725      125990 :                 goto out0;
    3726             : 
    3727        2253 :         XFS_BTREE_STATS_INC(cur, killroot);
    3728             : 
    3729             : #ifdef DEBUG
    3730        2253 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
    3731        4506 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3732        2253 :         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
    3733        4506 :         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
    3734             : #endif
    3735             : 
    3736        2253 :         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
    3737        2253 :         if (index) {
    3738        2253 :                 xfs_iroot_realloc(cur->bc_ino.ip, index,
    3739        2253 :                                   cur->bc_ino.whichfork);
    3740        2253 :                 block = ifp->if_broot;
    3741             :         }
    3742             : 
    3743        2253 :         be16_add_cpu(&block->bb_numrecs, index);
    3744        2253 :         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
    3745             : 
    3746        2253 :         kp = xfs_btree_key_addr(cur, 1, block);
    3747        2253 :         ckp = xfs_btree_key_addr(cur, 1, cblock);
    3748        2253 :         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
    3749             : 
    3750        2253 :         pp = xfs_btree_ptr_addr(cur, 1, block);
    3751        2253 :         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
    3752             : 
    3753       29289 :         for (i = 0; i < numrecs; i++) {
    3754       24783 :                 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
    3755       24783 :                 if (error)
    3756           0 :                         return error;
    3757             :         }
    3758             : 
    3759        2253 :         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
    3760             : 
    3761        2253 :         error = xfs_btree_free_block(cur, cbp);
    3762        2253 :         if (error)
    3763             :                 return error;
    3764             : 
    3765        2253 :         cur->bc_levels[level - 1].bp = NULL;
    3766        2253 :         be16_add_cpu(&block->bb_level, -1);
    3767        4506 :         xfs_trans_log_inode(cur->bc_tp, ip,
    3768        2253 :                 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
    3769        2253 :         cur->bc_nlevels--;
    3770             : out0:
    3771             :         return 0;
    3772             : }
    3773             : 
    3774             : /*
    3775             :  * Kill the current root node, and replace it with it's only child node.
    3776             :  */
    3777             : STATIC int
    3778       20164 : xfs_btree_kill_root(
    3779             :         struct xfs_btree_cur    *cur,
    3780             :         struct xfs_buf          *bp,
    3781             :         int                     level,
    3782             :         union xfs_btree_ptr     *newroot)
    3783             : {
    3784       20164 :         int                     error;
    3785             : 
    3786       20164 :         XFS_BTREE_STATS_INC(cur, killroot);
    3787             : 
    3788             :         /*
    3789             :          * Update the root pointer, decreasing the level by 1 and then
    3790             :          * free the old root.
    3791             :          */
    3792       20164 :         cur->bc_ops->set_root(cur, newroot, -1);
    3793             : 
    3794       20164 :         error = xfs_btree_free_block(cur, bp);
    3795       20164 :         if (error)
    3796             :                 return error;
    3797             : 
    3798       20164 :         cur->bc_levels[level].bp = NULL;
    3799       20164 :         cur->bc_levels[level].ra = 0;
    3800       20164 :         cur->bc_nlevels--;
    3801             : 
    3802       20164 :         return 0;
    3803             : }
    3804             : 
    3805             : STATIC int
    3806   189339667 : xfs_btree_dec_cursor(
    3807             :         struct xfs_btree_cur    *cur,
    3808             :         int                     level,
    3809             :         int                     *stat)
    3810             : {
    3811   189339667 :         int                     error;
    3812   189339667 :         int                     i;
    3813             : 
    3814   189339667 :         if (level > 0) {
    3815      361348 :                 error = xfs_btree_decrement(cur, level, &i);
    3816      361348 :                 if (error)
    3817             :                         return error;
    3818             :         }
    3819             : 
    3820   189339667 :         *stat = 1;
    3821   189339667 :         return 0;
    3822             : }
    3823             : 
    3824             : /*
    3825             :  * Single level of the btree record deletion routine.
    3826             :  * Delete record pointed to by cur/level.
    3827             :  * Remove the record from its block then rebalance the tree.
    3828             :  * Return 0 for error, 1 for done, 2 to go on to the next level.
    3829             :  */
    3830             : STATIC int                                      /* error */
    3831   469919127 : xfs_btree_delrec(
    3832             :         struct xfs_btree_cur    *cur,           /* btree cursor */
    3833             :         int                     level,          /* level removing record from */
    3834             :         int                     *stat)          /* fail/done/go-on */
    3835             : {
    3836   469919127 :         struct xfs_btree_block  *block;         /* btree block */
    3837   469919127 :         union xfs_btree_ptr     cptr;           /* current block ptr */
    3838   469919127 :         struct xfs_buf          *bp;            /* buffer for block */
    3839   469919127 :         int                     error;          /* error return value */
    3840   469919127 :         int                     i;              /* loop counter */
    3841   469919127 :         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
    3842   469919127 :         struct xfs_buf          *lbp;           /* left buffer pointer */
    3843   469919127 :         struct xfs_btree_block  *left;          /* left btree block */
    3844   469919127 :         int                     lrecs = 0;      /* left record count */
    3845   469919127 :         int                     ptr;            /* key/record index */
    3846   469919127 :         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
    3847   469919127 :         struct xfs_buf          *rbp;           /* right buffer pointer */
    3848   469919127 :         struct xfs_btree_block  *right;         /* right btree block */
    3849   469919127 :         struct xfs_btree_block  *rrblock;       /* right-right btree block */
    3850   469919127 :         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
    3851   469919127 :         int                     rrecs = 0;      /* right record count */
    3852   469919127 :         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
    3853   469919127 :         int                     numrecs;        /* temporary numrec count */
    3854             : 
    3855   469919127 :         tcur = NULL;
    3856             : 
    3857             :         /* Get the index of the entry being deleted, check for nothing there. */
    3858   469919127 :         ptr = cur->bc_levels[level].ptr;
    3859   469919127 :         if (ptr == 0) {
    3860           0 :                 *stat = 0;
    3861           0 :                 return 0;
    3862             :         }
    3863             : 
    3864             :         /* Get the buffer & block containing the record or key/ptr. */
    3865   469919127 :         block = xfs_btree_get_block(cur, level, &bp);
    3866   469921354 :         numrecs = xfs_btree_get_numrecs(block);
    3867             : 
    3868             : #ifdef DEBUG
    3869   469921354 :         error = xfs_btree_check_block(cur, block, level, bp);
    3870   469919206 :         if (error)
    3871           0 :                 goto error0;
    3872             : #endif
    3873             : 
    3874             :         /* Fail if we're off the end of the block. */
    3875   469919206 :         if (ptr > numrecs) {
    3876           0 :                 *stat = 0;
    3877           0 :                 return 0;
    3878             :         }
    3879             : 
    3880   469919206 :         XFS_BTREE_STATS_INC(cur, delrec);
    3881   469919206 :         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
    3882             : 
    3883             :         /* Excise the entries being deleted. */
    3884   469919206 :         if (level > 0) {
    3885             :                 /* It's a nonleaf. operate on keys and ptrs */
    3886      383929 :                 union xfs_btree_key     *lkp;
    3887      383929 :                 union xfs_btree_ptr     *lpp;
    3888             : 
    3889      383929 :                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
    3890      383929 :                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
    3891             : 
    3892     6983506 :                 for (i = 0; i < numrecs - ptr; i++) {
    3893     6599577 :                         error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
    3894     6599577 :                         if (error)
    3895           0 :                                 goto error0;
    3896             :                 }
    3897             : 
    3898      383929 :                 if (ptr < numrecs) {
    3899      184446 :                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
    3900      184446 :                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
    3901      184446 :                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
    3902      184446 :                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
    3903             :                 }
    3904             :         } else {
    3905             :                 /* It's a leaf. operate on records */
    3906   469535277 :                 if (ptr < numrecs) {
    3907   248648208 :                         xfs_btree_shift_recs(cur,
    3908             :                                 xfs_btree_rec_addr(cur, ptr + 1, block),
    3909             :                                 -1, numrecs - ptr);
    3910   248648150 :                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
    3911             :                 }
    3912             :         }
    3913             : 
    3914             :         /*
    3915             :          * Decrement and log the number of entries in the block.
    3916             :          */
    3917   469922534 :         xfs_btree_set_numrecs(block, --numrecs);
    3918   469922534 :         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
    3919             : 
    3920             :         /*
    3921             :          * If we are tracking the last record in the tree and
    3922             :          * we are at the far right edge of the tree, update it.
    3923             :          */
    3924   469920400 :         if (xfs_btree_is_lastrec(cur, block, level)) {
    3925    78717298 :                 cur->bc_ops->update_lastrec(cur, block, NULL,
    3926             :                                             ptr, LASTREC_DELREC);
    3927             :         }
    3928             : 
    3929             :         /*
    3930             :          * We're at the root level.  First, shrink the root block in-memory.
    3931             :          * Try to get rid of the next level down.  If we can't then there's
    3932             :          * nothing left to do.
    3933             :          */
    3934   469926797 :         if (level == cur->bc_nlevels - 1) {
    3935   257285592 :                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    3936       23797 :                         xfs_iroot_realloc(cur->bc_ino.ip, -1,
    3937       23797 :                                           cur->bc_ino.whichfork);
    3938             : 
    3939       23797 :                         error = xfs_btree_kill_iroot(cur);
    3940       23797 :                         if (error)
    3941           0 :                                 goto error0;
    3942             : 
    3943       23797 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    3944       23797 :                         if (error)
    3945           0 :                                 goto error0;
    3946       23797 :                         *stat = 1;
    3947       23797 :                         return 0;
    3948             :                 }
    3949             : 
    3950             :                 /*
    3951             :                  * If this is the root level, and there's only one entry left,
    3952             :                  * and it's NOT the leaf level, then we can get rid of this
    3953             :                  * level.
    3954             :                  */
    3955   257261795 :                 if (numrecs == 1 && level > 0) {
    3956       20164 :                         union xfs_btree_ptr     *pp;
    3957             :                         /*
    3958             :                          * pp is still set to the first pointer in the block.
    3959             :                          * Make it the new root of the btree.
    3960             :                          */
    3961       20164 :                         pp = xfs_btree_ptr_addr(cur, 1, block);
    3962       20164 :                         error = xfs_btree_kill_root(cur, bp, level, pp);
    3963       20164 :                         if (error)
    3964           0 :                                 goto error0;
    3965   257241631 :                 } else if (level > 0) {
    3966       93306 :                         error = xfs_btree_dec_cursor(cur, level, stat);
    3967       93306 :                         if (error)
    3968           0 :                                 goto error0;
    3969             :                 }
    3970   257261795 :                 *stat = 1;
    3971   257261795 :                 return 0;
    3972             :         }
    3973             : 
    3974             :         /*
    3975             :          * If we deleted the leftmost entry in the block, update the
    3976             :          * key values above us in the tree.
    3977             :          */
    3978   336105047 :         if (xfs_btree_needs_key_update(cur, ptr)) {
    3979    93372364 :                 error = xfs_btree_update_keys(cur, level);
    3980    93372572 :                 if (error)
    3981           0 :                         goto error0;
    3982             :         }
    3983             : 
    3984             :         /*
    3985             :          * If the number of records remaining in the block is at least
    3986             :          * the minimum, we're done.
    3987             :          */
    3988   212641413 :         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
    3989   175764779 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    3990   175764740 :                 if (error)
    3991           0 :                         goto error0;
    3992             :                 return 0;
    3993             :         }
    3994             : 
    3995             :         /*
    3996             :          * Otherwise, we have to move some records around to keep the
    3997             :          * tree balanced.  Look at the left and right sibling blocks to
    3998             :          * see if we can re-balance by moving only one record.
    3999             :          */
    4000    36877871 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4001    36877896 :         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
    4002             : 
    4003    36877924 :         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
    4004             :                 /*
    4005             :                  * One child of root, need to get a chance to copy its contents
    4006             :                  * into the root and delete it. Can't go up to next level,
    4007             :                  * there's nothing to delete there.
    4008             :                  */
    4009    70088947 :                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
    4010     4144183 :                     xfs_btree_ptr_is_null(cur, &lptr) &&
    4011     4144183 :                     level == cur->bc_nlevels - 2) {
    4012     4144181 :                         error = xfs_btree_kill_iroot(cur);
    4013     4144171 :                         if (!error)
    4014     4144173 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4015     4144171 :                         if (error)
    4016           0 :                                 goto error0;
    4017             :                         return 0;
    4018             :                 }
    4019             :         }
    4020             : 
    4021    85116873 :         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
    4022             :                !xfs_btree_ptr_is_null(cur, &lptr));
    4023             : 
    4024             :         /*
    4025             :          * Duplicate the cursor so our btree manipulations here won't
    4026             :          * disrupt the next level up.
    4027             :          */
    4028    32733743 :         error = xfs_btree_dup_cursor(cur, &tcur);
    4029    32733751 :         if (error)
    4030           0 :                 goto error0;
    4031             : 
    4032             :         /*
    4033             :          * If there's a right sibling, see if it's ok to shift an entry
    4034             :          * out of it.
    4035             :          */
    4036    65467502 :         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
    4037             :                 /*
    4038             :                  * Move the temp cursor to the last entry in the next block.
    4039             :                  * Actually any entry but the first would suffice.
    4040             :                  */
    4041    13084363 :                 i = xfs_btree_lastrec(tcur, level);
    4042    13084361 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4043           0 :                         xfs_btree_mark_sick(cur);
    4044           0 :                         error = -EFSCORRUPTED;
    4045           0 :                         goto error0;
    4046             :                 }
    4047             : 
    4048    13084361 :                 error = xfs_btree_increment(tcur, level, &i);
    4049    13084356 :                 if (error)
    4050           2 :                         goto error0;
    4051    13084354 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4052           0 :                         xfs_btree_mark_sick(cur);
    4053           0 :                         error = -EFSCORRUPTED;
    4054           0 :                         goto error0;
    4055             :                 }
    4056             : 
    4057    13084354 :                 i = xfs_btree_lastrec(tcur, level);
    4058    13084354 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4059           0 :                         xfs_btree_mark_sick(cur);
    4060           0 :                         error = -EFSCORRUPTED;
    4061           0 :                         goto error0;
    4062             :                 }
    4063             : 
    4064             :                 /* Grab a pointer to the block. */
    4065    13084354 :                 right = xfs_btree_get_block(tcur, level, &rbp);
    4066             : #ifdef DEBUG
    4067    13084364 :                 error = xfs_btree_check_block(tcur, right, level, rbp);
    4068    13084349 :                 if (error)
    4069           0 :                         goto error0;
    4070             : #endif
    4071             :                 /* Grab the current block number, for future use. */
    4072    13084349 :                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
    4073             : 
    4074             :                 /*
    4075             :                  * If right block is full enough so that removing one entry
    4076             :                  * won't make it too empty, and left-shifting an entry out
    4077             :                  * of right to us works, we're done.
    4078             :                  */
    4079    26168724 :                 if (xfs_btree_get_numrecs(right) - 1 >=
    4080    13084360 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4081     9314081 :                         error = xfs_btree_lshift(tcur, level, &i);
    4082     9314081 :                         if (error)
    4083           0 :                                 goto error0;
    4084     9314081 :                         if (i) {
    4085    18628166 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4086             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4087             : 
    4088     9314082 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4089     9314082 :                                 tcur = NULL;
    4090             : 
    4091     9314082 :                                 error = xfs_btree_dec_cursor(cur, level, stat);
    4092     9314081 :                                 if (error)
    4093           0 :                                         goto error0;
    4094             :                                 return 0;
    4095             :                         }
    4096             :                 }
    4097             : 
    4098             :                 /*
    4099             :                  * Otherwise, grab the number of records in right for
    4100             :                  * future reference, and fix up the temp cursor to point
    4101             :                  * to our block again (last record).
    4102             :                  */
    4103     3770281 :                 rrecs = xfs_btree_get_numrecs(right);
    4104     7540562 :                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4105     3746396 :                         i = xfs_btree_firstrec(tcur, level);
    4106     3746395 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4107           0 :                                 xfs_btree_mark_sick(cur);
    4108           0 :                                 error = -EFSCORRUPTED;
    4109           0 :                                 goto error0;
    4110             :                         }
    4111             : 
    4112     3746395 :                         error = xfs_btree_decrement(tcur, level, &i);
    4113     3746397 :                         if (error)
    4114           0 :                                 goto error0;
    4115     3746397 :                         if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4116           0 :                                 xfs_btree_mark_sick(cur);
    4117           0 :                                 error = -EFSCORRUPTED;
    4118           0 :                                 goto error0;
    4119             :                         }
    4120             :                 }
    4121             :         }
    4122             : 
    4123             :         /*
    4124             :          * If there's a left sibling, see if it's ok to shift an entry
    4125             :          * out of it.
    4126             :          */
    4127    46839340 :         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
    4128             :                 /*
    4129             :                  * Move the temp cursor to the first entry in the
    4130             :                  * previous block.
    4131             :                  */
    4132    23395730 :                 i = xfs_btree_firstrec(tcur, level);
    4133    23395664 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4134           0 :                         xfs_btree_mark_sick(cur);
    4135           0 :                         error = -EFSCORRUPTED;
    4136           0 :                         goto error0;
    4137             :                 }
    4138             : 
    4139    23395664 :                 error = xfs_btree_decrement(tcur, level, &i);
    4140    23395743 :                 if (error)
    4141           0 :                         goto error0;
    4142    23395743 :                 i = xfs_btree_firstrec(tcur, level);
    4143    23395771 :                 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
    4144           0 :                         xfs_btree_mark_sick(cur);
    4145           0 :                         error = -EFSCORRUPTED;
    4146           0 :                         goto error0;
    4147             :                 }
    4148             : 
    4149             :                 /* Grab a pointer to the block. */
    4150    23395771 :                 left = xfs_btree_get_block(tcur, level, &lbp);
    4151             : #ifdef DEBUG
    4152    23395776 :                 error = xfs_btree_check_block(cur, left, level, lbp);
    4153    23395776 :                 if (error)
    4154           0 :                         goto error0;
    4155             : #endif
    4156             :                 /* Grab the current block number, for future use. */
    4157    23395776 :                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
    4158             : 
    4159             :                 /*
    4160             :                  * If left block is full enough so that removing one entry
    4161             :                  * won't make it too empty, and right-shifting an entry out
    4162             :                  * of left to us works, we're done.
    4163             :                  */
    4164    46791558 :                 if (xfs_btree_get_numrecs(left) - 1 >=
    4165    23395778 :                     cur->bc_ops->get_minrecs(tcur, level)) {
    4166    23035737 :                         error = xfs_btree_rshift(tcur, level, &i);
    4167    23035741 :                         if (error)
    4168           0 :                                 goto error0;
    4169    23035741 :                         if (i) {
    4170    46071482 :                                 ASSERT(xfs_btree_get_numrecs(block) >=
    4171             :                                        cur->bc_ops->get_minrecs(tcur, level));
    4172    23035741 :                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4173    23035741 :                                 tcur = NULL;
    4174    23035741 :                                 if (level == 0)
    4175    23033744 :                                         cur->bc_levels[0].ptr++;
    4176             : 
    4177    23035741 :                                 *stat = 1;
    4178    23035741 :                                 return 0;
    4179             :                         }
    4180             :                 }
    4181             : 
    4182             :                 /*
    4183             :                  * Otherwise, grab the number of records in right for
    4184             :                  * future reference.
    4185             :                  */
    4186      360043 :                 lrecs = xfs_btree_get_numrecs(left);
    4187             :         }
    4188             : 
    4189             :         /* Delete the temp cursor, we're done with it. */
    4190      383983 :         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
    4191      383929 :         tcur = NULL;
    4192             : 
    4193             :         /* If here, we need to do a join to keep the tree balanced. */
    4194      767858 :         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
    4195             : 
    4196     1127901 :         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
    4197      360043 :             lrecs + xfs_btree_get_numrecs(block) <=
    4198      360043 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4199             :                 /*
    4200             :                  * Set "right" to be the starting block,
    4201             :                  * "left" to be the left neighbor.
    4202             :                  */
    4203      360043 :                 rptr = cptr;
    4204      360043 :                 right = block;
    4205      360043 :                 rbp = bp;
    4206      360043 :                 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
    4207      360043 :                 if (error)
    4208           0 :                         goto error0;
    4209             : 
    4210             :         /*
    4211             :          * If that won't work, see if we can join with the right neighbor block.
    4212             :          */
    4213       71658 :         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
    4214       23886 :                    rrecs + xfs_btree_get_numrecs(block) <=
    4215       23886 :                         cur->bc_ops->get_maxrecs(cur, level)) {
    4216             :                 /*
    4217             :                  * Set "left" to be the starting block,
    4218             :                  * "right" to be the right neighbor.
    4219             :                  */
    4220       23886 :                 lptr = cptr;
    4221       23886 :                 left = block;
    4222       23886 :                 lbp = bp;
    4223       23886 :                 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
    4224       23886 :                 if (error)
    4225           0 :                         goto error0;
    4226             : 
    4227             :         /*
    4228             :          * Otherwise, we can't fix the imbalance.
    4229             :          * Just return.  This is probably a logic error, but it's not fatal.
    4230             :          */
    4231             :         } else {
    4232           0 :                 error = xfs_btree_dec_cursor(cur, level, stat);
    4233           0 :                 if (error)
    4234           0 :                         goto error0;
    4235             :                 return 0;
    4236             :         }
    4237             : 
    4238      383929 :         rrecs = xfs_btree_get_numrecs(right);
    4239      383929 :         lrecs = xfs_btree_get_numrecs(left);
    4240             : 
    4241             :         /*
    4242             :          * We're now going to join "left" and "right" by moving all the stuff
    4243             :          * in "right" to "left" and deleting "right".
    4244             :          */
    4245      383929 :         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
    4246      383929 :         if (level > 0) {
    4247             :                 /* It's a non-leaf.  Move keys and pointers. */
    4248         420 :                 union xfs_btree_key     *lkp;   /* left btree key */
    4249         420 :                 union xfs_btree_ptr     *lpp;   /* left address pointer */
    4250         420 :                 union xfs_btree_key     *rkp;   /* right btree key */
    4251         420 :                 union xfs_btree_ptr     *rpp;   /* right address pointer */
    4252             : 
    4253         420 :                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
    4254         420 :                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
    4255         420 :                 rkp = xfs_btree_key_addr(cur, 1, right);
    4256         420 :                 rpp = xfs_btree_ptr_addr(cur, 1, right);
    4257             : 
    4258       20888 :                 for (i = 1; i < rrecs; i++) {
    4259       20468 :                         error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
    4260       20468 :                         if (error)
    4261           0 :                                 goto error0;
    4262             :                 }
    4263             : 
    4264         420 :                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
    4265         420 :                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
    4266             : 
    4267         420 :                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
    4268         420 :                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4269             :         } else {
    4270             :                 /* It's a leaf.  Move records.  */
    4271      383509 :                 union xfs_btree_rec     *lrp;   /* left record pointer */
    4272      383509 :                 union xfs_btree_rec     *rrp;   /* right record pointer */
    4273             : 
    4274      383509 :                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
    4275      383509 :                 rrp = xfs_btree_rec_addr(cur, 1, right);
    4276             : 
    4277      383509 :                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
    4278      383509 :                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
    4279             :         }
    4280             : 
    4281      383929 :         XFS_BTREE_STATS_INC(cur, join);
    4282             : 
    4283             :         /*
    4284             :          * Fix up the number of records and right block pointer in the
    4285             :          * surviving block, and log it.
    4286             :          */
    4287      383929 :         xfs_btree_set_numrecs(left, lrecs + rrecs);
    4288      383929 :         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
    4289      383929 :         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4290      383929 :         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
    4291             : 
    4292             :         /* If there is a right sibling, point it to the remaining block. */
    4293      383929 :         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
    4294      767858 :         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
    4295      185941 :                 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
    4296      185941 :                 if (error)
    4297           0 :                         goto error0;
    4298      185941 :                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
    4299      185941 :                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
    4300             :         }
    4301             : 
    4302             :         /* Free the deleted block. */
    4303      383929 :         error = xfs_btree_free_block(cur, rbp);
    4304      383929 :         if (error)
    4305           0 :                 goto error0;
    4306             : 
    4307             :         /*
    4308             :          * If we joined with the left neighbor, set the buffer in the
    4309             :          * cursor to the left block, and fix up the index.
    4310             :          */
    4311      383929 :         if (bp != lbp) {
    4312      360043 :                 cur->bc_levels[level].bp = lbp;
    4313      360043 :                 cur->bc_levels[level].ptr += lrecs;
    4314      360043 :                 cur->bc_levels[level].ra = 0;
    4315             :         }
    4316             :         /*
    4317             :          * If we joined with the right neighbor and there's a level above
    4318             :          * us, increment the cursor at that level.
    4319             :          */
    4320       23886 :         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
    4321       23413 :                    (level + 1 < cur->bc_nlevels)) {
    4322       23886 :                 error = xfs_btree_increment(cur, level + 1, &i);
    4323       23886 :                 if (error)
    4324           0 :                         goto error0;
    4325             :         }
    4326             : 
    4327             :         /*
    4328             :          * Readjust the ptr at this level if it's not a leaf, since it's
    4329             :          * still pointing at the deletion point, which makes the cursor
    4330             :          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
    4331             :          * We can't use decrement because it would change the next level up.
    4332             :          */
    4333      383929 :         if (level > 0)
    4334         420 :                 cur->bc_levels[level].ptr--;
    4335             : 
    4336             :         /*
    4337             :          * We combined blocks, so we have to update the parent keys if the
    4338             :          * btree supports overlapped intervals.  However,
    4339             :          * bc_levels[level + 1].ptr points to the old block so that the caller
    4340             :          * knows which record to delete.  Therefore, the caller must be savvy
    4341             :          * enough to call updkeys for us if we return stat == 2.  The other
    4342             :          * exit points from this function don't require deletions further up
    4343             :          * the tree, so they can call updkeys directly.
    4344             :          */
    4345             : 
    4346             :         /* Return value means the next level up has something to do. */
    4347      383929 :         *stat = 2;
    4348      383929 :         return 0;
    4349             : 
    4350           2 : error0:
    4351           2 :         if (tcur)
    4352           2 :                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
    4353             :         return error;
    4354             : }
    4355             : 
    4356             : /*
    4357             :  * Delete the record pointed to by cur.
    4358             :  * The cursor refers to the place where the record was (could be inserted)
    4359             :  * when the operation returns.
    4360             :  */
    4361             : int                                     /* error */
    4362   469530072 : xfs_btree_delete(
    4363             :         struct xfs_btree_cur    *cur,
    4364             :         int                     *stat)  /* success/failure */
    4365             : {
    4366   469530072 :         int                     error;  /* error return value */
    4367   469530072 :         int                     level;
    4368   469530072 :         int                     i;
    4369   469530072 :         bool                    joined = false;
    4370             : 
    4371             :         /*
    4372             :          * Go up the tree, starting at leaf level.
    4373             :          *
    4374             :          * If 2 is returned then a join was done; go to the next level.
    4375             :          * Otherwise we are done.
    4376             :          */
    4377   939442844 :         for (level = 0, i = 2; i == 2; level++) {
    4378   469913246 :                 error = xfs_btree_delrec(cur, level, &i);
    4379   469912774 :                 if (error)
    4380           2 :                         goto error0;
    4381   469912772 :                 if (i == 2)
    4382      383929 :                         joined = true;
    4383             :         }
    4384             : 
    4385             :         /*
    4386             :          * If we combined blocks as part of deleting the record, delrec won't
    4387             :          * have updated the parent high keys so we have to do that here.
    4388             :          */
    4389   469529598 :         if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
    4390      135991 :                 error = xfs_btree_updkeys_force(cur, 0);
    4391      135991 :                 if (error)
    4392           0 :                         goto error0;
    4393             :         }
    4394             : 
    4395   469529598 :         if (i == 0) {
    4396           0 :                 for (level = 1; level < cur->bc_nlevels; level++) {
    4397           0 :                         if (cur->bc_levels[level].ptr == 0) {
    4398           0 :                                 error = xfs_btree_decrement(cur, level, &i);
    4399           0 :                                 if (error)
    4400           0 :                                         goto error0;
    4401             :                                 break;
    4402             :                         }
    4403             :                 }
    4404             :         }
    4405             : 
    4406   469529598 :         *stat = i;
    4407   469529598 :         return 0;
    4408             : error0:
    4409             :         return error;
    4410             : }
    4411             : 
    4412             : /*
    4413             :  * Get the data from the pointed-to record.
    4414             :  */
    4415             : int                                     /* error */
    4416 80858716913 : xfs_btree_get_rec(
    4417             :         struct xfs_btree_cur    *cur,   /* btree cursor */
    4418             :         union xfs_btree_rec     **recp, /* output: btree record */
    4419             :         int                     *stat)  /* output: success/failure */
    4420             : {
    4421 80858716913 :         struct xfs_btree_block  *block; /* btree block */
    4422 80858716913 :         struct xfs_buf          *bp;    /* buffer pointer */
    4423 80858716913 :         int                     ptr;    /* record number */
    4424             : #ifdef DEBUG
    4425 80858716913 :         int                     error;  /* error return value */
    4426             : #endif
    4427             : 
    4428 80858716913 :         ptr = cur->bc_levels[0].ptr;
    4429 80858716913 :         block = xfs_btree_get_block(cur, 0, &bp);
    4430             : 
    4431             : #ifdef DEBUG
    4432 80987661947 :         error = xfs_btree_check_block(cur, block, 0, bp);
    4433 80880825033 :         if (error)
    4434             :                 return error;
    4435             : #endif
    4436             : 
    4437             :         /*
    4438             :          * Off the right end or left end, return failure.
    4439             :          */
    4440 >16176*10^7 :         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
    4441     1268893 :                 *stat = 0;
    4442     1268893 :                 return 0;
    4443             :         }
    4444             : 
    4445             :         /*
    4446             :          * Point to the record and extract its data.
    4447             :          */
    4448 80913110833 :         *recp = xfs_btree_rec_addr(cur, ptr, block);
    4449 80913110833 :         *stat = 1;
    4450 80913110833 :         return 0;
    4451             : }
    4452             : 
    4453             : /* Visit a block in a btree. */
    4454             : STATIC int
    4455    94039391 : xfs_btree_visit_block(
    4456             :         struct xfs_btree_cur            *cur,
    4457             :         int                             level,
    4458             :         xfs_btree_visit_blocks_fn       fn,
    4459             :         void                            *data)
    4460             : {
    4461    94039391 :         struct xfs_btree_block          *block;
    4462    94039391 :         struct xfs_buf                  *bp;
    4463    94039391 :         union xfs_btree_ptr             rptr, bufptr;
    4464    94039391 :         int                             error;
    4465             : 
    4466             :         /* do right sibling readahead */
    4467    94039391 :         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
    4468    94039400 :         block = xfs_btree_get_block(cur, level, &bp);
    4469             : 
    4470             :         /* process the block */
    4471    94039334 :         error = fn(cur, level, data);
    4472    94039364 :         if (error)
    4473             :                 return error;
    4474             : 
    4475             :         /* now read rh sibling block for next iteration */
    4476    94039327 :         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
    4477   188078902 :         if (xfs_btree_ptr_is_null(cur, &rptr))
    4478             :                 return -ENOENT;
    4479             : 
    4480             :         /*
    4481             :          * We only visit blocks once in this walk, so we have to avoid the
    4482             :          * internal xfs_btree_lookup_get_block() optimisation where it will
    4483             :          * return the same block without checking if the right sibling points
    4484             :          * back to us and creates a cyclic reference in the btree.
    4485             :          */
    4486    83244487 :         xfs_btree_buf_to_ptr(cur, bp, &bufptr);
    4487    83244472 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4488     5127038 :                 if (rptr.l == bufptr.l) {
    4489           0 :                         xfs_btree_mark_sick(cur);
    4490           0 :                         return -EFSCORRUPTED;
    4491             :                 }
    4492             :         } else {
    4493    78117434 :                 if (rptr.s == bufptr.s) {
    4494           0 :                         xfs_btree_mark_sick(cur);
    4495           0 :                         return -EFSCORRUPTED;
    4496             :                 }
    4497             :         }
    4498    83244472 :         return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
    4499             : }
    4500             : 
    4501             : 
    4502             : /* Visit every block in a btree. */
    4503             : int
    4504     7046377 : xfs_btree_visit_blocks(
    4505             :         struct xfs_btree_cur            *cur,
    4506             :         xfs_btree_visit_blocks_fn       fn,
    4507             :         unsigned int                    flags,
    4508             :         void                            *data)
    4509             : {
    4510     7046377 :         union xfs_btree_ptr             lptr;
    4511     7046377 :         int                             level;
    4512     7046377 :         struct xfs_btree_block          *block = NULL;
    4513     7046377 :         int                             error = 0;
    4514             : 
    4515     7046377 :         cur->bc_ops->init_ptr_from_cur(cur, &lptr);
    4516             : 
    4517             :         /* for each level */
    4518    19118470 :         for (level = cur->bc_nlevels - 1; level >= 0; level--) {
    4519             :                 /* grab the left hand block */
    4520    12072208 :                 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
    4521    12072241 :                 if (error)
    4522         196 :                         return error;
    4523             : 
    4524             :                 /* readahead the left most block for the next level down */
    4525    12072045 :                 if (level > 0) {
    4526     5025769 :                         union xfs_btree_ptr     *ptr;
    4527             : 
    4528     5025769 :                         ptr = xfs_btree_ptr_addr(cur, 1, block);
    4529     5025767 :                         xfs_btree_readahead_ptr(cur, ptr, 1);
    4530             : 
    4531             :                         /* save for the next iteration of the loop */
    4532     5025777 :                         xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
    4533             : 
    4534     5025777 :                         if (!(flags & XFS_BTREE_VISIT_LEAVES))
    4535     1277052 :                                 continue;
    4536     7046276 :                 } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
    4537           0 :                         continue;
    4538             :                 }
    4539             : 
    4540             :                 /* for each buffer in the level */
    4541    94039387 :                 do {
    4542    94039387 :                         error = xfs_btree_visit_block(cur, level, fn, data);
    4543    94039373 :                 } while (!error);
    4544             : 
    4545    10794987 :                 if (error != -ENOENT)
    4546           2 :                         return error;
    4547             :         }
    4548             : 
    4549             :         return 0;
    4550             : }
    4551             : 
    4552             : /*
    4553             :  * Change the owner of a btree.
    4554             :  *
    4555             :  * The mechanism we use here is ordered buffer logging. Because we don't know
    4556             :  * how many buffers were are going to need to modify, we don't really want to
    4557             :  * have to make transaction reservations for the worst case of every buffer in a
    4558             :  * full size btree as that may be more space that we can fit in the log....
    4559             :  *
    4560             :  * We do the btree walk in the most optimal manner possible - we have sibling
    4561             :  * pointers so we can just walk all the blocks on each level from left to right
    4562             :  * in a single pass, and then move to the next level and do the same. We can
    4563             :  * also do readahead on the sibling pointers to get IO moving more quickly,
    4564             :  * though for slow disks this is unlikely to make much difference to performance
    4565             :  * as the amount of CPU work we have to do before moving to the next block is
    4566             :  * relatively small.
    4567             :  *
    4568             :  * For each btree block that we load, modify the owner appropriately, set the
    4569             :  * buffer as an ordered buffer and log it appropriately. We need to ensure that
    4570             :  * we mark the region we change dirty so that if the buffer is relogged in
    4571             :  * a subsequent transaction the changes we make here as an ordered buffer are
    4572             :  * correctly relogged in that transaction.  If we are in recovery context, then
    4573             :  * just queue the modified buffer as delayed write buffer so the transaction
    4574             :  * recovery completion writes the changes to disk.
    4575             :  */
    4576             : struct xfs_btree_block_change_owner_info {
    4577             :         uint64_t                new_owner;
    4578             :         struct list_head        *buffer_list;
    4579             : };
    4580             : 
    4581             : static int
    4582           0 : xfs_btree_block_change_owner(
    4583             :         struct xfs_btree_cur    *cur,
    4584             :         int                     level,
    4585             :         void                    *data)
    4586             : {
    4587           0 :         struct xfs_btree_block_change_owner_info        *bbcoi = data;
    4588           0 :         struct xfs_btree_block  *block;
    4589           0 :         struct xfs_buf          *bp;
    4590             : 
    4591             :         /* modify the owner */
    4592           0 :         block = xfs_btree_get_block(cur, level, &bp);
    4593           0 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
    4594           0 :                 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
    4595             :                         return 0;
    4596           0 :                 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
    4597             :         } else {
    4598           0 :                 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
    4599             :                         return 0;
    4600           0 :                 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
    4601             :         }
    4602             : 
    4603             :         /*
    4604             :          * If the block is a root block hosted in an inode, we might not have a
    4605             :          * buffer pointer here and we shouldn't attempt to log the change as the
    4606             :          * information is already held in the inode and discarded when the root
    4607             :          * block is formatted into the on-disk inode fork. We still change it,
    4608             :          * though, so everything is consistent in memory.
    4609             :          */
    4610           0 :         if (!bp) {
    4611           0 :                 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
    4612           0 :                 ASSERT(level == cur->bc_nlevels - 1);
    4613           0 :                 return 0;
    4614             :         }
    4615             : 
    4616           0 :         if (cur->bc_tp) {
    4617           0 :                 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
    4618           0 :                         xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
    4619           0 :                         return -EAGAIN;
    4620             :                 }
    4621             :         } else {
    4622           0 :                 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
    4623             :         }
    4624             : 
    4625             :         return 0;
    4626             : }
    4627             : 
    4628             : int
    4629           0 : xfs_btree_change_owner(
    4630             :         struct xfs_btree_cur    *cur,
    4631             :         uint64_t                new_owner,
    4632             :         struct list_head        *buffer_list)
    4633             : {
    4634           0 :         struct xfs_btree_block_change_owner_info        bbcoi;
    4635             : 
    4636           0 :         bbcoi.new_owner = new_owner;
    4637           0 :         bbcoi.buffer_list = buffer_list;
    4638             : 
    4639           0 :         return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
    4640             :                         XFS_BTREE_VISIT_ALL, &bbcoi);
    4641             : }
    4642             : 
    4643             : /* Verify the v5 fields of a long-format btree block. */
    4644             : xfs_failaddr_t
    4645   347895655 : xfs_btree_lblock_v5hdr_verify(
    4646             :         struct xfs_buf          *bp,
    4647             :         uint64_t                owner)
    4648             : {
    4649   347895655 :         struct xfs_mount        *mp = bp->b_mount;
    4650   347895655 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4651             : 
    4652   347895655 :         if (!xfs_has_crc(mp))
    4653           0 :                 return __this_address;
    4654   347895655 :         if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4655           0 :                 return __this_address;
    4656   347895830 :         if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4657           0 :                 return __this_address;
    4658   347895830 :         if (owner != XFS_RMAP_OWN_UNKNOWN &&
    4659           0 :             be64_to_cpu(block->bb_u.l.bb_owner) != owner)
    4660           0 :                 return __this_address;
    4661             :         return NULL;
    4662             : }
    4663             : 
    4664             : /* Verify a long-format btree block. */
    4665             : xfs_failaddr_t
    4666    14823325 : xfs_btree_lblock_verify(
    4667             :         struct xfs_buf          *bp,
    4668             :         unsigned int            max_recs)
    4669             : {
    4670    14823325 :         struct xfs_mount        *mp = bp->b_mount;
    4671    14823325 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4672    14823325 :         xfs_fsblock_t           fsb;
    4673    14823325 :         xfs_failaddr_t          fa;
    4674             : 
    4675    14823325 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4676             : 
    4677             :         /* numrecs verification */
    4678    14823325 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4679           0 :                 return __this_address;
    4680             : 
    4681             :         /* sibling pointer verification */
    4682    14823328 :         fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
    4683    14823328 :         fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4684             :                         block->bb_u.l.bb_leftsib);
    4685    14823321 :         if (!fa)
    4686    14823316 :                 fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
    4687             :                                 block->bb_u.l.bb_rightsib);
    4688             :         return fa;
    4689             : }
    4690             : 
    4691             : /**
    4692             :  * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
    4693             :  *                                    btree block
    4694             :  *
    4695             :  * @bp: buffer containing the btree block
    4696             :  */
    4697             : xfs_failaddr_t
    4698    56432178 : xfs_btree_sblock_v5hdr_verify(
    4699             :         struct xfs_buf          *bp)
    4700             : {
    4701    56432178 :         struct xfs_mount        *mp = bp->b_mount;
    4702    56432178 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4703    56432178 :         struct xfs_perag        *pag = bp->b_pag;
    4704             : 
    4705    56432178 :         if (!xfs_has_crc(mp))
    4706           0 :                 return __this_address;
    4707    56432178 :         if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
    4708           0 :                 return __this_address;
    4709    56432344 :         if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
    4710           0 :                 return __this_address;
    4711    95030671 :         if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
    4712          13 :                 return __this_address;
    4713             :         return NULL;
    4714             : }
    4715             : 
    4716             : /**
    4717             :  * xfs_btree_sblock_verify() -- verify a short-format btree block
    4718             :  *
    4719             :  * @bp: buffer containing the btree block
    4720             :  * @max_recs: maximum records allowed in this btree node
    4721             :  */
    4722             : xfs_failaddr_t
    4723    38637848 : xfs_btree_sblock_verify(
    4724             :         struct xfs_buf          *bp,
    4725             :         unsigned int            max_recs)
    4726             : {
    4727    38637848 :         struct xfs_mount        *mp = bp->b_mount;
    4728    38637848 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
    4729    38637848 :         xfs_agblock_t           agbno;
    4730    38637848 :         xfs_failaddr_t          fa;
    4731             : 
    4732    38637848 :         ASSERT(!(bp->b_target->bt_flags & XFS_BUFTARG_XFILE));
    4733             : 
    4734             :         /* numrecs verification */
    4735    38637848 :         if (be16_to_cpu(block->bb_numrecs) > max_recs)
    4736          16 :                 return __this_address;
    4737             : 
    4738             :         /* sibling pointer verification */
    4739    38637854 :         agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
    4740    38637854 :         fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4741             :                         block->bb_u.s.bb_leftsib);
    4742    38637853 :         if (!fa)
    4743    38637868 :                 fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
    4744             :                                 block->bb_u.s.bb_rightsib);
    4745             :         return fa;
    4746             : }
    4747             : 
    4748             : /*
    4749             :  * For the given limits on leaf and keyptr records per block, calculate the
    4750             :  * height of the tree needed to index the number of leaf records.
    4751             :  */
    4752             : unsigned int
    4753      216608 : xfs_btree_compute_maxlevels(
    4754             :         const unsigned int      *limits,
    4755             :         unsigned long long      records)
    4756             : {
    4757      216608 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4758      216608 :         unsigned int            height = 1;
    4759             : 
    4760     1417929 :         while (level_blocks > 1) {
    4761     1201321 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    4762     1201321 :                 height++;
    4763             :         }
    4764             : 
    4765      216608 :         return height;
    4766             : }
    4767             : 
    4768             : /*
    4769             :  * For the given limits on leaf and keyptr records per block, calculate the
    4770             :  * number of blocks needed to index the given number of leaf records.
    4771             :  */
    4772             : unsigned long long
    4773     5106531 : xfs_btree_calc_size(
    4774             :         const unsigned int      *limits,
    4775             :         unsigned long long      records)
    4776             : {
    4777     5106531 :         unsigned long long      level_blocks = howmany_64(records, limits[0]);
    4778     5106531 :         unsigned long long      blocks = level_blocks;
    4779             : 
    4780    12883668 :         while (level_blocks > 1) {
    4781     7777137 :                 level_blocks = howmany_64(level_blocks, limits[1]);
    4782     7777137 :                 blocks += level_blocks;
    4783             :         }
    4784             : 
    4785     5106531 :         return blocks;
    4786             : }
    4787             : 
    4788             : /*
    4789             :  * Given a number of available blocks for the btree to consume with records and
    4790             :  * pointers, calculate the height of the tree needed to index all the records
    4791             :  * that space can hold based on the number of pointers each interior node
    4792             :  * holds.
    4793             :  *
    4794             :  * We start by assuming a single level tree consumes a single block, then track
    4795             :  * the number of blocks each node level consumes until we no longer have space
    4796             :  * to store the next node level. At this point, we are indexing all the leaf
    4797             :  * blocks in the space, and there's no more free space to split the tree any
    4798             :  * further. That's our maximum btree height.
    4799             :  */
    4800             : unsigned int
    4801   831111222 : xfs_btree_space_to_height(
    4802             :         const unsigned int      *limits,
    4803             :         unsigned long long      leaf_blocks)
    4804             : {
    4805             :         /*
    4806             :          * The root btree block can have fewer than minrecs pointers in it
    4807             :          * because the tree might not be big enough to require that amount of
    4808             :          * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
    4809             :          */
    4810   831111222 :         unsigned long long      node_blocks = 2;
    4811   831111222 :         unsigned long long      blocks_left = leaf_blocks - 1;
    4812   831111222 :         unsigned int            height = 1;
    4813             : 
    4814   831111222 :         if (leaf_blocks < 1)
    4815             :                 return 0;
    4816             : 
    4817  6648860105 :         while (node_blocks < blocks_left) {
    4818  5817748883 :                 blocks_left -= node_blocks;
    4819  5817748883 :                 node_blocks *= limits[1];
    4820  5817748883 :                 height++;
    4821             :         }
    4822             : 
    4823             :         return height;
    4824             : }
    4825             : 
    4826             : /*
    4827             :  * Query a regular btree for all records overlapping a given interval.
    4828             :  * Start with a LE lookup of the key of low_rec and return all records
    4829             :  * until we find a record with a key greater than the key of high_rec.
    4830             :  */
    4831             : STATIC int
    4832  2800753611 : xfs_btree_simple_query_range(
    4833             :         struct xfs_btree_cur            *cur,
    4834             :         const union xfs_btree_key       *low_key,
    4835             :         const union xfs_btree_key       *high_key,
    4836             :         xfs_btree_query_range_fn        fn,
    4837             :         void                            *priv)
    4838             : {
    4839  2800753611 :         union xfs_btree_rec             *recp;
    4840  2800753611 :         union xfs_btree_key             rec_key;
    4841  2800753611 :         int                             stat;
    4842  2800753611 :         bool                            firstrec = true;
    4843  2800753611 :         int                             error;
    4844             : 
    4845  2800753611 :         ASSERT(cur->bc_ops->init_high_key_from_rec);
    4846  2800753611 :         ASSERT(cur->bc_ops->diff_two_keys);
    4847             : 
    4848             :         /*
    4849             :          * Find the leftmost record.  The btree cursor must be set
    4850             :          * to the low record used to generate low_key.
    4851             :          */
    4852  2800753611 :         stat = 0;
    4853  2800753611 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    4854  2800750224 :         if (error)
    4855           4 :                 goto out;
    4856             : 
    4857             :         /* Nothing?  See if there's anything to the right. */
    4858  2800750220 :         if (!stat) {
    4859   584708918 :                 error = xfs_btree_increment(cur, 0, &stat);
    4860   584712034 :                 if (error)
    4861           0 :                         goto out;
    4862             :         }
    4863             : 
    4864 52786885974 :         while (stat) {
    4865             :                 /* Find the record. */
    4866 52307926700 :                 error = xfs_btree_get_rec(cur, &recp, &stat);
    4867 52408762033 :                 if (error || !stat)
    4868             :                         break;
    4869             : 
    4870             :                 /* Skip if low_key > high_key(rec). */
    4871 52408762033 :                 if (firstrec) {
    4872  2483801939 :                         cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
    4873  2483806762 :                         firstrec = false;
    4874  2483806762 :                         if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
    4875  2216138152 :                                 goto advloop;
    4876             :                 }
    4877             : 
    4878             :                 /* Stop if low_key(rec) > high_key. */
    4879 50192634164 :                 cur->bc_ops->init_key_from_rec(&rec_key, recp);
    4880 49871424078 :                 if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
    4881             :                         break;
    4882             : 
    4883             :                 /* Callback */
    4884 47735842311 :                 error = fn(cur, recp, priv);
    4885 47892593975 :                 if (error)
    4886             :                         break;
    4887             : 
    4888 47892593890 : advloop:
    4889             :                 /* Move on to the next record. */
    4890 50108732042 :                 error = xfs_btree_increment(cur, 0, &stat);
    4891 49986132638 :                 if (error)
    4892             :                         break;
    4893             :         }
    4894             : 
    4895  2800799742 : out:
    4896  2800799746 :         return error;
    4897             : }
    4898             : 
    4899             : /*
    4900             :  * Query an overlapped interval btree for all records overlapping a given
    4901             :  * interval.  This function roughly follows the algorithm given in
    4902             :  * "Interval Trees" of _Introduction to Algorithms_, which is section
    4903             :  * 14.3 in the 2nd and 3rd editions.
    4904             :  *
    4905             :  * First, generate keys for the low and high records passed in.
    4906             :  *
    4907             :  * For any leaf node, generate the high and low keys for the record.
    4908             :  * If the record keys overlap with the query low/high keys, pass the
    4909             :  * record to the function iterator.
    4910             :  *
    4911             :  * For any internal node, compare the low and high keys of each
    4912             :  * pointer against the query low/high keys.  If there's an overlap,
    4913             :  * follow the pointer.
    4914             :  *
    4915             :  * As an optimization, we stop scanning a block when we find a low key
    4916             :  * that is greater than the query's high key.
    4917             :  */
    4918             : STATIC int
    4919   888384041 : xfs_btree_overlapped_query_range(
    4920             :         struct xfs_btree_cur            *cur,
    4921             :         const union xfs_btree_key       *low_key,
    4922             :         const union xfs_btree_key       *high_key,
    4923             :         xfs_btree_query_range_fn        fn,
    4924             :         void                            *priv)
    4925             : {
    4926   888384041 :         union xfs_btree_ptr             ptr;
    4927   888384041 :         union xfs_btree_ptr             *pp;
    4928   888384041 :         union xfs_btree_key             rec_key;
    4929   888384041 :         union xfs_btree_key             rec_hkey;
    4930   888384041 :         union xfs_btree_key             *lkp;
    4931   888384041 :         union xfs_btree_key             *hkp;
    4932   888384041 :         union xfs_btree_rec             *recp;
    4933   888384041 :         struct xfs_btree_block          *block;
    4934   888384041 :         int                             level;
    4935   888384041 :         struct xfs_buf                  *bp;
    4936   888384041 :         int                             i;
    4937   888384041 :         int                             error;
    4938             : 
    4939             :         /* Load the root of the btree. */
    4940   888384041 :         level = cur->bc_nlevels - 1;
    4941   888384041 :         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
    4942   888388347 :         error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
    4943   888386969 :         if (error)
    4944             :                 return error;
    4945   888387781 :         xfs_btree_get_block(cur, level, &bp);
    4946   888383735 :         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    4947             : #ifdef DEBUG
    4948   888381506 :         error = xfs_btree_check_block(cur, block, level, bp);
    4949   888386490 :         if (error)
    4950           0 :                 goto out;
    4951             : #endif
    4952   888386490 :         cur->bc_levels[level].ptr = 1;
    4953             : 
    4954 >10974*10^7 :         while (level < cur->bc_nlevels) {
    4955 >10885*10^7 :                 block = xfs_btree_get_block(cur, level, &bp);
    4956             : 
    4957             :                 /* End of node, pop back towards the root. */
    4958 >10884*10^7 :                 if (cur->bc_levels[level].ptr >
    4959 >10884*10^7 :                                         be16_to_cpu(block->bb_numrecs)) {
    4960   230659607 : pop_up:
    4961  2390963658 :                         if (level < cur->bc_nlevels - 1)
    4962  1503061875 :                                 cur->bc_levels[level + 1].ptr++;
    4963  2390963658 :                         level++;
    4964  2390963658 :                         continue;
    4965             :                 }
    4966             : 
    4967 >10861*10^7 :                 if (level == 0) {
    4968             :                         /* Handle a leaf node. */
    4969 75518093108 :                         recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
    4970             :                                         block);
    4971             : 
    4972 75518093108 :                         cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
    4973 75514085416 :                         cur->bc_ops->init_key_from_rec(&rec_key, recp);
    4974             : 
    4975             :                         /*
    4976             :                          * If (query's high key < record's low key), then there
    4977             :                          * are no more interesting records in this block.  Pop
    4978             :                          * up to the leaf level to find more record blocks.
    4979             :                          *
    4980             :                          * If (record's high key >= query's low key) and
    4981             :                          *    (query's high key >= record's low key), then
    4982             :                          * this record overlaps the query range; callback.
    4983             :                          */
    4984 75509163893 :                         if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
    4985   876728731 :                                 goto pop_up;
    4986 74638409940 :                         if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
    4987  2662180108 :                                 error = fn(cur, recp, priv);
    4988  2662193711 :                                 if (error)
    4989             :                                         break;
    4990             :                         }
    4991 74636740094 :                         cur->bc_levels[level].ptr++;
    4992 74636740094 :                         continue;
    4993             :                 }
    4994             : 
    4995             :                 /* Handle an internal node. */
    4996 33099916445 :                 lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
    4997 33099916445 :                 hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
    4998             :                                 block);
    4999 33099916445 :                 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
    5000             : 
    5001             :                 /*
    5002             :                  * If (query's high key < pointer's low key), then there are no
    5003             :                  * more interesting keys in this block.  Pop up one leaf level
    5004             :                  * to continue looking for records.
    5005             :                  *
    5006             :                  * If (pointer's high key >= query's low key) and
    5007             :                  *    (query's high key >= pointer's low key), then
    5008             :                  * this record overlaps the query range; follow pointer.
    5009             :                  */
    5010 33098717113 :                 if (xfs_btree_keycmp_lt(cur, high_key, lkp))
    5011  1283575320 :                         goto pop_up;
    5012 31821276427 :                 if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
    5013  1503500189 :                         level--;
    5014  1503500189 :                         error = xfs_btree_lookup_get_block(cur, level, pp,
    5015             :                                         &block);
    5016  1503506720 :                         if (error)
    5017           0 :                                 goto out;
    5018  1503506720 :                         xfs_btree_get_block(cur, level, &bp);
    5019  1503493848 :                         trace_xfs_btree_overlapped_query_range(cur, level, bp);
    5020             : #ifdef DEBUG
    5021  1503486912 :                         error = xfs_btree_check_block(cur, block, level, bp);
    5022  1503499834 :                         if (error)
    5023           0 :                                 goto out;
    5024             : #endif
    5025  1503499834 :                         cur->bc_levels[level].ptr = 1;
    5026  1503499834 :                         continue;
    5027             :                 }
    5028 30323173361 :                 cur->bc_levels[level].ptr++;
    5029             :         }
    5030             : 
    5031   888376891 : out:
    5032             :         /*
    5033             :          * If we don't end this function with the cursor pointing at a record
    5034             :          * block, a subsequent non-error cursor deletion will not release
    5035             :          * node-level buffers, causing a buffer leak.  This is quite possible
    5036             :          * with a zero-results range query, so release the buffers if we
    5037             :          * failed to return any results.
    5038             :          */
    5039   888376891 :         if (cur->bc_levels[0].bp == NULL) {
    5040        2813 :                 for (i = 0; i < cur->bc_nlevels; i++) {
    5041        2035 :                         if (cur->bc_levels[i].bp) {
    5042        1240 :                                 xfs_trans_brelse(cur->bc_tp,
    5043             :                                                 cur->bc_levels[i].bp);
    5044        1240 :                                 cur->bc_levels[i].bp = NULL;
    5045        1240 :                                 cur->bc_levels[i].ptr = 0;
    5046        1240 :                                 cur->bc_levels[i].ra = 0;
    5047             :                         }
    5048             :                 }
    5049             :         }
    5050             : 
    5051             :         return error;
    5052             : }
    5053             : 
    5054             : static inline void
    5055 13732305404 : xfs_btree_key_from_irec(
    5056             :         struct xfs_btree_cur            *cur,
    5057             :         union xfs_btree_key             *key,
    5058             :         const union xfs_btree_irec      *irec)
    5059             : {
    5060 13732305404 :         union xfs_btree_rec             rec;
    5061             : 
    5062 13732305404 :         cur->bc_rec = *irec;
    5063 13732305404 :         cur->bc_ops->init_rec_from_cur(cur, &rec);
    5064 13732235621 :         cur->bc_ops->init_key_from_rec(key, &rec);
    5065 13732392745 : }
    5066             : 
    5067             : /*
    5068             :  * Query a btree for all records overlapping a given interval of keys.  The
    5069             :  * supplied function will be called with each record found; return one of the
    5070             :  * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
    5071             :  * code.  This function returns -ECANCELED, zero, or a negative error code.
    5072             :  */
    5073             : int
    5074  3686355659 : xfs_btree_query_range(
    5075             :         struct xfs_btree_cur            *cur,
    5076             :         const union xfs_btree_irec      *low_rec,
    5077             :         const union xfs_btree_irec      *high_rec,
    5078             :         xfs_btree_query_range_fn        fn,
    5079             :         void                            *priv)
    5080             : {
    5081  3686355659 :         union xfs_btree_key             low_key;
    5082  3686355659 :         union xfs_btree_key             high_key;
    5083             : 
    5084             :         /* Find the keys of both ends of the interval. */
    5085  3686355659 :         xfs_btree_key_from_irec(cur, &high_key, high_rec);
    5086  3686418146 :         xfs_btree_key_from_irec(cur, &low_key, low_rec);
    5087             : 
    5088             :         /* Enforce low key <= high key. */
    5089  3686355503 :         if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
    5090             :                 return -EINVAL;
    5091             : 
    5092  3686543803 :         if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5093  2798158605 :                 return xfs_btree_simple_query_range(cur, &low_key,
    5094             :                                 &high_key, fn, priv);
    5095   888385198 :         return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
    5096             :                         fn, priv);
    5097             : }
    5098             : 
    5099             : /* Query a btree for all records. */
    5100             : int
    5101     2602678 : xfs_btree_query_all(
    5102             :         struct xfs_btree_cur            *cur,
    5103             :         xfs_btree_query_range_fn        fn,
    5104             :         void                            *priv)
    5105             : {
    5106     2602678 :         union xfs_btree_key             low_key;
    5107     2602678 :         union xfs_btree_key             high_key;
    5108             : 
    5109     2602678 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5110     2602678 :         memset(&low_key, 0, sizeof(low_key));
    5111     2602678 :         memset(&high_key, 0xFF, sizeof(high_key));
    5112             : 
    5113     2602678 :         return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
    5114             : }
    5115             : 
    5116             : static int
    5117    79894536 : xfs_btree_count_blocks_helper(
    5118             :         struct xfs_btree_cur    *cur,
    5119             :         int                     level,
    5120             :         void                    *data)
    5121             : {
    5122    79894536 :         xfs_extlen_t            *blocks = data;
    5123    79894536 :         (*blocks)++;
    5124             : 
    5125    79894536 :         return 0;
    5126             : }
    5127             : 
    5128             : /* Count the blocks in a btree and return the result in *blocks. */
    5129             : int
    5130     4476285 : xfs_btree_count_blocks(
    5131             :         struct xfs_btree_cur    *cur,
    5132             :         xfs_extlen_t            *blocks)
    5133             : {
    5134     4476285 :         *blocks = 0;
    5135     4476285 :         return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
    5136             :                         XFS_BTREE_VISIT_ALL, blocks);
    5137             : }
    5138             : 
    5139             : /* Compare two btree pointers. */
    5140             : int64_t
    5141    13167791 : xfs_btree_diff_two_ptrs(
    5142             :         struct xfs_btree_cur            *cur,
    5143             :         const union xfs_btree_ptr       *a,
    5144             :         const union xfs_btree_ptr       *b)
    5145             : {
    5146    13167791 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5147     1164344 :                 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
    5148    12003447 :         return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
    5149             : }
    5150             : 
    5151             : struct xfs_btree_has_records {
    5152             :         /* Keys for the start and end of the range we want to know about. */
    5153             :         union xfs_btree_key             start_key;
    5154             :         union xfs_btree_key             end_key;
    5155             : 
    5156             :         /* Mask for key comparisons, if desired. */
    5157             :         const union xfs_btree_key       *key_mask;
    5158             : 
    5159             :         /* Highest record key we've seen so far. */
    5160             :         union xfs_btree_key             high_key;
    5161             : 
    5162             :         enum xbtree_recpacking          outcome;
    5163             : };
    5164             : 
    5165             : STATIC int
    5166           0 : xfs_btree_has_records_helper(
    5167             :         struct xfs_btree_cur            *cur,
    5168             :         const union xfs_btree_rec       *rec,
    5169             :         void                            *priv)
    5170             : {
    5171           0 :         union xfs_btree_key             rec_key;
    5172           0 :         union xfs_btree_key             rec_high_key;
    5173           0 :         struct xfs_btree_has_records    *info = priv;
    5174           0 :         enum xbtree_key_contig          key_contig;
    5175             : 
    5176           0 :         cur->bc_ops->init_key_from_rec(&rec_key, rec);
    5177             : 
    5178           0 :         if (info->outcome == XBTREE_RECPACKING_EMPTY) {
    5179           0 :                 info->outcome = XBTREE_RECPACKING_SPARSE;
    5180             : 
    5181             :                 /*
    5182             :                  * If the first record we find does not overlap the start key,
    5183             :                  * then there is a hole at the start of the search range.
    5184             :                  * Classify this as sparse and stop immediately.
    5185             :                  */
    5186           0 :                 if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
    5187             :                                         info->key_mask))
    5188             :                         return -ECANCELED;
    5189             :         } else {
    5190             :                 /*
    5191             :                  * If a subsequent record does not overlap with the any record
    5192             :                  * we've seen so far, there is a hole in the middle of the
    5193             :                  * search range.  Classify this as sparse and stop.
    5194             :                  * If the keys overlap and this btree does not allow overlap,
    5195             :                  * signal corruption.
    5196             :                  */
    5197           0 :                 key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
    5198             :                                         &rec_key, info->key_mask);
    5199           0 :                 if (key_contig == XBTREE_KEY_OVERLAP &&
    5200           0 :                                 !(cur->bc_flags & XFS_BTREE_OVERLAPPING))
    5201             :                         return -EFSCORRUPTED;
    5202           0 :                 if (key_contig == XBTREE_KEY_GAP)
    5203             :                         return -ECANCELED;
    5204             :         }
    5205             : 
    5206             :         /*
    5207             :          * If high_key(rec) is larger than any other high key we've seen,
    5208             :          * remember it for later.
    5209             :          */
    5210           0 :         cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
    5211           0 :         if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
    5212             :                                 info->key_mask))
    5213           0 :                 info->high_key = rec_high_key; /* struct copy */
    5214             : 
    5215             :         return 0;
    5216             : }
    5217             : 
    5218             : /*
    5219             :  * Scan part of the keyspace of a btree and tell us if that keyspace does not
    5220             :  * map to any records; is fully mapped to records; or is partially mapped to
    5221             :  * records.  This is the btree record equivalent to determining if a file is
    5222             :  * sparse.
    5223             :  *
    5224             :  * For most btree types, the record scan should use all available btree key
    5225             :  * fields to compare the keys encountered.  These callers should pass NULL for
    5226             :  * @mask.  However, some callers (e.g.  scanning physical space in the rmapbt)
    5227             :  * want to ignore some part of the btree record keyspace when performing the
    5228             :  * comparison.  These callers should pass in a union xfs_btree_key object with
    5229             :  * the fields that *should* be a part of the comparison set to any nonzero
    5230             :  * value, and the rest zeroed.
    5231             :  */
    5232             : int
    5233  3180349671 : xfs_btree_has_records(
    5234             :         struct xfs_btree_cur            *cur,
    5235             :         const union xfs_btree_irec      *low,
    5236             :         const union xfs_btree_irec      *high,
    5237             :         const union xfs_btree_key       *mask,
    5238             :         enum xbtree_recpacking          *outcome)
    5239             : {
    5240  3180349671 :         struct xfs_btree_has_records    info = {
    5241             :                 .outcome                = XBTREE_RECPACKING_EMPTY,
    5242             :                 .key_mask               = mask,
    5243             :         };
    5244  3180349671 :         int                             error;
    5245             : 
    5246             :         /* Not all btrees support this operation. */
    5247  3180349671 :         if (!cur->bc_ops->keys_contiguous) {
    5248           0 :                 ASSERT(0);
    5249           0 :                 return -EOPNOTSUPP;
    5250             :         }
    5251             : 
    5252  3180349671 :         xfs_btree_key_from_irec(cur, &info.start_key, low);
    5253  3180356639 :         xfs_btree_key_from_irec(cur, &info.end_key, high);
    5254             : 
    5255  3180273544 :         error = xfs_btree_query_range(cur, low, high,
    5256             :                         xfs_btree_has_records_helper, &info);
    5257  3180414167 :         if (error == -ECANCELED)
    5258           0 :                 goto out;
    5259  3180414167 :         if (error)
    5260             :                 return error;
    5261             : 
    5262  3180414167 :         if (info.outcome == XBTREE_RECPACKING_EMPTY)
    5263  3180414167 :                 goto out;
    5264             : 
    5265             :         /*
    5266             :          * If the largest high_key(rec) we saw during the walk is greater than
    5267             :          * the end of the search range, classify this as full.  Otherwise,
    5268             :          * there is a hole at the end of the search range.
    5269             :          */
    5270           0 :         if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
    5271             :                                 mask))
    5272           0 :                 info.outcome = XBTREE_RECPACKING_FULL;
    5273             : 
    5274           0 : out:
    5275  3180414167 :         *outcome = info.outcome;
    5276  3180414167 :         return 0;
    5277             : }
    5278             : 
    5279             : /* Are there more records in this btree? */
    5280             : bool
    5281    72739782 : xfs_btree_has_more_records(
    5282             :         struct xfs_btree_cur    *cur)
    5283             : {
    5284    72739782 :         struct xfs_btree_block  *block;
    5285    72739782 :         struct xfs_buf          *bp;
    5286             : 
    5287    72739782 :         block = xfs_btree_get_block(cur, 0, &bp);
    5288             : 
    5289             :         /* There are still records in this block. */
    5290   145479562 :         if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
    5291             :                 return true;
    5292             : 
    5293             :         /* There are more record blocks. */
    5294      551068 :         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
    5295           0 :                 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
    5296             :         else
    5297      551068 :                 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
    5298             : }
    5299             : 
    5300             : /* Set up all the btree cursor caches. */
    5301             : int __init
    5302          12 : xfs_btree_init_cur_caches(void)
    5303             : {
    5304          12 :         int             error;
    5305             : 
    5306          12 :         error = xfs_allocbt_init_cur_cache();
    5307          12 :         if (error)
    5308             :                 return error;
    5309          12 :         error = xfs_inobt_init_cur_cache();
    5310          12 :         if (error)
    5311           0 :                 goto err;
    5312          12 :         error = xfs_bmbt_init_cur_cache();
    5313          12 :         if (error)
    5314           0 :                 goto err;
    5315          12 :         error = xfs_rmapbt_init_cur_cache();
    5316          12 :         if (error)
    5317           0 :                 goto err;
    5318          12 :         error = xfs_refcountbt_init_cur_cache();
    5319          12 :         if (error)
    5320           0 :                 goto err;
    5321             : 
    5322             :         return 0;
    5323           0 : err:
    5324           0 :         xfs_btree_destroy_cur_caches();
    5325           0 :         return error;
    5326             : }
    5327             : 
    5328             : /* Destroy all the btree cursor caches, if they've been allocated. */
    5329             : void
    5330          12 : xfs_btree_destroy_cur_caches(void)
    5331             : {
    5332          12 :         xfs_allocbt_destroy_cur_cache();
    5333          12 :         xfs_inobt_destroy_cur_cache();
    5334          12 :         xfs_bmbt_destroy_cur_cache();
    5335          12 :         xfs_rmapbt_destroy_cur_cache();
    5336          12 :         xfs_refcountbt_destroy_cur_cache();
    5337          12 : }
    5338             : 
    5339             : /* Move the btree cursor before the first record. */
    5340             : int
    5341   164415285 : xfs_btree_goto_left_edge(
    5342             :         struct xfs_btree_cur    *cur)
    5343             : {
    5344   164415285 :         int                     stat = 0;
    5345   164415285 :         int                     error;
    5346             : 
    5347   164415285 :         memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
    5348   164415285 :         error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
    5349   164415286 :         if (error)
    5350             :                 return error;
    5351   164415286 :         if (!stat)
    5352             :                 return 0;
    5353             : 
    5354           0 :         error = xfs_btree_decrement(cur, 0, &stat);
    5355           0 :         if (error)
    5356             :                 return error;
    5357           0 :         if (stat != 0) {
    5358           0 :                 ASSERT(0);
    5359           0 :                 xfs_btree_mark_sick(cur);
    5360           0 :                 return -EFSCORRUPTED;
    5361             :         }
    5362             : 
    5363             :         return 0;
    5364             : }

Generated by: LCOV version 1.14