LCOV - code coverage report
Current view: top level - fs/xfs/libxfs - xfs_rmap_btree.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc4-xfsa @ Mon Jul 31 20:08:27 PDT 2023 Lines: 290 313 92.7 %
Date: 2023-07-31 20:08:27 Functions: 32 34 94.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (c) 2014 Red Hat, Inc.
       4             :  * All Rights Reserved.
       5             :  */
       6             : #include "xfs.h"
       7             : #include "xfs_fs.h"
       8             : #include "xfs_shared.h"
       9             : #include "xfs_format.h"
      10             : #include "xfs_log_format.h"
      11             : #include "xfs_trans_resv.h"
      12             : #include "xfs_mount.h"
      13             : #include "xfs_trans.h"
      14             : #include "xfs_alloc.h"
      15             : #include "xfs_btree.h"
      16             : #include "xfs_btree_staging.h"
      17             : #include "xfs_rmap.h"
      18             : #include "xfs_rmap_btree.h"
      19             : #include "xfs_trace.h"
      20             : #include "xfs_error.h"
      21             : #include "xfs_extent_busy.h"
      22             : #include "xfs_ag.h"
      23             : #include "xfs_ag_resv.h"
      24             : #include "scrub/xfile.h"
      25             : #include "scrub/xfbtree.h"
      26             : #include "xfs_btree_mem.h"
      27             : 
      28             : static struct kmem_cache        *xfs_rmapbt_cur_cache;
      29             : 
      30             : /*
      31             :  * Reverse map btree.
      32             :  *
      33             :  * This is a per-ag tree used to track the owner(s) of a given extent. With
      34             :  * reflink it is possible for there to be multiple owners, which is a departure
      35             :  * from classic XFS. Owner records for data extents are inserted when the
      36             :  * extent is mapped and removed when an extent is unmapped.  Owner records for
      37             :  * all other block types (i.e. metadata) are inserted when an extent is
      38             :  * allocated and removed when an extent is freed. There can only be one owner
      39             :  * of a metadata extent, usually an inode or some other metadata structure like
      40             :  * an AG btree.
      41             :  *
      42             :  * The rmap btree is part of the free space management, so blocks for the tree
      43             :  * are sourced from the agfl. Hence we need transaction reservation support for
      44             :  * this tree so that the freelist is always large enough. This also impacts on
      45             :  * the minimum space we need to leave free in the AG.
      46             :  *
      47             :  * The tree is ordered by [ag block, owner, offset]. This is a large key size,
      48             :  * but it is the only way to enforce unique keys when a block can be owned by
      49             :  * multiple files at any offset. There's no need to order/search by extent
      50             :  * size for online updating/management of the tree. It is intended that most
      51             :  * reverse lookups will be to find the owner(s) of a particular block, or to
      52             :  * try to recover tree and file data from corrupt primary metadata.
      53             :  */
      54             : 
      55             : static struct xfs_btree_cur *
      56    55242922 : xfs_rmapbt_dup_cursor(
      57             :         struct xfs_btree_cur    *cur)
      58             : {
      59    55242922 :         return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
      60             :                                 cur->bc_ag.agbp, cur->bc_ag.pag);
      61             : }
      62             : 
      63             : STATIC void
      64       13971 : xfs_rmapbt_set_root(
      65             :         struct xfs_btree_cur            *cur,
      66             :         const union xfs_btree_ptr       *ptr,
      67             :         int                             inc)
      68             : {
      69       13971 :         struct xfs_buf          *agbp = cur->bc_ag.agbp;
      70       13971 :         struct xfs_agf          *agf = agbp->b_addr;
      71       13971 :         int                     btnum = cur->bc_btnum;
      72             : 
      73       13971 :         ASSERT(ptr->s != 0);
      74             : 
      75       13971 :         agf->agf_roots[btnum] = ptr->s;
      76       13971 :         be32_add_cpu(&agf->agf_levels[btnum], inc);
      77       13971 :         cur->bc_ag.pag->pagf_levels[btnum] += inc;
      78             : 
      79       13971 :         xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
      80       13970 : }
      81             : 
      82             : STATIC int
      83      399048 : xfs_rmapbt_alloc_block(
      84             :         struct xfs_btree_cur            *cur,
      85             :         const union xfs_btree_ptr       *start,
      86             :         union xfs_btree_ptr             *new,
      87             :         int                             *stat)
      88             : {
      89      399048 :         struct xfs_buf          *agbp = cur->bc_ag.agbp;
      90      399048 :         struct xfs_agf          *agf = agbp->b_addr;
      91      399048 :         struct xfs_perag        *pag = cur->bc_ag.pag;
      92      399048 :         int                     error;
      93      399048 :         xfs_agblock_t           bno;
      94             : 
      95             :         /* Allocate the new block from the freelist. If we can't, give up.  */
      96      399048 :         error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp,
      97             :                                        &bno, 1);
      98      399048 :         if (error)
      99             :                 return error;
     100      399048 :         if (bno == NULLAGBLOCK) {
     101           0 :                 *stat = 0;
     102           0 :                 return 0;
     103             :         }
     104             : 
     105      399048 :         xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);
     106             : 
     107      399048 :         new->s = cpu_to_be32(bno);
     108      399048 :         be32_add_cpu(&agf->agf_rmap_blocks, 1);
     109      399048 :         xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
     110             : 
     111      399048 :         xfs_ag_resv_rmapbt_alloc(cur->bc_mp, pag->pag_agno);
     112             : 
     113      399048 :         *stat = 1;
     114      399048 :         return 0;
     115             : }
     116             : 
     117             : STATIC int
     118      160512 : xfs_rmapbt_free_block(
     119             :         struct xfs_btree_cur    *cur,
     120             :         struct xfs_buf          *bp)
     121             : {
     122      160512 :         struct xfs_buf          *agbp = cur->bc_ag.agbp;
     123      160512 :         struct xfs_agf          *agf = agbp->b_addr;
     124      160512 :         struct xfs_perag        *pag = cur->bc_ag.pag;
     125      160512 :         xfs_agblock_t           bno;
     126      160512 :         int                     error;
     127             : 
     128      160512 :         bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
     129      160512 :         be32_add_cpu(&agf->agf_rmap_blocks, -1);
     130      160512 :         xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
     131      160512 :         error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1);
     132      160512 :         if (error)
     133             :                 return error;
     134             : 
     135      160512 :         xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
     136             :                               XFS_EXTENT_BUSY_SKIP_DISCARD);
     137             : 
     138      160512 :         xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
     139      160512 :         return 0;
     140             : }
     141             : 
     142             : STATIC int
     143   127644227 : xfs_rmapbt_get_minrecs(
     144             :         struct xfs_btree_cur    *cur,
     145             :         int                     level)
     146             : {
     147   127644227 :         return cur->bc_mp->m_rmap_mnr[level != 0];
     148             : }
     149             : 
     150             : STATIC int
     151 >16698*10^7 : xfs_rmapbt_get_maxrecs(
     152             :         struct xfs_btree_cur    *cur,
     153             :         int                     level)
     154             : {
     155 >16698*10^7 :         return cur->bc_mp->m_rmap_mxr[level != 0];
     156             : }
     157             : 
     158             : /*
     159             :  * Convert the ondisk record's offset field into the ondisk key's offset field.
     160             :  * Fork and bmbt are significant parts of the rmap record key, but written
     161             :  * status is merely a record attribute.
     162             :  */
     163             : static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
     164             : {
     165 >41805*10^7 :         return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
     166             : }
     167             : 
     168             : STATIC void
     169 >19044*10^7 : xfs_rmapbt_init_key_from_rec(
     170             :         union xfs_btree_key             *key,
     171             :         const union xfs_btree_rec       *rec)
     172             : {
     173 >19044*10^7 :         key->rmap.rm_startblock = rec->rmap.rm_startblock;
     174 >19044*10^7 :         key->rmap.rm_owner = rec->rmap.rm_owner;
     175 >19044*10^7 :         key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
     176 >19044*10^7 : }
     177             : 
     178             : /*
     179             :  * The high key for a reverse mapping record can be computed by shifting
     180             :  * the startblock and offset to the highest value that would still map
     181             :  * to that record.  In practice this means that we add blockcount-1 to
     182             :  * the startblock for all records, and if the record is for a data/attr
     183             :  * fork mapping, we add blockcount-1 to the offset too.
     184             :  */
     185             : STATIC void
     186 >22753*10^7 : xfs_rmapbt_init_high_key_from_rec(
     187             :         union xfs_btree_key             *key,
     188             :         const union xfs_btree_rec       *rec)
     189             : {
     190 >22753*10^7 :         uint64_t                        off;
     191 >22753*10^7 :         int                             adj;
     192             : 
     193 >22753*10^7 :         adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
     194             : 
     195 >22753*10^7 :         key->rmap.rm_startblock = rec->rmap.rm_startblock;
     196 >22753*10^7 :         be32_add_cpu(&key->rmap.rm_startblock, adj);
     197 >22761*10^7 :         key->rmap.rm_owner = rec->rmap.rm_owner;
     198 >22761*10^7 :         key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
     199 >22761*10^7 :         if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
     200 >21708*10^7 :             XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
     201             :                 return;
     202 >20595*10^7 :         off = be64_to_cpu(key->rmap.rm_offset);
     203 >20595*10^7 :         off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
     204 >20595*10^7 :         key->rmap.rm_offset = cpu_to_be64(off);
     205             : }
     206             : 
     207             : STATIC void
     208  3841526730 : xfs_rmapbt_init_rec_from_cur(
     209             :         struct xfs_btree_cur    *cur,
     210             :         union xfs_btree_rec     *rec)
     211             : {
     212  3841526730 :         rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
     213  3841526730 :         rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
     214  3841526730 :         rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
     215  3841526730 :         rec->rmap.rm_offset = cpu_to_be64(
     216             :                         xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
     217  3841526730 : }
     218             : 
     219             : STATIC void
     220  2479842425 : xfs_rmapbt_init_ptr_from_cur(
     221             :         struct xfs_btree_cur    *cur,
     222             :         union xfs_btree_ptr     *ptr)
     223             : {
     224  2479842425 :         struct xfs_agf          *agf = cur->bc_ag.agbp->b_addr;
     225             : 
     226  4959684850 :         ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
     227             : 
     228  2479842425 :         ptr->s = agf->agf_roots[cur->bc_btnum];
     229  2479842425 : }
     230             : 
     231             : /*
     232             :  * Mask the appropriate parts of the ondisk key field for a key comparison.
     233             :  * Fork and bmbt are significant parts of the rmap record key, but written
     234             :  * status is merely a record attribute.
     235             :  */
     236             : static inline uint64_t offset_keymask(uint64_t offset)
     237             : {
     238 11629918137 :         return offset & ~XFS_RMAP_OFF_UNWRITTEN;
     239             : }
     240             : 
     241             : STATIC int64_t
     242 15581211470 : xfs_rmapbt_key_diff(
     243             :         struct xfs_btree_cur            *cur,
     244             :         const union xfs_btree_key       *key)
     245             : {
     246 15581211470 :         struct xfs_rmap_irec            *rec = &cur->bc_rec.r;
     247 15581211470 :         const struct xfs_rmap_key       *kp = &key->rmap;
     248 15581211470 :         __u64                           x, y;
     249 15581211470 :         int64_t                         d;
     250             : 
     251 15581211470 :         d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
     252 15581211470 :         if (d)
     253             :                 return d;
     254             : 
     255  1386010090 :         x = be64_to_cpu(kp->rm_owner);
     256  1386010090 :         y = rec->rm_owner;
     257  1386010090 :         if (x > y)
     258             :                 return 1;
     259  1345870551 :         else if (y > x)
     260             :                 return -1;
     261             : 
     262  1140586235 :         x = offset_keymask(be64_to_cpu(kp->rm_offset));
     263  1140586235 :         y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
     264  1140586235 :         if (x > y)
     265             :                 return 1;
     266  1068969926 :         else if (y > x)
     267   343409147 :                 return -1;
     268             :         return 0;
     269             : }
     270             : 
     271             : STATIC int64_t
     272 >50723*10^7 : xfs_rmapbt_diff_two_keys(
     273             :         struct xfs_btree_cur            *cur,
     274             :         const union xfs_btree_key       *k1,
     275             :         const union xfs_btree_key       *k2,
     276             :         const union xfs_btree_key       *mask)
     277             : {
     278 >50723*10^7 :         const struct xfs_rmap_key       *kp1 = &k1->rmap;
     279 >50723*10^7 :         const struct xfs_rmap_key       *kp2 = &k2->rmap;
     280 >50723*10^7 :         int64_t                         d;
     281 >50723*10^7 :         __u64                           x, y;
     282             : 
     283             :         /* Doesn't make sense to mask off the physical space part */
     284 >50723*10^7 :         ASSERT(!mask || mask->rmap.rm_startblock);
     285             : 
     286 >50723*10^7 :         d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
     287 >50723*10^7 :                      be32_to_cpu(kp2->rm_startblock);
     288 >50723*10^7 :         if (d)
     289             :                 return d;
     290             : 
     291 14010353345 :         if (!mask || mask->rmap.rm_owner) {
     292 14010318151 :                 x = be64_to_cpu(kp1->rm_owner);
     293 14010318151 :                 y = be64_to_cpu(kp2->rm_owner);
     294 14010318151 :                 if (x > y)
     295             :                         return 1;
     296  6245850861 :                 else if (y > x)
     297             :                         return -1;
     298             :         }
     299             : 
     300  5232625302 :         if (!mask || mask->rmap.rm_offset) {
     301             :                 /* Doesn't make sense to allow offset but not owner */
     302  5232624332 :                 ASSERT(!mask || mask->rmap.rm_owner);
     303             : 
     304  5232624332 :                 x = offset_keymask(be64_to_cpu(kp1->rm_offset));
     305  5232624332 :                 y = offset_keymask(be64_to_cpu(kp2->rm_offset));
     306  5232624332 :                 if (x > y)
     307             :                         return 1;
     308   888724405 :                 else if (y > x)
     309    25043674 :                         return -1;
     310             :         }
     311             : 
     312             :         return 0;
     313             : }
     314             : 
     315             : static xfs_failaddr_t
     316    33515571 : xfs_rmapbt_verify(
     317             :         struct xfs_buf          *bp)
     318             : {
     319    33515571 :         struct xfs_mount        *mp = bp->b_mount;
     320    33515571 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     321    33515571 :         struct xfs_perag        *pag = bp->b_pag;
     322    33515571 :         xfs_failaddr_t          fa;
     323    33515571 :         unsigned int            level;
     324             : 
     325             :         /*
     326             :          * magic number and level verification
     327             :          *
     328             :          * During growfs operations, we can't verify the exact level or owner as
     329             :          * the perag is not fully initialised and hence not attached to the
     330             :          * buffer.  In this case, check against the maximum tree depth.
     331             :          *
     332             :          * Similarly, during log recovery we will have a perag structure
     333             :          * attached, but the agf information will not yet have been initialised
     334             :          * from the on disk AGF. Again, we can only check against maximum limits
     335             :          * in this case.
     336             :          */
     337    33515571 :         if (!xfs_verify_magic(bp, block->bb_magic))
     338           0 :                 return __this_address;
     339             : 
     340    33515637 :         if (!xfs_has_rmapbt(mp))
     341           0 :                 return __this_address;
     342    33515637 :         fa = xfs_btree_sblock_v5hdr_verify(bp);
     343    33515680 :         if (fa)
     344             :                 return fa;
     345             : 
     346    33515674 :         level = be16_to_cpu(block->bb_level);
     347    67025903 :         if (pag && xfs_perag_initialised_agf(pag)) {
     348    27571230 :                 unsigned int    maxlevel = pag->pagf_levels[XFS_BTNUM_RMAPi];
     349             : 
     350             : #ifdef CONFIG_XFS_ONLINE_REPAIR
     351             :                 /*
     352             :                  * Online repair could be rewriting the free space btrees, so
     353             :                  * we'll validate against the larger of either tree while this
     354             :                  * is going on.
     355             :                  */
     356    27571230 :                 maxlevel = max_t(unsigned int, maxlevel,
     357             :                                 pag->pagf_alt_levels[XFS_BTNUM_RMAPi]);
     358             : #endif
     359    27571230 :                 if (level >= maxlevel)
     360           0 :                         return __this_address;
     361     5944444 :         } else if (level >= mp->m_rmap_maxlevels)
     362           0 :                 return __this_address;
     363             : 
     364    33515674 :         return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]);
     365             : }
     366             : 
     367             : static void
     368     1722579 : xfs_rmapbt_read_verify(
     369             :         struct xfs_buf  *bp)
     370             : {
     371     1722579 :         xfs_failaddr_t  fa;
     372             : 
     373     1722579 :         if (!xfs_btree_sblock_verify_crc(bp))
     374           4 :                 xfs_verifier_error(bp, -EFSBADCRC, __this_address);
     375             :         else {
     376     1722575 :                 fa = xfs_rmapbt_verify(bp);
     377     1722575 :                 if (fa)
     378           0 :                         xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     379             :         }
     380             : 
     381     1722579 :         if (bp->b_error)
     382           4 :                 trace_xfs_btree_corrupt(bp, _RET_IP_);
     383     1722579 : }
     384             : 
     385             : static void
     386    12651597 : xfs_rmapbt_write_verify(
     387             :         struct xfs_buf  *bp)
     388             : {
     389    12651597 :         xfs_failaddr_t  fa;
     390             : 
     391    12651597 :         fa = xfs_rmapbt_verify(bp);
     392    12651595 :         if (fa) {
     393           0 :                 trace_xfs_btree_corrupt(bp, _RET_IP_);
     394           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     395           0 :                 return;
     396             :         }
     397    12651595 :         xfs_btree_sblock_calc_crc(bp);
     398             : 
     399             : }
     400             : 
     401             : const struct xfs_buf_ops xfs_rmapbt_buf_ops = {
     402             :         .name                   = "xfs_rmapbt",
     403             :         .magic                  = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
     404             :         .verify_read            = xfs_rmapbt_read_verify,
     405             :         .verify_write           = xfs_rmapbt_write_verify,
     406             :         .verify_struct          = xfs_rmapbt_verify,
     407             : };
     408             : 
     409             : STATIC int
     410     6952867 : xfs_rmapbt_keys_inorder(
     411             :         struct xfs_btree_cur            *cur,
     412             :         const union xfs_btree_key       *k1,
     413             :         const union xfs_btree_key       *k2)
     414             : {
     415     6952867 :         uint32_t                x;
     416     6952867 :         uint32_t                y;
     417     6952867 :         uint64_t                a;
     418     6952867 :         uint64_t                b;
     419             : 
     420     6952867 :         x = be32_to_cpu(k1->rmap.rm_startblock);
     421     6952867 :         y = be32_to_cpu(k2->rmap.rm_startblock);
     422     6952867 :         if (x < y)
     423             :                 return 1;
     424       55832 :         else if (x > y)
     425             :                 return 0;
     426       55832 :         a = be64_to_cpu(k1->rmap.rm_owner);
     427       55832 :         b = be64_to_cpu(k2->rmap.rm_owner);
     428       55832 :         if (a < b)
     429             :                 return 1;
     430       52947 :         else if (a > b)
     431             :                 return 0;
     432       52947 :         a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
     433       52947 :         b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
     434       52947 :         if (a <= b)
     435       52947 :                 return 1;
     436             :         return 0;
     437             : }
     438             : 
     439             : STATIC int
     440  1235295386 : xfs_rmapbt_recs_inorder(
     441             :         struct xfs_btree_cur            *cur,
     442             :         const union xfs_btree_rec       *r1,
     443             :         const union xfs_btree_rec       *r2)
     444             : {
     445  1235295386 :         uint32_t                x;
     446  1235295386 :         uint32_t                y;
     447  1235295386 :         uint64_t                a;
     448  1235295386 :         uint64_t                b;
     449             : 
     450  1235295386 :         x = be32_to_cpu(r1->rmap.rm_startblock);
     451  1235295386 :         y = be32_to_cpu(r2->rmap.rm_startblock);
     452  1235295386 :         if (x < y)
     453             :                 return 1;
     454    41579004 :         else if (x > y)
     455             :                 return 0;
     456    41579004 :         a = be64_to_cpu(r1->rmap.rm_owner);
     457    41579004 :         b = be64_to_cpu(r2->rmap.rm_owner);
     458    41579004 :         if (a < b)
     459             :                 return 1;
     460    11988672 :         else if (a > b)
     461             :                 return 0;
     462    11988672 :         a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
     463    11988672 :         b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
     464    11988672 :         if (a <= b)
     465    11988672 :                 return 1;
     466             :         return 0;
     467             : }
     468             : 
     469             : STATIC enum xbtree_key_contig
     470           0 : xfs_rmapbt_keys_contiguous(
     471             :         struct xfs_btree_cur            *cur,
     472             :         const union xfs_btree_key       *key1,
     473             :         const union xfs_btree_key       *key2,
     474             :         const union xfs_btree_key       *mask)
     475             : {
     476           0 :         ASSERT(!mask || mask->rmap.rm_startblock);
     477             : 
     478             :         /*
     479             :          * We only support checking contiguity of the physical space component.
     480             :          * If any callers ever need more specificity than that, they'll have to
     481             :          * implement it here.
     482             :          */
     483           0 :         ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
     484             : 
     485           0 :         return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
     486           0 :                                  be32_to_cpu(key2->rmap.rm_startblock));
     487             : }
     488             : 
     489             : const struct xfs_btree_ops xfs_rmapbt_ops = {
     490             :         .rec_len                = sizeof(struct xfs_rmap_rec),
     491             :         .key_len                = 2 * sizeof(struct xfs_rmap_key),
     492             :         .lru_refs               = XFS_RMAP_BTREE_REF,
     493             :         .geom_flags             = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING,
     494             : 
     495             :         .dup_cursor             = xfs_rmapbt_dup_cursor,
     496             :         .set_root               = xfs_rmapbt_set_root,
     497             :         .alloc_block            = xfs_rmapbt_alloc_block,
     498             :         .free_block             = xfs_rmapbt_free_block,
     499             :         .get_minrecs            = xfs_rmapbt_get_minrecs,
     500             :         .get_maxrecs            = xfs_rmapbt_get_maxrecs,
     501             :         .init_key_from_rec      = xfs_rmapbt_init_key_from_rec,
     502             :         .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
     503             :         .init_rec_from_cur      = xfs_rmapbt_init_rec_from_cur,
     504             :         .init_ptr_from_cur      = xfs_rmapbt_init_ptr_from_cur,
     505             :         .key_diff               = xfs_rmapbt_key_diff,
     506             :         .buf_ops                = &xfs_rmapbt_buf_ops,
     507             :         .diff_two_keys          = xfs_rmapbt_diff_two_keys,
     508             :         .keys_inorder           = xfs_rmapbt_keys_inorder,
     509             :         .recs_inorder           = xfs_rmapbt_recs_inorder,
     510             :         .keys_contiguous        = xfs_rmapbt_keys_contiguous,
     511             : };
     512             : 
     513             : static struct xfs_btree_cur *
     514   905897884 : xfs_rmapbt_init_common(
     515             :         struct xfs_mount        *mp,
     516             :         struct xfs_trans        *tp,
     517             :         struct xfs_perag        *pag)
     518             : {
     519   905897884 :         struct xfs_btree_cur    *cur;
     520             : 
     521             :         /* Overlapping btree; 2 keys per pointer. */
     522   905897884 :         cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, &xfs_rmapbt_ops,
     523   905897884 :                         mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
     524   905909234 :         cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
     525             : 
     526   905909234 :         cur->bc_ag.pag = xfs_perag_hold(pag);
     527   905908586 :         return cur;
     528             : }
     529             : 
     530             : /* Create a new reverse mapping btree cursor. */
     531             : struct xfs_btree_cur *
     532   905890927 : xfs_rmapbt_init_cursor(
     533             :         struct xfs_mount        *mp,
     534             :         struct xfs_trans        *tp,
     535             :         struct xfs_buf          *agbp,
     536             :         struct xfs_perag        *pag)
     537             : {
     538   905890927 :         struct xfs_agf          *agf = agbp->b_addr;
     539   905890927 :         struct xfs_btree_cur    *cur;
     540             : 
     541   905890927 :         cur = xfs_rmapbt_init_common(mp, tp, pag);
     542   905892287 :         cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]);
     543   905892287 :         cur->bc_ag.agbp = agbp;
     544   905892287 :         return cur;
     545             : }
     546             : 
     547             : /* Create a new reverse mapping btree cursor with a fake root for staging. */
     548             : struct xfs_btree_cur *
     549        9219 : xfs_rmapbt_stage_cursor(
     550             :         struct xfs_mount        *mp,
     551             :         struct xbtree_afakeroot *afake,
     552             :         struct xfs_perag        *pag)
     553             : {
     554        9219 :         struct xfs_btree_cur    *cur;
     555             : 
     556        9219 :         cur = xfs_rmapbt_init_common(mp, NULL, pag);
     557        9219 :         xfs_btree_stage_afakeroot(cur, afake);
     558        9219 :         return cur;
     559             : }
     560             : 
     561             : #ifdef CONFIG_XFS_BTREE_IN_XFILE
     562             : /*
     563             :  * Validate an in-memory rmap btree block.  Callers are allowed to generate an
     564             :  * in-memory btree even if the ondisk feature is not enabled.
     565             :  */
     566             : static xfs_failaddr_t
     567    28234851 : xfs_rmapbt_mem_verify(
     568             :         struct xfs_buf          *bp)
     569             : {
     570    28234851 :         struct xfs_mount        *mp = bp->b_mount;
     571    28234851 :         struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
     572    28234851 :         xfs_failaddr_t          fa;
     573    28234851 :         unsigned int            level;
     574             : 
     575    28234851 :         if (!xfs_verify_magic(bp, block->bb_magic))
     576           0 :                 return __this_address;
     577             : 
     578    28234858 :         fa = xfs_btree_sblock_v5hdr_verify(bp);
     579    28234861 :         if (fa)
     580             :                 return fa;
     581             : 
     582    28234850 :         level = be16_to_cpu(block->bb_level);
     583    28234850 :         if (xfs_has_rmapbt(mp)) {
     584    28234850 :                 if (level >= mp->m_rmap_maxlevels)
     585           0 :                         return __this_address;
     586             :         } else {
     587           0 :                 if (level >= xfs_rmapbt_maxlevels_ondisk())
     588           0 :                         return __this_address;
     589             :         }
     590             : 
     591    28234850 :         return xfbtree_sblock_verify(bp,
     592             :                         xfs_rmapbt_maxrecs(mp, xfo_to_b(1), level == 0));
     593             : }
     594             : 
     595             : static void
     596        9258 : xfs_rmapbt_mem_rw_verify(
     597             :         struct xfs_buf  *bp)
     598             : {
     599        9258 :         xfs_failaddr_t  fa = xfs_rmapbt_mem_verify(bp);
     600             : 
     601        9256 :         if (fa)
     602           0 :                 xfs_verifier_error(bp, -EFSCORRUPTED, fa);
     603        9256 : }
     604             : 
     605             : /* skip crc checks on in-memory btrees to save time */
     606             : static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
     607             :         .name                   = "xfs_rmapbt_mem",
     608             :         .magic                  = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
     609             :         .verify_read            = xfs_rmapbt_mem_rw_verify,
     610             :         .verify_write           = xfs_rmapbt_mem_rw_verify,
     611             :         .verify_struct          = xfs_rmapbt_mem_verify,
     612             : };
     613             : 
     614             : static const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
     615             :         .rec_len                = sizeof(struct xfs_rmap_rec),
     616             :         .key_len                = 2 * sizeof(struct xfs_rmap_key),
     617             :         .lru_refs               = XFS_RMAP_BTREE_REF,
     618             :         .geom_flags             = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING |
     619             :                                   XFS_BTREE_IN_XFILE,
     620             : 
     621             :         .dup_cursor             = xfbtree_dup_cursor,
     622             :         .set_root               = xfbtree_set_root,
     623             :         .alloc_block            = xfbtree_alloc_block,
     624             :         .free_block             = xfbtree_free_block,
     625             :         .get_minrecs            = xfbtree_get_minrecs,
     626             :         .get_maxrecs            = xfbtree_get_maxrecs,
     627             :         .init_key_from_rec      = xfs_rmapbt_init_key_from_rec,
     628             :         .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
     629             :         .init_rec_from_cur      = xfs_rmapbt_init_rec_from_cur,
     630             :         .init_ptr_from_cur      = xfbtree_init_ptr_from_cur,
     631             :         .key_diff               = xfs_rmapbt_key_diff,
     632             :         .buf_ops                = &xfs_rmapbt_mem_buf_ops,
     633             :         .diff_two_keys          = xfs_rmapbt_diff_two_keys,
     634             :         .keys_inorder           = xfs_rmapbt_keys_inorder,
     635             :         .recs_inorder           = xfs_rmapbt_recs_inorder,
     636             :         .keys_contiguous        = xfs_rmapbt_keys_contiguous,
     637             : };
     638             : 
     639             : /* Create a cursor for an in-memory btree. */
     640             : struct xfs_btree_cur *
     641    19612567 : xfs_rmapbt_mem_cursor(
     642             :         struct xfs_perag        *pag,
     643             :         struct xfs_trans        *tp,
     644             :         struct xfs_buf          *head_bp,
     645             :         struct xfbtree          *xfbtree)
     646             : {
     647    19612567 :         struct xfs_btree_cur    *cur;
     648    19612567 :         struct xfs_mount        *mp = pag->pag_mount;
     649             : 
     650             :         /* Overlapping btree; 2 keys per pointer. */
     651    19612567 :         cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP,
     652    19612567 :                         &xfs_rmapbt_mem_ops, mp->m_rmap_maxlevels,
     653             :                         xfs_rmapbt_cur_cache);
     654    19612506 :         cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2);
     655    19612506 :         cur->bc_mem.xfbtree = xfbtree;
     656    19612506 :         cur->bc_mem.head_bp = head_bp;
     657    19612506 :         cur->bc_nlevels = xfs_btree_mem_head_nlevels(head_bp);
     658             : 
     659    19612554 :         cur->bc_mem.pag = xfs_perag_hold(pag);
     660    19612558 :         return cur;
     661             : }
     662             : 
     663             : /* Create an in-memory rmap btree. */
     664             : int
     665        9261 : xfs_rmapbt_mem_create(
     666             :         struct xfs_mount        *mp,
     667             :         xfs_agnumber_t          agno,
     668             :         struct xfs_buftarg      *target,
     669             :         struct xfbtree          **xfbtreep)
     670             : {
     671        9261 :         struct xfbtree_config   cfg = {
     672             :                 .btree_ops      = &xfs_rmapbt_mem_ops,
     673             :                 .target         = target,
     674             :                 .owner          = agno,
     675             :                 .flags          = XFBTREE_DIRECT_MAP,
     676             :         };
     677             : 
     678        9261 :         return xfbtree_create(mp, &cfg, xfbtreep);
     679             : }
     680             : #endif /* CONFIG_XFS_BTREE_IN_XFILE */
     681             : 
     682             : /*
     683             :  * Install a new reverse mapping btree root.  Caller is responsible for
     684             :  * invalidating and freeing the old btree blocks.
     685             :  */
     686             : void
     687        9218 : xfs_rmapbt_commit_staged_btree(
     688             :         struct xfs_btree_cur    *cur,
     689             :         struct xfs_trans        *tp,
     690             :         struct xfs_buf          *agbp)
     691             : {
     692        9218 :         struct xfs_agf          *agf = agbp->b_addr;
     693        9218 :         struct xbtree_afakeroot *afake = cur->bc_ag.afake;
     694             : 
     695        9218 :         ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
     696             : 
     697        9218 :         agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root);
     698        9218 :         agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels);
     699        9218 :         agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks);
     700        9218 :         xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS |
     701             :                                     XFS_AGF_RMAP_BLOCKS);
     702        9218 :         xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops);
     703        9219 : }
     704             : 
     705             : /* Calculate number of records in a reverse mapping btree block. */
     706             : static inline unsigned int
     707             : xfs_rmapbt_block_maxrecs(
     708             :         unsigned int            blocklen,
     709             :         bool                    leaf)
     710             : {
     711    28283536 :         if (leaf)
     712       24343 :                 return blocklen / sizeof(struct xfs_rmap_rec);
     713     4350546 :         return blocklen /
     714             :                 (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t));
     715             : }
     716             : 
     717             : /*
     718             :  * Calculate number of records in an rmap btree block.
     719             :  */
     720             : unsigned int
     721       48686 : xfs_rmapbt_maxrecs(
     722             :         struct xfs_mount        *mp,
     723             :         unsigned int            blocklen,
     724             :         bool                    leaf)
     725             : {
     726    28283536 :         blocklen -= XFS_RMAP_BLOCK_LEN;
     727    28283536 :         return xfs_rmapbt_block_maxrecs(blocklen, leaf);
     728             : }
     729             : 
     730             : /* Compute the max possible height for reverse mapping btrees. */
     731             : unsigned int
     732       24275 : xfs_rmapbt_maxlevels_ondisk(void)
     733             : {
     734       24275 :         unsigned int            minrecs[2];
     735       24275 :         unsigned int            blocklen;
     736             : 
     737       24275 :         blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;
     738             : 
     739       24275 :         minrecs[0] = xfs_rmapbt_block_maxrecs(blocklen, true) / 2;
     740       24275 :         minrecs[1] = xfs_rmapbt_block_maxrecs(blocklen, false) / 2;
     741             : 
     742             :         /*
     743             :          * Compute the asymptotic maxlevels for an rmapbt on any reflink fs.
     744             :          *
     745             :          * On a reflink filesystem, each AG block can have up to 2^32 (per the
     746             :          * refcount record format) owners, which means that theoretically we
     747             :          * could face up to 2^64 rmap records.  However, we're likely to run
     748             :          * out of blocks in the AG long before that happens, which means that
     749             :          * we must compute the max height based on what the btree will look
     750             :          * like if it consumes almost all the blocks in the AG due to maximal
     751             :          * sharing factor.
     752             :          */
     753       24275 :         return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
     754             : }
     755             : 
     756             : /* Compute the maximum height of an rmap btree. */
     757             : void
     758       24337 : xfs_rmapbt_compute_maxlevels(
     759             :         struct xfs_mount                *mp)
     760             : {
     761       24337 :         if (!xfs_has_rmapbt(mp)) {
     762          74 :                 mp->m_rmap_maxlevels = 0;
     763          74 :                 return;
     764             :         }
     765             : 
     766       24263 :         if (xfs_has_reflink(mp)) {
     767             :                 /*
     768             :                  * Compute the asymptotic maxlevels for an rmap btree on a
     769             :                  * filesystem that supports reflink.
     770             :                  *
     771             :                  * On a reflink filesystem, each AG block can have up to 2^32
     772             :                  * (per the refcount record format) owners, which means that
     773             :                  * theoretically we could face up to 2^64 rmap records.
     774             :                  * However, we're likely to run out of blocks in the AG long
     775             :                  * before that happens, which means that we must compute the
     776             :                  * max height based on what the btree will look like if it
     777             :                  * consumes almost all the blocks in the AG due to maximal
     778             :                  * sharing factor.
     779             :                  */
     780       24258 :                 mp->m_rmap_maxlevels = xfs_btree_space_to_height(mp->m_rmap_mnr,
     781       24258 :                                 mp->m_sb.sb_agblocks);
     782             :         } else {
     783             :                 /*
     784             :                  * If there's no block sharing, compute the maximum rmapbt
     785             :                  * height assuming one rmap record per AG block.
     786             :                  */
     787           5 :                 mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels(
     788           5 :                                 mp->m_rmap_mnr, mp->m_sb.sb_agblocks);
     789             :         }
     790       24263 :         ASSERT(mp->m_rmap_maxlevels <= xfs_rmapbt_maxlevels_ondisk());
     791             : }
     792             : 
     793             : /* Calculate the refcount btree size for some records. */
     794             : xfs_extlen_t
     795     1570323 : xfs_rmapbt_calc_size(
     796             :         struct xfs_mount        *mp,
     797             :         unsigned long long      len)
     798             : {
     799     1936848 :         return xfs_btree_calc_size(mp->m_rmap_mnr, len);
     800             : }
     801             : 
     802             : /*
     803             :  * Calculate the maximum refcount btree size.
     804             :  */
     805             : xfs_extlen_t
     806           0 : xfs_rmapbt_max_size(
     807             :         struct xfs_mount        *mp,
     808             :         xfs_agblock_t           agblocks)
     809             : {
     810             :         /* Bail out if we're uninitialized, which can happen in mkfs. */
     811           0 :         if (mp->m_rmap_mxr[0] == 0)
     812             :                 return 0;
     813             : 
     814      366526 :         return xfs_rmapbt_calc_size(mp, agblocks);
     815             : }
     816             : 
     817             : /*
     818             :  * Figure out how many blocks to reserve and how many are used by this btree.
     819             :  */
     820             : int
     821      369391 : xfs_rmapbt_calc_reserves(
     822             :         struct xfs_mount        *mp,
     823             :         struct xfs_trans        *tp,
     824             :         struct xfs_perag        *pag,
     825             :         xfs_extlen_t            *ask,
     826             :         xfs_extlen_t            *used)
     827             : {
     828      369391 :         struct xfs_buf          *agbp;
     829      369391 :         struct xfs_agf          *agf;
     830      369391 :         xfs_agblock_t           agblocks;
     831      369391 :         xfs_extlen_t            tree_len;
     832      369391 :         int                     error;
     833             : 
     834      369391 :         if (!xfs_has_rmapbt(mp))
     835             :                 return 0;
     836             : 
     837      366521 :         error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
     838      366518 :         if (error)
     839             :                 return error;
     840             : 
     841      366518 :         agf = agbp->b_addr;
     842      366518 :         agblocks = be32_to_cpu(agf->agf_length);
     843      366518 :         tree_len = be32_to_cpu(agf->agf_rmap_blocks);
     844      366518 :         xfs_trans_brelse(tp, agbp);
     845             : 
     846             :         /*
     847             :          * The log is permanently allocated, so the space it occupies will
     848             :          * never be available for the kinds of things that would require btree
     849             :          * expansion.  We therefore can pretend the space isn't there.
     850             :          */
     851      733056 :         if (xfs_ag_contains_log(mp, pag->pag_agno))
     852       75692 :                 agblocks -= mp->m_sb.sb_logblocks;
     853             : 
     854             :         /* Reserve 1% of the AG or enough for 1 block per record. */
     855      366528 :         *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks));
     856      366527 :         *used += tree_len;
     857             : 
     858      366527 :         return error;
     859             : }
     860             : 
     861             : int __init
     862          12 : xfs_rmapbt_init_cur_cache(void)
     863             : {
     864          12 :         xfs_rmapbt_cur_cache = kmem_cache_create("xfs_rmapbt_cur",
     865          12 :                         xfs_btree_cur_sizeof(xfs_rmapbt_maxlevels_ondisk()),
     866             :                         0, 0, NULL);
     867             : 
     868          12 :         if (!xfs_rmapbt_cur_cache)
     869           0 :                 return -ENOMEM;
     870             :         return 0;
     871             : }
     872             : 
     873             : void
     874          12 : xfs_rmapbt_destroy_cur_cache(void)
     875             : {
     876          12 :         kmem_cache_destroy(xfs_rmapbt_cur_cache);
     877          12 :         xfs_rmapbt_cur_cache = NULL;
     878          12 : }

Generated by: LCOV version 1.14