LCOV - code coverage report
Current view: top level - fs/btrfs - qgroup.h (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-djwa @ Mon Jul 31 20:08:17 PDT 2023 Lines: 0 6 0.0 %
Date: 2023-07-31 20:08:17 Functions: 0 0 -

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : /*
       3             :  * Copyright (C) 2014 Facebook.  All rights reserved.
       4             :  */
       5             : 
       6             : #ifndef BTRFS_QGROUP_H
       7             : #define BTRFS_QGROUP_H
       8             : 
       9             : #include <linux/spinlock.h>
      10             : #include <linux/rbtree.h>
      11             : #include <linux/kobject.h>
      12             : #include "ulist.h"
      13             : #include "delayed-ref.h"
      14             : #include "misc.h"
      15             : 
      16             : /*
      17             :  * Btrfs qgroup overview
      18             :  *
      19             :  * Btrfs qgroup is split into 3 main parts:
      20             :  * 1) Reserve
      21             :  *    Reserve metadata/data space for incoming operations
      22             :  *    Affect how qgroup limit works
      23             :  *
      24             :  * 2) Trace
      25             :  *    Tell btrfs qgroup to trace dirty extents.
      26             :  *
      27             :  *    Dirty extents include:
      28             :  *    - Newly allocated extents
      29             :  *    - Extents going to be deleted (in this trans)
      30             :  *    - Extents whose owner is going to be modified
      31             :  *
      32             :  *    This is the main part that affects whether qgroup numbers will
      33             :  *    stay consistent.
      34             :  *    Btrfs qgroup can trace clean extents and won't cause any problem,
      35             :  *    but it will consume extra CPU time, it should be avoided if possible.
      36             :  *
      37             :  * 3) Account
      38             :  *    Btrfs qgroup will update its numbers, based on dirty extents traced
      39             :  *    in the previous step.
      40             :  *
      41             :  *    Normally at qgroup rescan and transaction commit time.
      42             :  */
      43             : 
      44             : /*
      45             :  * Special performance optimization for balance.
      46             :  *
      47             :  * For balance, we need to swap subtree of subvolume and reloc trees.
      48             :  * In theory, we need to trace all subtree blocks of both subvolume and reloc
      49             :  * trees, since their owner has changed during such swap.
      50             :  *
      51             :  * However, since balance has ensured that both subtrees contain the
      52             :  * same contents and have the same tree structures, such swap won't cause
      53             :  * qgroup number change.
      54             :  *
      55             :  * But there is a race window between subtree swap and transaction commit,
      56             :  * during that window, if we increase/decrease tree level or merge/split tree
      57             :  * blocks, we still need to trace the original subtrees.
      58             :  *
      59             :  * So for balance, we use a delayed subtree tracing, whose workflow is:
      60             :  *
      61             :  * 1) Record the subtree root blocks that get swapped.
      62             :  *
      63             :  *    During subtree swap:
      64             :  *    O = Old tree blocks
      65             :  *    N = New tree blocks
      66             :  *          reloc tree                     subvolume tree X
      67             :  *             Root                               Root
      68             :  *            /    \                             /    \
      69             :  *          NA     OB                          OA      OB
      70             :  *        /  |     |  \                      /  |      |  \
      71             :  *      NC  ND     OE  OF                   OC  OD     OE  OF
      72             :  *
      73             :  *   In this case, NA and OA are going to be swapped, record (NA, OA) into
      74             :  *   subvolume tree X.
      75             :  *
      76             :  * 2) After subtree swap.
      77             :  *          reloc tree                     subvolume tree X
      78             :  *             Root                               Root
      79             :  *            /    \                             /    \
      80             :  *          OA     OB                          NA      OB
      81             :  *        /  |     |  \                      /  |      |  \
      82             :  *      OC  OD     OE  OF                   NC  ND     OE  OF
      83             :  *
      84             :  * 3a) COW happens for OB
      85             :  *     If we are going to COW tree block OB, we check OB's bytenr against
      86             :  *     tree X's swapped_blocks structure.
      87             :  *     If it doesn't fit any, nothing will happen.
      88             :  *
      89             :  * 3b) COW happens for NA
      90             :  *     Check NA's bytenr against tree X's swapped_blocks, and get a hit.
      91             :  *     Then we do subtree scan on both subtrees OA and NA.
      92             :  *     Resulting in 6 tree blocks to be scanned (OA, OC, OD, NA, NC, ND).
      93             :  *
      94             :  *     Then no matter what we do to subvolume tree X, qgroup numbers will
      95             :  *     still be correct.
      96             :  *     Then NA's record gets removed from X's swapped_blocks.
      97             :  *
      98             :  * 4)  Transaction commit
      99             :  *     Any record in X's swapped_blocks gets removed, since there is no
     100             :  *     modification to the swapped subtrees, no need to trigger heavy qgroup
     101             :  *     subtree rescan for them.
     102             :  */
     103             : 
     104             : #define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN         (1UL << 3)
     105             : #define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING         (1UL << 4)
     106             : 
     107             : /*
     108             :  * Record a dirty extent, and inform qgroup to update quota on it
     109             :  * TODO: Use kmem cache to alloc it.
     110             :  */
     111             : struct btrfs_qgroup_extent_record {
     112             :         struct rb_node node;
     113             :         u64 bytenr;
     114             :         u64 num_bytes;
     115             : 
     116             :         /*
     117             :          * For qgroup reserved data space freeing.
     118             :          *
     119             :          * @data_rsv_refroot and @data_rsv will be recorded after
     120             :          * BTRFS_ADD_DELAYED_EXTENT is called.
     121             :          * And will be used to free reserved qgroup space at
     122             :          * transaction commit time.
     123             :          */
     124             :         u32 data_rsv;           /* reserved data space needs to be freed */
     125             :         u64 data_rsv_refroot;   /* which root the reserved data belongs to */
     126             :         struct ulist *old_roots;
     127             : };
     128             : 
     129             : struct btrfs_qgroup_swapped_block {
     130             :         struct rb_node node;
     131             : 
     132             :         int level;
     133             :         bool trace_leaf;
     134             : 
     135             :         /* bytenr/generation of the tree block in subvolume tree after swap */
     136             :         u64 subvol_bytenr;
     137             :         u64 subvol_generation;
     138             : 
     139             :         /* bytenr/generation of the tree block in reloc tree after swap */
     140             :         u64 reloc_bytenr;
     141             :         u64 reloc_generation;
     142             : 
     143             :         u64 last_snapshot;
     144             :         struct btrfs_key first_key;
     145             : };
     146             : 
     147             : /*
     148             :  * Qgroup reservation types:
     149             :  *
     150             :  * DATA:
     151             :  *      space reserved for data
     152             :  *
     153             :  * META_PERTRANS:
     154             :  *      Space reserved for metadata (per-transaction)
     155             :  *      Due to the fact that qgroup data is only updated at transaction commit
     156             :  *      time, reserved space for metadata must be kept until transaction
     157             :  *      commits.
     158             :  *      Any metadata reservation that is used in btrfs_start_transaction()
     159             :  *      should be of this type.
     160             :  *
     161             :  * META_PREALLOC:
     162             :  *      There are cases where metadata space is reserved before starting
     163             :  *      transaction, and then btrfs_join_transaction() to get a trans handle.
     164             :  *      Any metadata reserved for such usage should be of this type.
     165             :  *      And after join_transaction() part (or all) of such reservation should
     166             :  *      be converted into META_PERTRANS.
     167             :  */
     168             : enum btrfs_qgroup_rsv_type {
     169             :         BTRFS_QGROUP_RSV_DATA,
     170             :         BTRFS_QGROUP_RSV_META_PERTRANS,
     171             :         BTRFS_QGROUP_RSV_META_PREALLOC,
     172             :         BTRFS_QGROUP_RSV_LAST,
     173             : };
     174             : 
     175             : /*
     176             :  * Represents how many bytes we have reserved for this qgroup.
     177             :  *
     178             :  * Each type should have different reservation behavior.
     179             :  * E.g, data follows its io_tree flag modification, while
     180             :  * *currently* meta is just reserve-and-clear during transaction.
     181             :  *
     182             :  * TODO: Add new type for reservation which can survive transaction commit.
     183             :  * Current metadata reservation behavior is not suitable for such case.
     184             :  */
     185             : struct btrfs_qgroup_rsv {
     186             :         u64 values[BTRFS_QGROUP_RSV_LAST];
     187             : };
     188             : 
     189             : /*
     190             :  * one struct for each qgroup, organized in fs_info->qgroup_tree.
     191             :  */
     192             : struct btrfs_qgroup {
     193             :         u64 qgroupid;
     194             : 
     195             :         /*
     196             :          * state
     197             :          */
     198             :         u64 rfer;       /* referenced */
     199             :         u64 rfer_cmpr;  /* referenced compressed */
     200             :         u64 excl;       /* exclusive */
     201             :         u64 excl_cmpr;  /* exclusive compressed */
     202             : 
     203             :         /*
     204             :          * limits
     205             :          */
     206             :         u64 lim_flags;  /* which limits are set */
     207             :         u64 max_rfer;
     208             :         u64 max_excl;
     209             :         u64 rsv_rfer;
     210             :         u64 rsv_excl;
     211             : 
     212             :         /*
     213             :          * reservation tracking
     214             :          */
     215             :         struct btrfs_qgroup_rsv rsv;
     216             : 
     217             :         /*
     218             :          * lists
     219             :          */
     220             :         struct list_head groups;  /* groups this group is member of */
     221             :         struct list_head members; /* groups that are members of this group */
     222             :         struct list_head dirty;   /* dirty groups */
     223             :         struct rb_node node;      /* tree of qgroups */
     224             : 
     225             :         /*
     226             :          * temp variables for accounting operations
     227             :          * Refer to qgroup_shared_accounting() for details.
     228             :          */
     229             :         u64 old_refcnt;
     230             :         u64 new_refcnt;
     231             : 
     232             :         /*
     233             :          * Sysfs kobjectid
     234             :          */
     235             :         struct kobject kobj;
     236             : };
     237             : 
     238             : static inline u64 btrfs_qgroup_subvolid(u64 qgroupid)
     239             : {
     240           0 :         return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1));
     241             : }
     242             : 
     243             : /*
     244             :  * For qgroup event trace points only
     245             :  */
     246             : enum {
     247             :         ENUM_BIT(QGROUP_RESERVE),
     248             :         ENUM_BIT(QGROUP_RELEASE),
     249             :         ENUM_BIT(QGROUP_FREE),
     250             : };
     251             : 
     252             : int btrfs_quota_enable(struct btrfs_fs_info *fs_info);
     253             : int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
     254             : int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
     255             : void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
     256             : int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
     257             :                                      bool interruptible);
     258             : int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
     259             :                               u64 dst);
     260             : int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
     261             :                               u64 dst);
     262             : int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
     263             : int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
     264             : int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
     265             :                        struct btrfs_qgroup_limit *limit);
     266             : int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
     267             : void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
     268             : struct btrfs_delayed_extent_op;
     269             : 
     270             : /*
     271             :  * Inform qgroup to trace one dirty extent, its info is recorded in @record.
     272             :  * So qgroup can account it at transaction committing time.
     273             :  *
     274             :  * No lock version, caller must acquire delayed ref lock and allocate memory,
     275             :  * then call btrfs_qgroup_trace_extent_post() after exiting lock context.
     276             :  *
     277             :  * Return 0 for successful insert
     278             :  * Return >0 for existing record, caller can free @record safely.
     279             :  * Error is not possible
     280             :  */
     281             : int btrfs_qgroup_trace_extent_nolock(
     282             :                 struct btrfs_fs_info *fs_info,
     283             :                 struct btrfs_delayed_ref_root *delayed_refs,
     284             :                 struct btrfs_qgroup_extent_record *record);
     285             : 
     286             : /*
     287             :  * Post handler after qgroup_trace_extent_nolock().
     288             :  *
     289             :  * NOTE: Current qgroup does the expensive backref walk at transaction
     290             :  * committing time with TRANS_STATE_COMMIT_DOING, this blocks incoming
     291             :  * new transaction.
     292             :  * This is designed to allow btrfs_find_all_roots() to get correct new_roots
     293             :  * result.
     294             :  *
     295             :  * However for old_roots there is no need to do backref walk at that time,
     296             :  * since we search commit roots to walk backref and result will always be
     297             :  * correct.
     298             :  *
     299             :  * Due to the nature of no lock version, we can't do backref there.
     300             :  * So we must call btrfs_qgroup_trace_extent_post() after exiting
     301             :  * spinlock context.
     302             :  *
     303             :  * TODO: If we can fix and prove btrfs_find_all_roots() can get correct result
     304             :  * using current root, then we can move all expensive backref walk out of
     305             :  * transaction committing, but not now as qgroup accounting will be wrong again.
     306             :  */
     307             : int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
     308             :                                    struct btrfs_qgroup_extent_record *qrecord);
     309             : 
     310             : /*
     311             :  * Inform qgroup to trace one dirty extent, specified by @bytenr and
     312             :  * @num_bytes.
     313             :  * So qgroup can account it at commit trans time.
     314             :  *
     315             :  * Better encapsulated version, with memory allocation and backref walk for
     316             :  * commit roots.
     317             :  * So this can sleep.
     318             :  *
     319             :  * Return 0 if the operation is done.
     320             :  * Return <0 for error, like memory allocation failure or invalid parameter
     321             :  * (NULL trans)
     322             :  */
     323             : int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
     324             :                               u64 num_bytes);
     325             : 
     326             : /*
     327             :  * Inform qgroup to trace all leaf items of data
     328             :  *
     329             :  * Return 0 for success
     330             :  * Return <0 for error(ENOMEM)
     331             :  */
     332             : int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
     333             :                                   struct extent_buffer *eb);
     334             : /*
     335             :  * Inform qgroup to trace a whole subtree, including all its child tree
     336             :  * blocks and data.
     337             :  * The root tree block is specified by @root_eb.
     338             :  *
     339             :  * Normally used by relocation(tree block swap) and subvolume deletion.
     340             :  *
     341             :  * Return 0 for success
     342             :  * Return <0 for error(ENOMEM or tree search error)
     343             :  */
     344             : int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
     345             :                                struct extent_buffer *root_eb,
     346             :                                u64 root_gen, int root_level);
     347             : int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
     348             :                                 u64 num_bytes, struct ulist *old_roots,
     349             :                                 struct ulist *new_roots);
     350             : int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans);
     351             : int btrfs_run_qgroups(struct btrfs_trans_handle *trans);
     352             : int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
     353             :                          u64 objectid, struct btrfs_qgroup_inherit *inherit);
     354             : void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
     355             :                                u64 ref_root, u64 num_bytes,
     356             :                                enum btrfs_qgroup_rsv_type type);
     357             : 
     358             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
     359             : int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
     360             :                                u64 rfer, u64 excl);
     361             : #endif
     362             : 
     363             : /* New io_tree based accurate qgroup reserve API */
     364             : int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
     365             :                         struct extent_changeset **reserved, u64 start, u64 len);
     366             : int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
     367             : int btrfs_qgroup_free_data(struct btrfs_inode *inode,
     368             :                            struct extent_changeset *reserved, u64 start,
     369             :                            u64 len);
     370             : int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
     371             :                               enum btrfs_qgroup_rsv_type type, bool enforce);
     372             : int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
     373             :                                 enum btrfs_qgroup_rsv_type type, bool enforce,
     374             :                                 bool noflush);
     375             : /* Reserve metadata space for pertrans and prealloc type */
     376             : static inline int btrfs_qgroup_reserve_meta_pertrans(struct btrfs_root *root,
     377             :                                 int num_bytes, bool enforce)
     378             : {
     379           0 :         return __btrfs_qgroup_reserve_meta(root, num_bytes,
     380             :                                            BTRFS_QGROUP_RSV_META_PERTRANS,
     381             :                                            enforce, false);
     382             : }
     383             : static inline int btrfs_qgroup_reserve_meta_prealloc(struct btrfs_root *root,
     384             :                                                      int num_bytes, bool enforce,
     385             :                                                      bool noflush)
     386             : {
     387           0 :         return __btrfs_qgroup_reserve_meta(root, num_bytes,
     388             :                                            BTRFS_QGROUP_RSV_META_PREALLOC,
     389             :                                            enforce, noflush);
     390             : }
     391             : 
     392             : void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
     393             :                              enum btrfs_qgroup_rsv_type type);
     394             : 
     395             : /* Free per-transaction meta reservation for error handling */
     396             : static inline void btrfs_qgroup_free_meta_pertrans(struct btrfs_root *root,
     397             :                                                    int num_bytes)
     398             : {
     399           0 :         __btrfs_qgroup_free_meta(root, num_bytes,
     400             :                         BTRFS_QGROUP_RSV_META_PERTRANS);
     401             : }
     402             : 
     403             : /* Pre-allocated meta reservation can be freed at need */
     404             : static inline void btrfs_qgroup_free_meta_prealloc(struct btrfs_root *root,
     405             :                                                    int num_bytes)
     406             : {
     407           0 :         __btrfs_qgroup_free_meta(root, num_bytes,
     408             :                         BTRFS_QGROUP_RSV_META_PREALLOC);
     409           0 : }
     410             : 
     411             : /*
     412             :  * Per-transaction meta reservation should be all freed at transaction commit
     413             :  * time
     414             :  */
     415             : void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root);
     416             : 
     417             : /*
     418             :  * Convert @num_bytes of META_PREALLOC reservation to META_PERTRANS.
     419             :  *
     420             :  * This is called when preallocated meta reservation needs to be used.
     421             :  * Normally after btrfs_join_transaction() call.
     422             :  */
     423             : void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes);
     424             : 
     425             : void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode);
     426             : 
     427             : /* btrfs_qgroup_swapped_blocks related functions */
     428             : void btrfs_qgroup_init_swapped_blocks(
     429             :         struct btrfs_qgroup_swapped_blocks *swapped_blocks);
     430             : 
     431             : void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
     432             : int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
     433             :                 struct btrfs_root *subvol_root,
     434             :                 struct btrfs_block_group *bg,
     435             :                 struct extent_buffer *subvol_parent, int subvol_slot,
     436             :                 struct extent_buffer *reloc_parent, int reloc_slot,
     437             :                 u64 last_snapshot);
     438             : int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
     439             :                 struct btrfs_root *root, struct extent_buffer *eb);
     440             : void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
     441             : bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
     442             : 
     443             : #endif

Generated by: LCOV version 1.14