LCOV - code coverage report
Current view: top level - fs/btrfs - qgroup.c (source / functions) Hit Total Coverage
Test: fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023 Lines: 1690 2266 74.6 %
Date: 2023-07-31 20:08:12 Functions: 78 83 94.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2011 STRATO.  All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/sched.h>
       7             : #include <linux/pagemap.h>
       8             : #include <linux/writeback.h>
       9             : #include <linux/blkdev.h>
      10             : #include <linux/rbtree.h>
      11             : #include <linux/slab.h>
      12             : #include <linux/workqueue.h>
      13             : #include <linux/btrfs.h>
      14             : #include <linux/sched/mm.h>
      15             : 
      16             : #include "ctree.h"
      17             : #include "transaction.h"
      18             : #include "disk-io.h"
      19             : #include "locking.h"
      20             : #include "ulist.h"
      21             : #include "backref.h"
      22             : #include "extent_io.h"
      23             : #include "qgroup.h"
      24             : #include "block-group.h"
      25             : #include "sysfs.h"
      26             : #include "tree-mod-log.h"
      27             : #include "fs.h"
      28             : #include "accessors.h"
      29             : #include "extent-tree.h"
      30             : #include "root-tree.h"
      31             : #include "tree-checker.h"
      32             : 
      33             : /*
      34             :  * Helpers to access qgroup reservation
      35             :  *
      36             :  * Callers should ensure the lock context and type are valid
      37             :  */
      38             : /* Return the sum of @qgroup's rsv.values[] entries, i.e. its total reserved bytes. */
      39             : static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
      40             : {
      41             :         u64 ret = 0;
      42             :         int i;
      43             : 
      44    13795612 :         for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
      45    10346709 :                 ret += qgroup->rsv.values[i];
      46             : 
      47     3448903 :         return ret;
      48             : }
      49             : /* Debug builds only: printable name of a reservation type; NULL if @type is none of the three below. */
      50             : #ifdef CONFIG_BTRFS_DEBUG
      51             : static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
      52             : {
      53             :         if (type == BTRFS_QGROUP_RSV_DATA)
      54             :                 return "data";
      55             :         if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
      56             :                 return "meta_pertrans";
      57             :         if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
      58             :                 return "meta_prealloc";
      59             :         return NULL;
      60             : }
      61             : #endif
      62             : /* Fire the qgroup_update_reserve tracepoint, then add @num_bytes to @qgroup's reservation of @type. */
      63     3857850 : static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
      64             :                            struct btrfs_qgroup *qgroup, u64 num_bytes,
      65             :                            enum btrfs_qgroup_rsv_type type)
      66             : {
      67     3857850 :         trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
      68     3876831 :         qgroup->rsv.values[type] += num_bytes;
      69     3857850 : }
      70             : /* Subtract @num_bytes from @qgroup's reservation of @type; if that would underflow, reset it to 0 instead. */
      71     2590304 : static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
      72             :                                struct btrfs_qgroup *qgroup, u64 num_bytes,
      73             :                                enum btrfs_qgroup_rsv_type type)
      74             : {
      75     2590304 :         trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type); /* traced as a negative delta */
      76     2590304 :         if (qgroup->rsv.values[type] >= num_bytes) {
      77     2590304 :                 qgroup->rsv.values[type] -= num_bytes;
      78     2590304 :                 return;
      79             :         }
      80             : #ifdef CONFIG_BTRFS_DEBUG
      81             :         WARN_RATELIMIT(1,
      82             :                 "qgroup %llu %s reserved space underflow, have %llu to free %llu",
      83             :                 qgroup->qgroupid, qgroup_rsv_type_str(type),
      84             :                 qgroup->rsv.values[type], num_bytes);
      85             : #endif
      86           0 :         qgroup->rsv.values[type] = 0; /* underflow: clamp to 0 rather than wrap around */
      87             : }
      88             : /* Add each of @src's per-type reservations to the matching reservation of @dest. */
      89           4 : static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
      90             :                                      struct btrfs_qgroup *dest,
      91             :                                      struct btrfs_qgroup *src)
      92             : {
      93           4 :         int i;
      94             : 
      95          16 :         for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
      96          12 :                 qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
      97           4 : }
      98             : /* Release each of @src's per-type reservations from the matching reservation of @dest. */
      99           0 : static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
     100             :                                          struct btrfs_qgroup *dest,
     101             :                                           struct btrfs_qgroup *src)
     102             : {
     103           0 :         int i;
     104             : 
     105           0 :         for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
     106           0 :                 qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
     107           0 : }
     108             : /* Add @mod to qg->old_refcnt, first raising old_refcnt to @seq if it is below @seq. */
     109             : static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
     110             :                                            int mod)
     111             : {
     112     2536335 :         if (qg->old_refcnt < seq)
     113     2536335 :                 qg->old_refcnt = seq;
     114     2536335 :         qg->old_refcnt += mod;
     115     2536335 : }
     116             : /* Add @mod to qg->new_refcnt, first raising new_refcnt to @seq if it is below @seq. */
     117             : static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
     118             :                                            int mod)
     119             : {
     120     2370190 :         if (qg->new_refcnt < seq)
     121     2370189 :                 qg->new_refcnt = seq;
     122     2370190 :         qg->new_refcnt += mod;
     123     2370190 : }
     124             : /* Return qg->old_refcnt relative to @seq (old_refcnt - seq), or 0 if old_refcnt is below @seq. */
     125             : static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
     126             : {
     127     2599854 :         if (qg->old_refcnt < seq)
     128             :                 return 0;
     129     2536335 :         return qg->old_refcnt - seq;
     130             : }
     131             : /* Return qg->new_refcnt relative to @seq (new_refcnt - seq), or 0 if new_refcnt is below @seq. */
     132             : static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
     133             : {
     134     2599854 :         if (qg->new_refcnt < seq)
     135             :                 return 0;
     136     2370189 :         return qg->new_refcnt - seq;
     137             : }
     138             : 
     139             : /*
     140             :  * Glue structure representing one member -> parent relation between two qgroups.
     141             :  */
     142             : struct btrfs_qgroup_list {
     143             :         struct list_head next_group;    /* entry in member->groups */
     144             :         struct list_head next_member;   /* entry in group->members */
     145             :         struct btrfs_qgroup *group;     /* the parent qgroup of the relation */
     146             :         struct btrfs_qgroup *member;    /* the member qgroup of the relation */
     147             : };
     148             : /* Cast a qgroup pointer to a u64; unode_aux_to_qgroup() does the reverse cast on a ulist node's aux. */
     149             : static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
     150             : {
     151    11381286 :         return (u64)(uintptr_t)qg;
     152             : }
     153             : /* Cast the aux value of ulist node @n back to a qgroup pointer (reverse of qgroup_to_aux()). */
     154             : static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
     155             : {
     156    17835578 :         return (struct btrfs_qgroup *)(uintptr_t)n->aux;
     157             : }
     158             : /* Forward declarations of static helpers that are used before their definitions. */
     159             : static int
     160             : qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
     161             :                    int init_flags);
     162             : static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
     163             : 
     164             : /*
                     :  * Return the qgroup with id @qgroupid from fs_info->qgroup_tree, or NULL if
                     :  * the tree holds no such id.
                     :  *
                     :  * must be called with qgroup_ioctl_lock held
                     :  */
     165    11365048 : static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
     166             :                                            u64 qgroupid)
     167             : {
     168    11365048 :         struct rb_node *n = fs_info->qgroup_tree.rb_node;
     169    11365048 :         struct btrfs_qgroup *qgroup;
     170             : 
     171    39418813 :         while (n) {
     172    39418557 :                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
     173    39418557 :                 if (qgroup->qgroupid < qgroupid)
     174    21454286 :                         n = n->rb_left; /* larger ids sit to the left, same order as add_qgroup_rb() */
     175    17964271 :                 else if (qgroup->qgroupid > qgroupid)
     176     6599479 :                         n = n->rb_right;
     177             :                 else
     178    11364792 :                         return qgroup;
     179             :         }
     180             :         return NULL;
     181             : }
     182             : 
     183             : /*
                     :  * Return the qgroup for @qgroupid, inserting a new zero-filled one into
                     :  * fs_info->qgroup_tree if the tree has none.  Returns ERR_PTR(-ENOMEM) if
                     :  * the allocation fails.
                     :  *
                     :  * must be called with qgroup_lock held
                     :  */
     184         583 : static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
     185             :                                           u64 qgroupid)
     186             : {
     187         583 :         struct rb_node **p = &fs_info->qgroup_tree.rb_node;
     188         583 :         struct rb_node *parent = NULL;
     189         583 :         struct btrfs_qgroup *qgroup;
     190             : 
     191        3040 :         while (*p) {
     192        2457 :                 parent = *p;
     193        2457 :                 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
     194             : 
     195        2457 :                 if (qgroup->qgroupid < qgroupid)
     196        2433 :                         p = &(*p)->rb_left;
     197          24 :                 else if (qgroup->qgroupid > qgroupid)
     198          24 :                         p = &(*p)->rb_right;
     199             :                 else
     200           0 :                         return qgroup; /* id already present: reuse the existing entry */
     201             :         }
     202             : 
     203         583 :         qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC); /* NOTE(review): GFP_ATOMIC presumably because qgroup_lock is a spinlock - confirm */
     204         583 :         if (!qgroup)
     205             :                 return ERR_PTR(-ENOMEM);
     206             : 
     207         583 :         qgroup->qgroupid = qgroupid;
     208         583 :         INIT_LIST_HEAD(&qgroup->groups);
     209         583 :         INIT_LIST_HEAD(&qgroup->members);
     210         583 :         INIT_LIST_HEAD(&qgroup->dirty);
     211             : 
     212         583 :         rb_link_node(&qgroup->node, parent, p);
     213         583 :         rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
     214             : 
     215         583 :         return qgroup;
     216             : }
     217             : /*
                     :  * Unlink @qgroup from the dirty list and free every relation entry on its
                     :  * groups and members lists.  This neither erases @qgroup from the rb-tree
                     :  * nor frees @qgroup itself.
                     :  */
     218         583 : static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
     219             :                             struct btrfs_qgroup *qgroup)
     220             : {
     221         583 :         struct btrfs_qgroup_list *list;
     222             : 
     223         583 :         list_del(&qgroup->dirty);
     224         583 :         while (!list_empty(&qgroup->groups)) {
     225           0 :                 list = list_first_entry(&qgroup->groups,
     226             :                                         struct btrfs_qgroup_list, next_group);
     227           0 :                 list_del(&list->next_group);
     228           0 :                 list_del(&list->next_member); /* also unlink from the other qgroup's list */
     229           0 :                 kfree(list);
     230             :         }
     231             : 
     232         609 :         while (!list_empty(&qgroup->members)) {
     233          26 :                 list = list_first_entry(&qgroup->members,
     234             :                                         struct btrfs_qgroup_list, next_member);
     235          26 :                 list_del(&list->next_group); /* also unlink from the other qgroup's list */
     236          26 :                 list_del(&list->next_member);
     237          26 :                 kfree(list);
     238             :         }
     239         583 : }
     240             : 
     241             : /*
                     :  * Erase the qgroup with id @qgroupid from fs_info->qgroup_tree and drop all
                     :  * of its relations.  The qgroup structure itself is not freed here.
                     :  *
                     :  * Return: 0 on success, -ENOENT if no qgroup has that id.
                     :  *
                     :  * must be called with qgroup_lock held
                     :  */
     242          25 : static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
     243             : {
     244          25 :         struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
     245             : 
     246          25 :         if (!qgroup)
     247             :                 return -ENOENT;
     248             : 
     249          25 :         rb_erase(&qgroup->node, &fs_info->qgroup_tree);
     250          25 :         __del_qgroup_rb(fs_info, qgroup);
     251          25 :         return 0;
     252             : }
     253             : 
     254             : /*
     255             :  * Add relation specified by two qgroups.
     256             :  *
     257             :  * Must be called with qgroup_lock held.
     258             :  *
     259             :  * Return: 0        on success
     260             :  *         -ENOENT  if one of the qgroups is NULL
     261             :  *         -ENOMEM  if the relation entry cannot be allocated
     262             :  */
     263          27 : static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
     264             : {
     265          27 :         struct btrfs_qgroup_list *list;
     266             : 
     267          27 :         if (!member || !parent)
     268             :                 return -ENOENT;
     269             : 
     270          27 :         list = kzalloc(sizeof(*list), GFP_ATOMIC);
     271          27 :         if (!list)
     272             :                 return -ENOMEM;
     273             : 
     274          27 :         list->group = parent;
     275          27 :         list->member = member;
     276          27 :         list_add_tail(&list->next_group, &member->groups); /* the member records its parent */
     277          27 :         list_add_tail(&list->next_member, &parent->members); /* the parent records its member */
     278             : 
     279          27 :         return 0;
     280             : }
     281             : 
     282             : /*
     283             :  * Add relation specified by two qgroup ids.
     284             :  *
     285             :  * Must be called with qgroup_lock held.
     286             :  *
     287             :  * Return: 0        on success
     288             :  *         -ENOENT  if one of the ids does not exist
     289             :  *         -ENOMEM  if the relation entry cannot be allocated
     290             :  */
     291          22 : static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
     292             : {
     293          22 :         struct btrfs_qgroup *member;
     294          22 :         struct btrfs_qgroup *parent;
     295             : 
     296          22 :         member = find_qgroup_rb(fs_info, memberid);
     297          22 :         parent = find_qgroup_rb(fs_info, parentid);
     298             : 
     299          22 :         return __add_relation_rb(member, parent); /* a failed lookup (NULL) is rejected there with -ENOENT */
     300             : }
     301             : 
     302             : /*
                     :  * Remove the relation between the qgroups with ids @memberid and @parentid.
                     :  *
                     :  * Return: 0 on success, -ENOENT if either qgroup or the relation between
                     :  * them does not exist.
                     :  *
                     :  * Must be called with qgroup_lock held
                     :  */
     303           1 : static int del_relation_rb(struct btrfs_fs_info *fs_info,
     304             :                            u64 memberid, u64 parentid)
     305             : {
     306           1 :         struct btrfs_qgroup *member;
     307           1 :         struct btrfs_qgroup *parent;
     308           1 :         struct btrfs_qgroup_list *list;
     309             : 
     310           1 :         member = find_qgroup_rb(fs_info, memberid);
     311           1 :         parent = find_qgroup_rb(fs_info, parentid);
     312           1 :         if (!member || !parent)
     313             :                 return -ENOENT;
     314             : 
     315           1 :         list_for_each_entry(list, &member->groups, next_group) {
     316           1 :                 if (list->group == parent) {
     317           1 :                         list_del(&list->next_group);
     318           1 :                         list_del(&list->next_member);
     319           1 :                         kfree(list);
     320           1 :                         return 0; /* returning at once keeps the non-safe iterator off the freed entry */
     321             :                 }
     322             :         }
     323             :         return -ENOENT;
     324             : }
     325             : /* Sanity-test builds only: 0 if qgroup @qgroupid exists with exactly these rfer/excl values, else -EINVAL. */
     326             : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
     327             : int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
     328             :                                u64 rfer, u64 excl)
     329             : {
     330             :         struct btrfs_qgroup *qgroup;
     331             : 
     332             :         qgroup = find_qgroup_rb(fs_info, qgroupid);
     333             :         if (!qgroup)
     334             :                 return -EINVAL;
     335             :         if (qgroup->rfer != rfer || qgroup->excl != excl)
     336             :                 return -EINVAL;
     337             :         return 0;
     338             : }
     339             : #endif
     340             : /* Set the INCONSISTENT status flag plus the CANCEL_RESCAN and NO_ACCOUNTING runtime flags in qgroup_flags. */
     341             : static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info)
     342             : {
     343           3 :         fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
     344             :                                   BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
     345             :                                   BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
     346           3 : }
     347             : 
     348             : /*
     349             :  * The full config is read in one go, only called from open_ctree()
     350             :  * It doesn't use any locking, as at this point we're still single-threaded
     351             :  */
     352        3211 : int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
     353             : {
     354        3211 :         struct btrfs_key key;
     355        3211 :         struct btrfs_key found_key;
     356        3211 :         struct btrfs_root *quota_root = fs_info->quota_root;
     357        3211 :         struct btrfs_path *path = NULL;
     358        3211 :         struct extent_buffer *l;
     359        3211 :         int slot;
     360        3211 :         int ret = 0;
     361        3211 :         u64 flags = 0;
     362        3211 :         u64 rescan_progress = 0;
     363             : 
     364        3211 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
     365             :                 return 0;
     366             : 
     367          21 :         fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
     368          21 :         if (!fs_info->qgroup_ulist) {
     369           0 :                 ret = -ENOMEM;
     370           0 :                 goto out;
     371             :         }
     372             : 
     373          21 :         path = btrfs_alloc_path();
     374          21 :         if (!path) {
     375           0 :                 ret = -ENOMEM;
     376           0 :                 goto out;
     377             :         }
     378             : 
     379          21 :         ret = btrfs_sysfs_add_qgroups(fs_info);
     380          21 :         if (ret < 0)
     381           0 :                 goto out;
     382             :         /* default this to quota off, in case no status key is found */
     383          21 :         fs_info->qgroup_flags = 0;
     384             : 
     385             :         /*
     386             :          * pass 1: read status, all qgroup infos and limits
     387             :          */
     388          21 :         key.objectid = 0;
     389          21 :         key.type = 0;
     390          21 :         key.offset = 0;
     391          21 :         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
     392          21 :         if (ret)
     393           0 :                 goto out;
     394             : 
     395         353 :         while (1) {
     396         353 :                 struct btrfs_qgroup *qgroup;
     397             : 
     398         353 :                 slot = path->slots[0];
     399         353 :                 l = path->nodes[0];
     400         353 :                 btrfs_item_key_to_cpu(l, &found_key, slot);
     401             : 
     402         353 :                 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
     403          21 :                         struct btrfs_qgroup_status_item *ptr;
     404             : 
     405          21 :                         ptr = btrfs_item_ptr(l, slot,
     406             :                                              struct btrfs_qgroup_status_item);
     407             : 
     408          21 :                         if (btrfs_qgroup_status_version(l, ptr) !=
     409             :                             BTRFS_QGROUP_STATUS_VERSION) {
     410           0 :                                 btrfs_err(fs_info,
     411             :                                  "old qgroup version, quota disabled");
     412           0 :                                 goto out;
     413             :                         }
     414          21 :                         if (btrfs_qgroup_status_generation(l, ptr) !=
     415          21 :                             fs_info->generation) {
     416           0 :                                 qgroup_mark_inconsistent(fs_info);
     417           0 :                                 btrfs_err(fs_info,
     418             :                                         "qgroup generation mismatch, marked as inconsistent");
     419             :                         }
     420          21 :                         fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
     421             :                                                                           ptr);
     422          21 :                         rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
     423          21 :                         goto next1;
     424             :                 }
     425             : 
     426         332 :                 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
     427             :                     found_key.type != BTRFS_QGROUP_LIMIT_KEY)
     428          22 :                         goto next1;
     429             : 
     430         310 :                 qgroup = find_qgroup_rb(fs_info, found_key.offset);
     431         310 :                 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
     432         155 :                     (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
     433           0 :                         btrfs_err(fs_info, "inconsistent qgroup config");
     434           0 :                         qgroup_mark_inconsistent(fs_info);
     435             :                 }
     436         310 :                 if (!qgroup) {
     437         155 :                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
     438         155 :                         if (IS_ERR(qgroup)) {
     439           0 :                                 ret = PTR_ERR(qgroup);
     440           0 :                                 goto out;
     441             :                         }
     442             :                 }
     443         310 :                 ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
     444         310 :                 if (ret < 0)
     445           0 :                         goto out;
     446             : 
     447         310 :                 switch (found_key.type) {
     448             :                 case BTRFS_QGROUP_INFO_KEY: {
     449         155 :                         struct btrfs_qgroup_info_item *ptr;
     450             : 
     451         155 :                         ptr = btrfs_item_ptr(l, slot,
     452             :                                              struct btrfs_qgroup_info_item);
     453         155 :                         qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
     454         155 :                         qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
     455         155 :                         qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
     456         155 :                         qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
     457             :                         /* generation currently unused */
     458         155 :                         break;
     459             :                 }
     460             :                 case BTRFS_QGROUP_LIMIT_KEY: {
     461         155 :                         struct btrfs_qgroup_limit_item *ptr;
     462             : 
     463         155 :                         ptr = btrfs_item_ptr(l, slot,
     464             :                                              struct btrfs_qgroup_limit_item);
     465         155 :                         qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
     466         155 :                         qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
     467         155 :                         qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
     468         155 :                         qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
     469         155 :                         qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
     470         155 :                         break;
     471             :                 }
     472             :                 }
     473         353 : next1:
     474         353 :                 ret = btrfs_next_item(quota_root, path);
     475         353 :                 if (ret < 0)
     476           0 :                         goto out;
     477         353 :                 if (ret)
     478             :                         break;
     479             :         }
     480          21 :         btrfs_release_path(path);
     481             : 
     482             :         /*
     483             :          * pass 2: read all qgroup relations
     484             :          */
     485          21 :         key.objectid = 0;
     486          21 :         key.type = BTRFS_QGROUP_RELATION_KEY;
     487          21 :         key.offset = 0;
     488          21 :         ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
     489          21 :         if (ret)
     490          19 :                 goto out;
     491          22 :         while (1) {
     492          22 :                 slot = path->slots[0];
     493          22 :                 l = path->nodes[0];
     494          22 :                 btrfs_item_key_to_cpu(l, &found_key, slot);
     495             : 
     496          22 :                 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
     497           0 :                         goto next2;
     498             : 
     499          22 :                 if (found_key.objectid > found_key.offset) {
     500             :                         /* parent <- member, not needed to build config */
     501             :                         /* FIXME should we omit the key completely? */
     502          11 :                         goto next2;
     503             :                 }
     504             : 
     505          11 :                 ret = add_relation_rb(fs_info, found_key.objectid,
     506             :                                       found_key.offset);
     507          11 :                 if (ret == -ENOENT) {
     508           0 :                         btrfs_warn(fs_info,
     509             :                                 "orphan qgroup relation 0x%llx->0x%llx",
     510             :                                 found_key.objectid, found_key.offset);
     511           0 :                         ret = 0;        /* ignore the error */
     512             :                 }
     513          11 :                 if (ret)
     514           0 :                         goto out;
     515          11 : next2:
     516          22 :                 ret = btrfs_next_item(quota_root, path);
     517          22 :                 if (ret < 0)
     518           0 :                         goto out;
     519          22 :                 if (ret)
     520             :                         break;
     521             :         }
     522           2 : out:
     523          21 :         btrfs_free_path(path);
     524          21 :         fs_info->qgroup_flags |= flags;
     525          21 :         if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
     526           0 :                 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
     527          21 :         else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
     528             :                  ret >= 0)
     529           1 :                 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
     530             : 
     531          21 :         if (ret < 0) {
     532           0 :                 ulist_free(fs_info->qgroup_ulist);
     533           0 :                 fs_info->qgroup_ulist = NULL;
     534           0 :                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
     535           0 :                 btrfs_sysfs_del_qgroups(fs_info);
     536             :         }
     537             : 
     538          21 :         return ret < 0 ? ret : 0;
     539             : }
     540             : 
     541             : /*
     542             :  * Called in close_ctree() when quota is still enabled.  This verifies we don't
     543             :  * leak some reserved space.
     544             :  *
     545             :  * Return false if no reserved space is left (or if quota is not enabled).
     546             :  * Return true if some reserved space is leaked.
     547             :  */
     548        3212 : bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
     549             : {
     550        3212 :         struct rb_node *node;
     551        3212 :         bool ret = false;
     552             : 
     553        3212 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
     554             :                 return ret;
     555             :         /*
     556             :          * Since we're unmounting, there is no race and no need to grab qgroup
     557             :          * lock.  And here we don't go post-order to provide a more user
     558             :          * friendly sorted result.
     559             :          */
     560         458 :         for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) {
     561             :                 struct btrfs_qgroup *qgroup;
     562             :                 int i;
     563             : 
     564             :                 qgroup = rb_entry(node, struct btrfs_qgroup, node);
     565        1624 :                 for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) {
     566        1218 :                         if (qgroup->rsv.values[i]) { /* any non-zero reservation counts as a leak */
     567           0 :                                 ret = true; /* keep scanning so that every leak gets reported */
     568           0 :                                 btrfs_warn(fs_info,
     569             :                 "qgroup %hu/%llu has unreleased space, type %d rsv %llu",
     570             :                                    btrfs_qgroup_level(qgroup->qgroupid),
     571             :                                    btrfs_qgroup_subvolid(qgroup->qgroupid),
     572             :                                    i, qgroup->rsv.values[i]);
     573             :                         }
     574             :                 }
     575             :         }
     576             :         return ret;
     577             : }
     578             : 
     579             : /*
     580             :  * This is called from close_ctree(), open_ctree() or btrfs_quota_disable().
     581             :  * The first two are single-threaded paths.  For the third one, quota_root has
     582             :  * already been set to NULL with qgroup_lock held, so it is safe to clean up
     583             :  * the in-memory structures without holding qgroup_lock.
     584             :  */
     585        3339 : void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
     586             : {
     587        3339 :         struct rb_node *n;
     588        3339 :         struct btrfs_qgroup *qgroup;
     589             : 
                               /*
                                * Drain the tree from the front: each pass erases the current
                                * first node, so rb_first() is re-evaluated until the tree is
                                * empty.  Every qgroup is unlinked, its sysfs entry removed and
                                * its memory freed.
                                */
     590        3897 :         while ((n = rb_first(&fs_info->qgroup_tree))) {
     591         558 :                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
     592         558 :                 rb_erase(n, &fs_info->qgroup_tree);
     593         558 :                 __del_qgroup_rb(fs_info, qgroup);
     594         558 :                 btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
     595         558 :                 kfree(qgroup);
     596             :         }
     597             :         /*
     598             :          * We call btrfs_free_qgroup_config() when unmounting
     599             :          * filesystem and disabling quota, so we set qgroup_ulist
     600             :          * to be null here to avoid double free.
     601             :          */
     602        3339 :         ulist_free(fs_info->qgroup_ulist);
     603        3339 :         fs_info->qgroup_ulist = NULL;
     604        3339 :         btrfs_sysfs_del_qgroups(fs_info);
     605        3339 : }
     606             : 
                       /*
                        * Insert a zero-sized BTRFS_QGROUP_RELATION_KEY item with key
                        * (src, BTRFS_QGROUP_RELATION_KEY, dst) into the quota root of the
                        * filesystem that @trans belongs to.
                        *
                        * Return 0 on success, -ENOMEM if the path cannot be allocated, or the
                        * error returned by btrfs_insert_empty_item().
                        */
     607          32 : static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
     608             :                                     u64 dst)
     609             : {
     610          32 :         int ret;
     611          32 :         struct btrfs_root *quota_root = trans->fs_info->quota_root;
     612          32 :         struct btrfs_path *path;
     613          32 :         struct btrfs_key key;
     614             : 
     615          32 :         path = btrfs_alloc_path();
     616          32 :         if (!path)
     617             :                 return -ENOMEM;
     618             : 
     619          32 :         key.objectid = src;
     620          32 :         key.type = BTRFS_QGROUP_RELATION_KEY;
     621          32 :         key.offset = dst;
     622             : 
     623          32 :         ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
                               /*
                                * Only touch the leaf when the insert succeeded.  After a
                                * failed insert the path is not guaranteed to reference a leaf
                                * in nodes[0], so dereferencing it could crash on the error
                                * path.
                                */
     624             :         if (!ret)
     625          32 :                 btrfs_mark_buffer_dirty(path->nodes[0]);
     626             : 
     627          32 :         btrfs_free_path(path);
     628          32 :         return ret;
     629             : }
     630             : 
                       /*
                        * Delete the BTRFS_QGROUP_RELATION_KEY item with key
                        * (src, BTRFS_QGROUP_RELATION_KEY, dst) from the quota root of the
                        * filesystem that @trans belongs to.
                        *
                        * Return the result of btrfs_del_item() when the item was found,
                        * -ENOMEM if the path cannot be allocated, -ENOENT if the search does
                        * not find the key, or a negative error from btrfs_search_slot().
                        */
     631           2 : static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
     632             :                                     u64 dst)
     633             : {
     634           2 :         int ret;
     635           2 :         struct btrfs_root *quota_root = trans->fs_info->quota_root;
     636           2 :         struct btrfs_path *path;
     637           2 :         struct btrfs_key key;
     638             : 
     639           2 :         path = btrfs_alloc_path();
     640           2 :         if (!path)
     641             :                 return -ENOMEM;
     642             : 
     643           2 :         key.objectid = src;
     644           2 :         key.type = BTRFS_QGROUP_RELATION_KEY;
     645           2 :         key.offset = dst;
     646             : 
     647           2 :         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
     648           2 :         if (ret < 0)
     649           0 :                 goto out;
     650             : 
                               /* A positive return is treated as "relation item not found". */
     651           2 :         if (ret > 0) {
     652           0 :                 ret = -ENOENT;
     653           0 :                 goto out;
     654             :         }
     655             : 
     656           2 :         ret = btrfs_del_item(trans, quota_root, path);
     657           2 : out:
     658           2 :         btrfs_free_path(path);
     659           2 :         return ret;
     660             : }
     661             : 
                       /*
                        * Create the on-disk items of qgroup @qgroupid in @quota_root: one
                        * BTRFS_QGROUP_INFO_KEY item and one BTRFS_QGROUP_LIMIT_KEY item, both
                        * keyed (0, type, qgroupid).  All counters and limits are written as 0 and
                        * the info generation is set to the id of the running transaction.
                        *
                        * An item that already exists (-EEXIST from the insert) is not an error;
                        * its fields are overwritten with the same initial values.
                        *
                        * Return 0 on success or when btrfs_is_testing() is true for the
                        * filesystem (nothing is written in that case), -ENOMEM if the path cannot
                        * be allocated, or any other error from btrfs_insert_empty_item().
                        */
     662         428 : static int add_qgroup_item(struct btrfs_trans_handle *trans,
     663             :                            struct btrfs_root *quota_root, u64 qgroupid)
     664             : {
     665         428 :         int ret;
     666         428 :         struct btrfs_path *path;
     667         428 :         struct btrfs_qgroup_info_item *qgroup_info;
     668         428 :         struct btrfs_qgroup_limit_item *qgroup_limit;
     669         428 :         struct extent_buffer *leaf;
     670         428 :         struct btrfs_key key;
     671             : 
     672         428 :         if (btrfs_is_testing(quota_root->fs_info))
     673             :                 return 0;
     674             : 
     675         428 :         path = btrfs_alloc_path();
     676         428 :         if (!path)
     677             :                 return -ENOMEM;
     678             : 
     679         428 :         key.objectid = 0;
     680         428 :         key.type = BTRFS_QGROUP_INFO_KEY;
     681         428 :         key.offset = qgroupid;
     682             : 
     683             :         /*
     684             :          * Avoid a transaction abort by catching -EEXIST here. In that
     685             :          * case, we proceed by re-initializing the existing structure
     686             :          * on disk.
     687             :          */
     688             : 
     689         428 :         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
     690             :                                       sizeof(*qgroup_info));
     691         428 :         if (ret && ret != -EEXIST)
     692           0 :                 goto out;
     693             : 
     694         428 :         leaf = path->nodes[0];
     695         428 :         qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
     696             :                                  struct btrfs_qgroup_info_item);
     697         428 :         btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
     698         428 :         btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
     699         428 :         btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
     700         428 :         btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
     701         428 :         btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
     702             : 
     703         428 :         btrfs_mark_buffer_dirty(leaf);
     704             : 
                               /* Release the path so it can be reused for the limit item. */
     705         428 :         btrfs_release_path(path);
     706             : 
                               /* Same objectid and offset, only the key type changes. */
     707         428 :         key.type = BTRFS_QGROUP_LIMIT_KEY;
     708         428 :         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
     709             :                                       sizeof(*qgroup_limit));
     710         428 :         if (ret && ret != -EEXIST)
     711           0 :                 goto out;
     712             : 
     713         428 :         leaf = path->nodes[0];
     714         428 :         qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
     715             :                                   struct btrfs_qgroup_limit_item);
     716         428 :         btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
     717         428 :         btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
     718         428 :         btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
     719         428 :         btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
     720         428 :         btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
     721             : 
     722         428 :         btrfs_mark_buffer_dirty(leaf);
     723             : 
                               /* Clear a possible -EEXIST from the second insert. */
     724         428 :         ret = 0;
     725         428 : out:
     726         428 :         btrfs_free_path(path);
     727         428 :         return ret;
     728             : }
     729             : 
                       /*
                        * Delete both on-disk items of qgroup @qgroupid from the quota root:
                        * first the BTRFS_QGROUP_INFO_KEY item, then the BTRFS_QGROUP_LIMIT_KEY
                        * item, both keyed (0, type, qgroupid).
                        *
                        * Return 0 on success, -ENOMEM if the path cannot be allocated, -ENOENT
                        * if either item is not found, or a negative error from the search or
                        * delete.  A failure on the limit item is returned after the info item
                        * has already been deleted.
                        */
     730          25 : static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
     731             : {
     732          25 :         int ret;
     733          25 :         struct btrfs_root *quota_root = trans->fs_info->quota_root;
     734          25 :         struct btrfs_path *path;
     735          25 :         struct btrfs_key key;
     736             : 
     737          25 :         path = btrfs_alloc_path();
     738          25 :         if (!path)
     739             :                 return -ENOMEM;
     740             : 
     741          25 :         key.objectid = 0;
     742          25 :         key.type = BTRFS_QGROUP_INFO_KEY;
     743          25 :         key.offset = qgroupid;
     744          25 :         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
     745          25 :         if (ret < 0)
     746           0 :                 goto out;
     747             : 
                               /* A positive return is treated as "info item not found". */
     748          25 :         if (ret > 0) {
     749           0 :                 ret = -ENOENT;
     750           0 :                 goto out;
     751             :         }
     752             : 
     753          25 :         ret = btrfs_del_item(trans, quota_root, path);
     754          25 :         if (ret)
     755           0 :                 goto out;
     756             : 
                               /* Release the path so it can be reused for the limit item. */
     757          25 :         btrfs_release_path(path);
     758             : 
     759          25 :         key.type = BTRFS_QGROUP_LIMIT_KEY;
     760          25 :         ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
     761          25 :         if (ret < 0)
     762           0 :                 goto out;
     763             : 
     764          25 :         if (ret > 0) {
     765           0 :                 ret = -ENOENT;
     766           0 :                 goto out;
     767             :         }
     768             : 
     769          25 :         ret = btrfs_del_item(trans, quota_root, path);
     770             : 
     771          25 : out:
     772          25 :         btrfs_free_path(path);
     773          25 :         return ret;
     774             : }
     775             : 
                       /*
                        * Write the in-memory limit settings of @qgroup (lim_flags, max_rfer,
                        * max_excl, rsv_rfer, rsv_excl) into its existing BTRFS_QGROUP_LIMIT_KEY
                        * item in the quota root and mark the leaf dirty.
                        *
                        * Return 0 on success, -ENOMEM if the path cannot be allocated, -ENOENT
                        * if the limit item is not found, or a negative error from
                        * btrfs_search_slot().
                        */
     776        5075 : static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
     777             :                                     struct btrfs_qgroup *qgroup)
     778             : {
     779        5075 :         struct btrfs_root *quota_root = trans->fs_info->quota_root;
     780        5075 :         struct btrfs_path *path;
     781        5075 :         struct btrfs_key key;
     782        5075 :         struct extent_buffer *l;
     783        5075 :         struct btrfs_qgroup_limit_item *qgroup_limit;
     784        5075 :         int ret;
     785        5075 :         int slot;
     786             : 
     787        5075 :         key.objectid = 0;
     788        5075 :         key.type = BTRFS_QGROUP_LIMIT_KEY;
     789        5075 :         key.offset = qgroup->qgroupid;
     790             : 
     791        5075 :         path = btrfs_alloc_path();
     792        5075 :         if (!path)
     793             :                 return -ENOMEM;
     794             : 
     795        5075 :         ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
                               /* A positive return is treated as "limit item not found". */
     796        5075 :         if (ret > 0)
     797             :                 ret = -ENOENT;
     798             : 
     799        5075 :         if (ret)
     800           0 :                 goto out;
     801             : 
     802        5075 :         l = path->nodes[0];
     803        5075 :         slot = path->slots[0];
     804        5075 :         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
     805        5075 :         btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
     806        5075 :         btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
     807        5075 :         btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
     808        5075 :         btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
     809        5075 :         btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
     810             : 
     811        5075 :         btrfs_mark_buffer_dirty(l);
     812             : 
     813        5075 : out:
     814        5075 :         btrfs_free_path(path);
     815        5075 :         return ret;
     816             : }
     817             : 
                       /*
                        * Write the in-memory accounting numbers of @qgroup (rfer, rfer_cmpr,
                        * excl, excl_cmpr) into its existing BTRFS_QGROUP_INFO_KEY item in the
                        * quota root, stamp the item with the id of the running transaction and
                        * mark the leaf dirty.
                        *
                        * Return 0 on success or when btrfs_is_testing() is true for the
                        * filesystem (nothing is written in that case), -ENOMEM if the path cannot
                        * be allocated, -ENOENT if the info item is not found, or a negative
                        * error from btrfs_search_slot().
                        */
     818        5064 : static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
     819             :                                    struct btrfs_qgroup *qgroup)
     820             : {
     821        5064 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     822        5064 :         struct btrfs_root *quota_root = fs_info->quota_root;
     823        5064 :         struct btrfs_path *path;
     824        5064 :         struct btrfs_key key;
     825        5064 :         struct extent_buffer *l;
     826        5064 :         struct btrfs_qgroup_info_item *qgroup_info;
     827        5064 :         int ret;
     828        5064 :         int slot;
     829             : 
     830        5064 :         if (btrfs_is_testing(fs_info))
     831             :                 return 0;
     832             : 
     833        5064 :         key.objectid = 0;
     834        5064 :         key.type = BTRFS_QGROUP_INFO_KEY;
     835        5064 :         key.offset = qgroup->qgroupid;
     836             : 
     837        5064 :         path = btrfs_alloc_path();
     838        5064 :         if (!path)
     839             :                 return -ENOMEM;
     840             : 
     841        5064 :         ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
                               /* A positive return is treated as "info item not found". */
     842        5064 :         if (ret > 0)
     843             :                 ret = -ENOENT;
     844             : 
     845        5064 :         if (ret)
     846           0 :                 goto out;
     847             : 
     848        5064 :         l = path->nodes[0];
     849        5064 :         slot = path->slots[0];
     850        5064 :         qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
     851        5064 :         btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
     852        5064 :         btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
     853        5064 :         btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
     854        5064 :         btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
     855        5064 :         btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
     856             : 
     857        5064 :         btrfs_mark_buffer_dirty(l);
     858             : 
     859        5064 : out:
     860        5064 :         btrfs_free_path(path);
     861        5064 :         return ret;
     862             : }
     863             : 
                       /*
                        * Refresh the BTRFS_QGROUP_STATUS_KEY item (key (0, STATUS, 0)) in the
                        * quota root from the in-memory state: the qgroup flags masked with
                        * BTRFS_QGROUP_STATUS_FLAGS_MASK, the id of the running transaction as
                        * generation, and the objectid of the current rescan progress key.
                        *
                        * Return 0 on success, -ENOMEM if the path cannot be allocated, -ENOENT
                        * if the status item is not found, or a negative error from
                        * btrfs_search_slot().
                        */
     864        7688 : static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
     865             : {
     866        7688 :         struct btrfs_fs_info *fs_info = trans->fs_info;
     867        7688 :         struct btrfs_root *quota_root = fs_info->quota_root;
     868        7688 :         struct btrfs_path *path;
     869        7688 :         struct btrfs_key key;
     870        7688 :         struct extent_buffer *l;
     871        7688 :         struct btrfs_qgroup_status_item *ptr;
     872        7688 :         int ret;
     873        7688 :         int slot;
     874             : 
     875        7688 :         key.objectid = 0;
     876        7688 :         key.type = BTRFS_QGROUP_STATUS_KEY;
     877        7688 :         key.offset = 0;
     878             : 
     879        7688 :         path = btrfs_alloc_path();
     880        7688 :         if (!path)
     881             :                 return -ENOMEM;
     882             : 
     883        7688 :         ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
                               /* A positive return is treated as "status item not found". */
     884        7688 :         if (ret > 0)
     885             :                 ret = -ENOENT;
     886             : 
     887        7688 :         if (ret)
     888           0 :                 goto out;
     889             : 
     890        7688 :         l = path->nodes[0];
     891        7688 :         slot = path->slots[0];
     892        7688 :         ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
                               /* Only the bits covered by the mask are written to disk. */
     893        7688 :         btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags &
     894             :                                       BTRFS_QGROUP_STATUS_FLAGS_MASK);
     895        7688 :         btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
     896        7688 :         btrfs_set_qgroup_status_rescan(l, ptr,
     897             :                                 fs_info->qgroup_rescan_progress.objectid);
     898             : 
     899        7688 :         btrfs_mark_buffer_dirty(l);
     900             : 
     901        7688 : out:
     902        7688 :         btrfs_free_path(path);
     903        7688 :         return ret;
     904             : }
     905             : 
     906             : /*
     907             :  * called with qgroup_lock held
                        *
                        * NOTE(review): nothing in this function takes or asserts a lock;
                        * confirm the statement above against the callers.
                        *
                        * Delete every item from @root.  Each pass searches for the key
                        * (0, 0, 0), removes all items of the leaf the search lands on, and
                        * repeats until that leaf has no items.
                        *
                        * Return 0 on success, -ENOMEM if the path cannot be allocated, or the
                        * error returned by btrfs_search_slot() or btrfs_del_items().
                        */
     909         127 : static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
     910             :                                   struct btrfs_root *root)
     911             : {
     912         127 :         struct btrfs_path *path;
     913         127 :         struct btrfs_key key;
     914         127 :         struct extent_buffer *leaf = NULL;
     915         127 :         int ret;
     916         127 :         int nr = 0;
     917             : 
     918         127 :         path = btrfs_alloc_path();
     919         127 :         if (!path)
     920             :                 return -ENOMEM;
     921             : 
     922         127 :         key.objectid = 0;
     923         127 :         key.offset = 0;
     924         127 :         key.type = 0;
     925             : 
     926         381 :         while (1) {
     927         254 :                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
     928         254 :                 if (ret < 0)
     929           0 :                         goto out;
     930         254 :                 leaf = path->nodes[0];
     931         254 :                 nr = btrfs_header_nritems(leaf);
                                       /* A leaf without items means the tree is empty. */
     932         254 :                 if (!nr)
     933             :                         break;
     934             :                 /*
     935             :                  * Empty the tree one leaf at a time:
     936             :                  * every item of this leaf is removed
     937             :                  * since the whole tree is going away.
     938             :                  */
     939         127 :                 path->slots[0] = 0;
     940         127 :                 ret = btrfs_del_items(trans, root, path, 0, nr);
     941         127 :                 if (ret)
     942           0 :                         goto out;
     943             : 
     944         127 :                 btrfs_release_path(path);
     945             :         }
     946             :         ret = 0;
     947         127 : out:
     948         127 :         btrfs_free_path(path);
     949         127 :         return ret;
     950             : }
     951             : 
     952         252 : int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
     953             : {
     954         252 :         struct btrfs_root *quota_root;
     955         252 :         struct btrfs_root *tree_root = fs_info->tree_root;
     956         252 :         struct btrfs_path *path = NULL;
     957         252 :         struct btrfs_qgroup_status_item *ptr;
     958         252 :         struct extent_buffer *leaf;
     959         252 :         struct btrfs_key key;
     960         252 :         struct btrfs_key found_key;
     961         252 :         struct btrfs_qgroup *qgroup = NULL;
     962         252 :         struct btrfs_trans_handle *trans = NULL;
     963         252 :         struct ulist *ulist = NULL;
     964         252 :         int ret = 0;
     965         252 :         int slot;
     966             : 
     967             :         /*
     968             :          * We need to have subvol_sem write locked, to prevent races between
     969             :          * concurrent tasks trying to enable quotas, because we will unlock
     970             :          * and relock qgroup_ioctl_lock before setting fs_info->quota_root
     971             :          * and before setting BTRFS_FS_QUOTA_ENABLED.
     972             :          */
     973         252 :         lockdep_assert_held_write(&fs_info->subvol_sem);
     974             : 
     975         252 :         if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
     976           0 :                 btrfs_err(fs_info,
     977             :                           "qgroups are currently unsupported in extent tree v2");
     978           0 :                 return -EINVAL;
     979             :         }
     980             : 
     981         252 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
     982         252 :         if (fs_info->quota_root)
     983          94 :                 goto out;
     984             : 
     985         158 :         ulist = ulist_alloc(GFP_KERNEL);
     986         158 :         if (!ulist) {
     987           0 :                 ret = -ENOMEM;
     988           0 :                 goto out;
     989             :         }
     990             : 
     991         158 :         ret = btrfs_sysfs_add_qgroups(fs_info);
     992         158 :         if (ret < 0)
     993           0 :                 goto out;
     994             : 
     995             :         /*
     996             :          * Unlock qgroup_ioctl_lock before starting the transaction. This is to
     997             :          * avoid lock acquisition inversion problems (reported by lockdep) between
     998             :          * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
     999             :          * start a transaction.
    1000             :          * After we started the transaction lock qgroup_ioctl_lock again and
    1001             :          * check if someone else created the quota root in the meanwhile. If so,
    1002             :          * just return success and release the transaction handle.
    1003             :          *
    1004             :          * Also we don't need to worry about someone else calling
    1005             :          * btrfs_sysfs_add_qgroups() after we unlock and getting an error because
    1006             :          * that function returns 0 (success) when the sysfs entries already exist.
    1007             :          */
    1008         158 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1009             : 
    1010             :         /*
    1011             :          * 1 for quota root item
    1012             :          * 1 for BTRFS_QGROUP_STATUS item
    1013             :          *
    1014             :          * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
    1015             :          * per subvolume. However those are not currently reserved since it
    1016             :          * would be a lot of overkill.
    1017             :          */
    1018         158 :         trans = btrfs_start_transaction(tree_root, 2);
    1019             : 
    1020         158 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1021         158 :         if (IS_ERR(trans)) {
    1022           0 :                 ret = PTR_ERR(trans);
    1023           0 :                 trans = NULL;
    1024           0 :                 goto out;
    1025             :         }
    1026             : 
    1027         158 :         if (fs_info->quota_root)
    1028           0 :                 goto out;
    1029             : 
    1030         158 :         fs_info->qgroup_ulist = ulist;
    1031         158 :         ulist = NULL;
    1032             : 
    1033             :         /*
    1034             :          * initially create the quota tree
    1035             :          */
    1036         158 :         quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
    1037         158 :         if (IS_ERR(quota_root)) {
    1038           0 :                 ret =  PTR_ERR(quota_root);
    1039           0 :                 btrfs_abort_transaction(trans, ret);
    1040           0 :                 goto out;
    1041             :         }
    1042             : 
    1043         158 :         path = btrfs_alloc_path();
    1044         158 :         if (!path) {
    1045           0 :                 ret = -ENOMEM;
    1046           0 :                 btrfs_abort_transaction(trans, ret);
    1047           0 :                 goto out_free_root;
    1048             :         }
    1049             : 
    1050         158 :         key.objectid = 0;
    1051         158 :         key.type = BTRFS_QGROUP_STATUS_KEY;
    1052         158 :         key.offset = 0;
    1053             : 
    1054         158 :         ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
    1055             :                                       sizeof(*ptr));
    1056         158 :         if (ret) {
    1057           0 :                 btrfs_abort_transaction(trans, ret);
    1058           0 :                 goto out_free_path;
    1059             :         }
    1060             : 
    1061         158 :         leaf = path->nodes[0];
    1062         158 :         ptr = btrfs_item_ptr(leaf, path->slots[0],
    1063             :                                  struct btrfs_qgroup_status_item);
    1064         158 :         btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
    1065         158 :         btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
    1066         158 :         fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
    1067             :                                 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
    1068         158 :         btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags &
    1069             :                                       BTRFS_QGROUP_STATUS_FLAGS_MASK);
    1070         158 :         btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
    1071             : 
    1072         158 :         btrfs_mark_buffer_dirty(leaf);
    1073             : 
    1074         158 :         key.objectid = 0;
    1075         158 :         key.type = BTRFS_ROOT_REF_KEY;
    1076         158 :         key.offset = 0;
    1077             : 
    1078         158 :         btrfs_release_path(path);
    1079         158 :         ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
    1080         158 :         if (ret > 0)
    1081           0 :                 goto out_add_root;
    1082         158 :         if (ret < 0) {
    1083           0 :                 btrfs_abort_transaction(trans, ret);
    1084           0 :                 goto out_free_path;
    1085             :         }
    1086             : 
    1087        1963 :         while (1) {
    1088        1963 :                 slot = path->slots[0];
    1089        1963 :                 leaf = path->nodes[0];
    1090        1963 :                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
    1091             : 
    1092        1963 :                 if (found_key.type == BTRFS_ROOT_REF_KEY) {
    1093             : 
    1094             :                         /* Release locks on tree_root before we access quota_root */
    1095          16 :                         btrfs_release_path(path);
    1096             : 
    1097          16 :                         ret = add_qgroup_item(trans, quota_root,
    1098             :                                               found_key.offset);
    1099          16 :                         if (ret) {
    1100           0 :                                 btrfs_abort_transaction(trans, ret);
    1101           0 :                                 goto out_free_path;
    1102             :                         }
    1103             : 
    1104          16 :                         qgroup = add_qgroup_rb(fs_info, found_key.offset);
    1105          16 :                         if (IS_ERR(qgroup)) {
    1106           0 :                                 ret = PTR_ERR(qgroup);
    1107           0 :                                 btrfs_abort_transaction(trans, ret);
    1108           0 :                                 goto out_free_path;
    1109             :                         }
    1110          16 :                         ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
    1111          16 :                         if (ret < 0) {
    1112           0 :                                 btrfs_abort_transaction(trans, ret);
    1113           0 :                                 goto out_free_path;
    1114             :                         }
    1115          16 :                         ret = btrfs_search_slot_for_read(tree_root, &found_key,
    1116             :                                                          path, 1, 0);
    1117          16 :                         if (ret < 0) {
    1118           0 :                                 btrfs_abort_transaction(trans, ret);
    1119           0 :                                 goto out_free_path;
    1120             :                         }
    1121          16 :                         if (ret > 0) {
    1122             :                                 /*
    1123             :                                  * Shouldn't happen, but in case it does we
    1124             :                                  * don't need to do the btrfs_next_item, just
    1125             :                                  * continue.
    1126             :                                  */
    1127           0 :                                 continue;
    1128             :                         }
    1129             :                 }
    1130        1963 :                 ret = btrfs_next_item(tree_root, path);
    1131        1963 :                 if (ret < 0) {
    1132           0 :                         btrfs_abort_transaction(trans, ret);
    1133           0 :                         goto out_free_path;
    1134             :                 }
    1135        1963 :                 if (ret)
    1136             :                         break;
    1137             :         }
    1138             : 
    1139         158 : out_add_root:
    1140         158 :         btrfs_release_path(path);
    1141         158 :         ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
    1142         158 :         if (ret) {
    1143           0 :                 btrfs_abort_transaction(trans, ret);
    1144           0 :                 goto out_free_path;
    1145             :         }
    1146             : 
    1147         158 :         qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
    1148         158 :         if (IS_ERR(qgroup)) {
    1149           0 :                 ret = PTR_ERR(qgroup);
    1150           0 :                 btrfs_abort_transaction(trans, ret);
    1151           0 :                 goto out_free_path;
    1152             :         }
    1153         158 :         ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
    1154         158 :         if (ret < 0) {
    1155           0 :                 btrfs_abort_transaction(trans, ret);
    1156           0 :                 goto out_free_path;
    1157             :         }
    1158             : 
    1159         158 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1160             :         /*
    1161             :          * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
    1162             :          * a deadlock with tasks concurrently doing other qgroup operations, such
    1163             :          * as adding/removing qgroups or adding/deleting qgroup relations for example,
    1164             :          * because all qgroup operations first start or join a transaction and then
    1165             :          * lock the qgroup_ioctl_lock mutex.
    1166             :          * We are safe from a concurrent task trying to enable quotas, by calling
    1167             :          * this function, since we are serialized by fs_info->subvol_sem.
    1168             :          */
    1169         158 :         ret = btrfs_commit_transaction(trans);
    1170         158 :         trans = NULL;
    1171         158 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1172         158 :         if (ret)
    1173           0 :                 goto out_free_path;
    1174             : 
    1175             :         /*
    1176             :          * Set quota enabled flag after committing the transaction, to avoid
    1177             :          * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
    1178             :          * creation.
    1179             :          */
    1180         158 :         spin_lock(&fs_info->qgroup_lock);
    1181         158 :         fs_info->quota_root = quota_root;
    1182         158 :         set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
    1183         158 :         spin_unlock(&fs_info->qgroup_lock);
    1184             : 
    1185         158 :         ret = qgroup_rescan_init(fs_info, 0, 1);
    1186         158 :         if (!ret) {
    1187         158 :                 qgroup_rescan_zero_tracking(fs_info);
    1188         158 :                 fs_info->qgroup_rescan_running = true;
    1189         158 :                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
    1190             :                                  &fs_info->qgroup_rescan_work);
    1191             :         } else {
    1192             :                 /*
    1193             :                  * We have set both BTRFS_FS_QUOTA_ENABLED and
    1194             :                  * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
    1195             :                  * -EINPROGRESS. That can happen because someone started the
    1196             :                  * rescan worker by calling quota rescan ioctl before we
    1197             :                  * attempted to initialize the rescan worker. Failure due to
    1198             :                  * quotas disabled in the meanwhile is not possible, because
    1199             :                  * we are holding a write lock on fs_info->subvol_sem, which
    1200             :                  * is also acquired when disabling quotas.
    1201             :                  * Ignore such an error; any other error would require undoing
    1202             :                  * everything we did in the transaction we just committed.
    1203             :                  */
    1204             :                 ASSERT(ret == -EINPROGRESS);
    1205             :                 ret = 0;
    1206             :         }
    1207             : 
    1208         158 : out_free_path:
    1209         158 :         btrfs_free_path(path);
    1210             : out_free_root:
    1211         158 :         if (ret)
    1212           0 :                 btrfs_put_root(quota_root);
    1213         158 : out:
    1214         252 :         if (ret) {
    1215           0 :                 ulist_free(fs_info->qgroup_ulist);
    1216           0 :                 fs_info->qgroup_ulist = NULL;
    1217           0 :                 btrfs_sysfs_del_qgroups(fs_info);
    1218             :         }
    1219         252 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1220         252 :         if (ret && trans)
    1221           0 :                 btrfs_end_transaction(trans);
    1222         252 :         else if (trans)
    1223           0 :                 ret = btrfs_end_transaction(trans);
    1224         252 :         ulist_free(ulist);
    1225         252 :         return ret;
    1226             : }
    1227             : 
    1228         221 : int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
    1229             : {
    1230         221 :         struct btrfs_root *quota_root;
    1231         221 :         struct btrfs_trans_handle *trans = NULL;
    1232         221 :         int ret = 0;
    1233             : 
    1234             :         /*
    1235             :          * We need to have subvol_sem write locked to prevent races with
    1236             :          * snapshot creation.
    1237             :          */
    1238         221 :         lockdep_assert_held_write(&fs_info->subvol_sem);
    1239             : 
    1240             :         /*
    1241             :          * Lock the cleaner mutex to prevent races with concurrent relocation,
    1242             :          * because relocation may be building backrefs for blocks of the quota
    1243             :          * root while we are deleting the root. This is like dropping fs roots
    1244             :          * of deleted snapshots/subvolumes, we need the same protection.
    1245             :          *
    1246             :          * This also prevents races between concurrent tasks trying to disable
    1247             :          * quotas, because we will unlock and relock qgroup_ioctl_lock across
    1248             :          * BTRFS_FS_QUOTA_ENABLED changes.
    1249             :          */
    1250         221 :         mutex_lock(&fs_info->cleaner_mutex);
    1251             : 
    1252         221 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1253         221 :         if (!fs_info->quota_root)
    1254          94 :                 goto out;
    1255             : 
    1256             :         /*
    1257             :          * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to
    1258             :          * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs
    1259             :          * to lock that mutex while holding a transaction handle and the rescan
    1260             :          * worker needs to commit a transaction.
    1261             :          */
    1262         127 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1263             : 
    1264             :         /*
    1265             :          * Request qgroup rescan worker to complete and wait for it. This wait
    1266             :          * must be done before starting the transaction for quota disable, since
    1267             :          * it may otherwise deadlock with a transaction of the qgroup rescan worker.
    1268             :          */
    1269         127 :         clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
    1270         127 :         btrfs_qgroup_wait_for_completion(fs_info, false);
    1271             : 
    1272             :         /*
    1273             :          * 1 For the root item
    1274             :          *
    1275             :          * We should also reserve enough items for the quota tree deletion in
    1276             :          * btrfs_clean_quota_tree but this is not done.
    1277             :          *
    1278             :          * Also, we must always start a transaction without holding the mutex
    1279             :          * qgroup_ioctl_lock, see btrfs_quota_enable().
    1280             :          */
    1281         127 :         trans = btrfs_start_transaction(fs_info->tree_root, 1);
    1282             : 
    1283         127 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1284         127 :         if (IS_ERR(trans)) {
    1285           0 :                 ret = PTR_ERR(trans);
    1286           0 :                 trans = NULL;
    1287           0 :                 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
    1288           0 :                 goto out;
    1289             :         }
    1290             : 
    1291         127 :         if (!fs_info->quota_root)
    1292           0 :                 goto out;
    1293             : 
    1294         127 :         spin_lock(&fs_info->qgroup_lock);
    1295         127 :         quota_root = fs_info->quota_root;
    1296         127 :         fs_info->quota_root = NULL;
    1297         127 :         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
    1298         127 :         fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
    1299         127 :         spin_unlock(&fs_info->qgroup_lock);
    1300             : 
    1301         127 :         btrfs_free_qgroup_config(fs_info);
    1302             : 
    1303         127 :         ret = btrfs_clean_quota_tree(trans, quota_root);
    1304         127 :         if (ret) {
    1305           0 :                 btrfs_abort_transaction(trans, ret);
    1306           0 :                 goto out;
    1307             :         }
    1308             : 
    1309         127 :         ret = btrfs_del_root(trans, &quota_root->root_key);
    1310         127 :         if (ret) {
    1311           0 :                 btrfs_abort_transaction(trans, ret);
    1312           0 :                 goto out;
    1313             :         }
    1314             : 
    1315         127 :         spin_lock(&fs_info->trans_lock);
    1316         127 :         list_del(&quota_root->dirty_list);
    1317         127 :         spin_unlock(&fs_info->trans_lock);
    1318             : 
    1319         127 :         btrfs_tree_lock(quota_root->node);
    1320         127 :         btrfs_clear_buffer_dirty(trans, quota_root->node);
    1321         127 :         btrfs_tree_unlock(quota_root->node);
    1322         127 :         btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
    1323             :                               quota_root->node, 0, 1);
    1324             : 
    1325         127 :         btrfs_put_root(quota_root);
    1326             : 
    1327         221 : out:
    1328         221 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1329         221 :         if (ret && trans)
    1330           0 :                 btrfs_end_transaction(trans);
    1331         221 :         else if (trans)
    1332         127 :                 ret = btrfs_end_transaction(trans);
    1333         221 :         mutex_unlock(&fs_info->cleaner_mutex);
    1334             : 
    1335         221 :         return ret;
    1336             : }
    1337             : 
    1338      298723 : static void qgroup_dirty(struct btrfs_fs_info *fs_info,
    1339             :                          struct btrfs_qgroup *qgroup)
    1340             : {
    1341      298723 :         if (list_empty(&qgroup->dirty))
    1342        5100 :                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
    1343      298723 : }
    1344             : 
    1345             : /*
    1346             :  * The easy accounting case: we're updating a qgroup relationship whose child
    1347             :  * qgroup only has exclusive extents.
    1348             :  *
    1349             :  * In this case, all exclusive extents will also be exclusive for parent, so
    1350             :  * excl/rfer just get added/removed.
    1351             :  *
    1352             :  * So is qgroup reservation space, which should also be added/removed to
    1353             :  * parent.
    1354             :  * Otherwise, when the child tries to release reservation space, the parent will
    1355             :  * underflow its reservation (for the relationship adding case).
    1356             :  *
    1357             :  * Caller should hold fs_info->qgroup_lock.
    1358             :  */
    1359           4 : static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
    1360             :                                     struct ulist *tmp, u64 ref_root,
    1361             :                                     struct btrfs_qgroup *src, int sign)
    1362             : {
    1363           4 :         struct btrfs_qgroup *qgroup;
    1364           4 :         struct btrfs_qgroup_list *glist;
    1365           4 :         struct ulist_node *unode;
    1366           4 :         struct ulist_iterator uiter;
    1367           4 :         u64 num_bytes = src->excl;
    1368           4 :         int ret = 0;
    1369             : 
    1370           4 :         qgroup = find_qgroup_rb(fs_info, ref_root);
    1371           4 :         if (!qgroup)
    1372           0 :                 goto out;
    1373             : 
    1374           4 :         qgroup->rfer += sign * num_bytes;
    1375           4 :         qgroup->rfer_cmpr += sign * num_bytes;
    1376             : 
    1377           8 :         WARN_ON(sign < 0 && qgroup->excl < num_bytes);
    1378           4 :         qgroup->excl += sign * num_bytes;
    1379           4 :         qgroup->excl_cmpr += sign * num_bytes;
    1380             : 
    1381           4 :         if (sign > 0)
    1382           4 :                 qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
    1383             :         else
    1384           0 :                 qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
    1385             : 
    1386           4 :         qgroup_dirty(fs_info, qgroup);
    1387             : 
    1388             :         /* Get all of the parent groups that contain this qgroup */
    1389           4 :         list_for_each_entry(glist, &qgroup->groups, next_group) {
    1390           0 :                 ret = ulist_add(tmp, glist->group->qgroupid,
    1391             :                                 qgroup_to_aux(glist->group), GFP_ATOMIC);
    1392           0 :                 if (ret < 0)
    1393           0 :                         goto out;
    1394             :         }
    1395             : 
    1396             :         /* Iterate all of the parents and adjust their reference counts */
    1397           4 :         ULIST_ITER_INIT(&uiter);
    1398           4 :         while ((unode = ulist_next(tmp, &uiter))) {
    1399           0 :                 qgroup = unode_aux_to_qgroup(unode);
    1400           0 :                 qgroup->rfer += sign * num_bytes;
    1401           0 :                 qgroup->rfer_cmpr += sign * num_bytes;
    1402           0 :                 WARN_ON(sign < 0 && qgroup->excl < num_bytes);
    1403           0 :                 qgroup->excl += sign * num_bytes;
    1404           0 :                 if (sign > 0)
    1405           0 :                         qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
    1406             :                 else
    1407           0 :                         qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
    1408           0 :                 qgroup->excl_cmpr += sign * num_bytes;
    1409           0 :                 qgroup_dirty(fs_info, qgroup);
    1410             : 
    1411             :                 /* Add any parents of the parents */
    1412           0 :                 list_for_each_entry(glist, &qgroup->groups, next_group) {
    1413           0 :                         ret = ulist_add(tmp, glist->group->qgroupid,
    1414             :                                         qgroup_to_aux(glist->group), GFP_ATOMIC);
    1415           0 :                         if (ret < 0)
    1416           0 :                                 goto out;
    1417             :                 }
    1418             :         }
    1419             :         ret = 0;
    1420           4 : out:
    1421           4 :         return ret;
    1422             : }
    1423             : 
    1424             : 
    1425             : /*
    1426             :  * Quick path for updating qgroup with only excl refs.
    1427             :  *
    1428             :  * In that case, just updating all parents is enough.
    1429             :  * Otherwise we need to do a full rescan.
    1430             :  * Caller should also hold fs_info->qgroup_lock.
    1431             :  *
    1432             :  * Return 0 for quick update, return >0 when a full rescan is needed,
    1433             :  * in which case the INCONSISTENT flag is set.
    1434             :  * Return < 0 for other error.
    1435             :  */
    1436           6 : static int quick_update_accounting(struct btrfs_fs_info *fs_info,
    1437             :                                    struct ulist *tmp, u64 src, u64 dst,
    1438             :                                    int sign)
    1439             : {
    1440           6 :         struct btrfs_qgroup *qgroup;
    1441           6 :         int ret = 1;
    1442           6 :         int err = 0;
    1443             : 
    1444           6 :         qgroup = find_qgroup_rb(fs_info, src);
    1445           6 :         if (!qgroup)
    1446           0 :                 goto out;
    1447           6 :         if (qgroup->excl == qgroup->rfer) {
    1448           4 :                 ret = 0;
    1449           4 :                 err = __qgroup_excl_accounting(fs_info, tmp, dst,
    1450             :                                                qgroup, sign);
    1451           4 :                 if (err < 0) {
    1452           0 :                         ret = err;
    1453           0 :                         goto out;
    1454             :                 }
    1455             :         }
    1456           6 : out:
    1457           0 :         if (ret)
    1458           2 :                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
    1459           6 :         return ret;
    1460             : }
    1461             : 
    1462           5 : int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
    1463             :                               u64 dst)
    1464             : {
    1465           5 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1466           5 :         struct btrfs_qgroup *parent;
    1467           5 :         struct btrfs_qgroup *member;
    1468           5 :         struct btrfs_qgroup_list *list;
    1469           5 :         struct ulist *tmp;
    1470           5 :         unsigned int nofs_flag;
    1471           5 :         int ret = 0;
    1472             : 
    1473             :         /* Check the level of src and dst first */
    1474           5 :         if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
    1475             :                 return -EINVAL;
    1476             : 
    1477             :         /* We hold a transaction handle open, must do a NOFS allocation. */
    1478           5 :         nofs_flag = memalloc_nofs_save();
    1479           5 :         tmp = ulist_alloc(GFP_KERNEL);
    1480           5 :         memalloc_nofs_restore(nofs_flag);
    1481           5 :         if (!tmp)
    1482             :                 return -ENOMEM;
    1483             : 
    1484           5 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1485           5 :         if (!fs_info->quota_root) {
    1486           0 :                 ret = -ENOTCONN;
    1487           0 :                 goto out;
    1488             :         }
    1489           5 :         member = find_qgroup_rb(fs_info, src);
    1490           5 :         parent = find_qgroup_rb(fs_info, dst);
    1491           5 :         if (!member || !parent) {
    1492           0 :                 ret = -EINVAL;
    1493           0 :                 goto out;
    1494             :         }
    1495             : 
    1496             :         /* First check whether such a qgroup relation already exists */
    1497           5 :         list_for_each_entry(list, &member->groups, next_group) {
    1498           0 :                 if (list->group == parent) {
    1499           0 :                         ret = -EEXIST;
    1500           0 :                         goto out;
    1501             :                 }
    1502             :         }
    1503             : 
    1504           5 :         ret = add_qgroup_relation_item(trans, src, dst);
    1505           5 :         if (ret)
    1506           0 :                 goto out;
    1507             : 
    1508           5 :         ret = add_qgroup_relation_item(trans, dst, src);
    1509           5 :         if (ret) {
    1510           0 :                 del_qgroup_relation_item(trans, src, dst);
    1511           0 :                 goto out;
    1512             :         }
    1513             : 
    1514           5 :         spin_lock(&fs_info->qgroup_lock);
    1515           5 :         ret = __add_relation_rb(member, parent);
    1516           5 :         if (ret < 0) {
    1517           0 :                 spin_unlock(&fs_info->qgroup_lock);
    1518           0 :                 goto out;
    1519             :         }
    1520           5 :         ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
    1521           5 :         spin_unlock(&fs_info->qgroup_lock);
    1522           5 : out:
    1523           5 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1524           5 :         ulist_free(tmp);
    1525           5 :         return ret;
    1526             : }
    1527             : 
    1528           1 : static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
    1529             :                                  u64 dst)
    1530             : {
    1531           1 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1532           1 :         struct btrfs_qgroup *parent;
    1533           1 :         struct btrfs_qgroup *member;
    1534           1 :         struct btrfs_qgroup_list *list;
    1535           1 :         struct ulist *tmp;
    1536           1 :         bool found = false;
    1537           1 :         unsigned int nofs_flag;
    1538           1 :         int ret = 0;
    1539           1 :         int ret2;
    1540             : 
    1541             :         /* We hold a transaction handle open, must do a NOFS allocation. */
    1542           1 :         nofs_flag = memalloc_nofs_save();
    1543           1 :         tmp = ulist_alloc(GFP_KERNEL);
    1544           1 :         memalloc_nofs_restore(nofs_flag);
    1545           1 :         if (!tmp)
    1546             :                 return -ENOMEM;
    1547             : 
    1548           1 :         if (!fs_info->quota_root) {
    1549           0 :                 ret = -ENOTCONN;
    1550           0 :                 goto out;
    1551             :         }
    1552             : 
    1553           1 :         member = find_qgroup_rb(fs_info, src);
    1554           1 :         parent = find_qgroup_rb(fs_info, dst);
    1555             :         /*
    1556             :          * If the parent/member pair doesn't exist, then try to delete only the
    1557             :          * dead relation items.
    1558             :          */
    1559           1 :         if (!member || !parent)
    1560           0 :                 goto delete_item;
    1561             : 
    1562             :         /* First check whether such a qgroup relation exists */
    1563           1 :         list_for_each_entry(list, &member->groups, next_group) {
    1564           1 :                 if (list->group == parent) {
    1565             :                         found = true;
    1566             :                         break;
    1567             :                 }
    1568             :         }
    1569             : 
    1570           0 : delete_item:
    1571           1 :         ret = del_qgroup_relation_item(trans, src, dst);
    1572           1 :         if (ret < 0 && ret != -ENOENT)
    1573           0 :                 goto out;
    1574           1 :         ret2 = del_qgroup_relation_item(trans, dst, src);
    1575           1 :         if (ret2 < 0 && ret2 != -ENOENT)
    1576           0 :                 goto out;
    1577             : 
    1578             :         /* At least one deletion succeeded, return 0 */
    1579           1 :         if (!ret || !ret2)
    1580           1 :                 ret = 0;
    1581             : 
    1582           1 :         if (found) {
    1583           1 :                 spin_lock(&fs_info->qgroup_lock);
    1584           1 :                 del_relation_rb(fs_info, src, dst);
    1585           1 :                 ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
    1586           1 :                 spin_unlock(&fs_info->qgroup_lock);
    1587             :         }
    1588           0 : out:
    1589           1 :         ulist_free(tmp);
    1590           1 :         return ret;
    1591             : }
    1592             : 
    1593           1 : int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
    1594             :                               u64 dst)
    1595             : {
    1596           1 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1597           1 :         int ret = 0;
    1598             : 
    1599           1 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1600           1 :         ret = __del_qgroup_relation(trans, src, dst);
    1601           1 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1602             : 
    1603           1 :         return ret;
    1604             : }
    1605             : 
    1606         205 : int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
    1607             : {
    1608         205 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1609         205 :         struct btrfs_root *quota_root;
    1610         205 :         struct btrfs_qgroup *qgroup;
    1611         205 :         int ret = 0;
    1612             : 
    1613         205 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1614         205 :         if (!fs_info->quota_root) {
    1615         119 :                 ret = -ENOTCONN;
    1616         119 :                 goto out;
    1617             :         }
    1618          86 :         quota_root = fs_info->quota_root;
    1619          86 :         qgroup = find_qgroup_rb(fs_info, qgroupid);
    1620          86 :         if (qgroup) {
    1621          31 :                 ret = -EEXIST;
    1622          31 :                 goto out;
    1623             :         }
    1624             : 
    1625          55 :         ret = add_qgroup_item(trans, quota_root, qgroupid);
    1626          55 :         if (ret)
    1627           0 :                 goto out;
    1628             : 
    1629          55 :         spin_lock(&fs_info->qgroup_lock);
    1630          55 :         qgroup = add_qgroup_rb(fs_info, qgroupid);
    1631          55 :         spin_unlock(&fs_info->qgroup_lock);
    1632             : 
    1633          55 :         if (IS_ERR(qgroup)) {
    1634           0 :                 ret = PTR_ERR(qgroup);
    1635           0 :                 goto out;
    1636             :         }
    1637          55 :         ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
    1638         205 : out:
    1639         205 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1640         205 :         return ret;
    1641             : }
    1642             : 
    1643         200 : int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
    1644             : {
    1645         200 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1646         200 :         struct btrfs_qgroup *qgroup;
    1647         200 :         struct btrfs_qgroup_list *list;
    1648         200 :         int ret = 0;
    1649             : 
    1650         200 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1651         200 :         if (!fs_info->quota_root) {
    1652         130 :                 ret = -ENOTCONN;
    1653         130 :                 goto out;
    1654             :         }
    1655             : 
    1656          70 :         qgroup = find_qgroup_rb(fs_info, qgroupid);
    1657          70 :         if (!qgroup) {
    1658          45 :                 ret = -ENOENT;
    1659          45 :                 goto out;
    1660             :         }
    1661             : 
    1662             :         /* Check if there are no children of this qgroup */
    1663          25 :         if (!list_empty(&qgroup->members)) {
    1664           0 :                 ret = -EBUSY;
    1665           0 :                 goto out;
    1666             :         }
    1667             : 
    1668          25 :         ret = del_qgroup_item(trans, qgroupid);
    1669          25 :         if (ret && ret != -ENOENT)
    1670           0 :                 goto out;
    1671             : 
    1672          25 :         while (!list_empty(&qgroup->groups)) {
    1673           0 :                 list = list_first_entry(&qgroup->groups,
    1674             :                                         struct btrfs_qgroup_list, next_group);
    1675           0 :                 ret = __del_qgroup_relation(trans, qgroupid,
    1676           0 :                                             list->group->qgroupid);
    1677           0 :                 if (ret)
    1678           0 :                         goto out;
    1679             :         }
    1680             : 
    1681          25 :         spin_lock(&fs_info->qgroup_lock);
    1682          25 :         del_qgroup_rb(fs_info, qgroupid);
    1683          25 :         spin_unlock(&fs_info->qgroup_lock);
    1684             : 
    1685             :         /*
    1686             :          * Remove the qgroup from sysfs now without holding the qgroup_lock
    1687             :          * spinlock, since the sysfs_remove_group() function needs to take
    1688             :          * the mutex kernfs_mutex through kernfs_remove_by_name_ns().
    1689             :          */
    1690          25 :         btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
    1691          25 :         kfree(qgroup);
    1692         200 : out:
    1693         200 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1694         200 :         return ret;
    1695             : }
    1696             : 
    1697          11 : int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid, /* Set or clear the limits of qgroup @qgroupid as described by @limit. */
    1698             :                        struct btrfs_qgroup_limit *limit) /* @limit->flags selects the fields to apply; NOTE: @limit->flags is modified when a limit is cleared. */
    1699             : { /* Returns 0, -ENOTCONN (no quota root), -ENOENT (no such qgroup) or the error from update_qgroup_limit_item(). */
    1700          11 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1701          11 :         struct btrfs_qgroup *qgroup;
    1702          11 :         int ret = 0;
    1703             :         /* Sometimes we want to clear a limit on this qgroup.
    1704             :          * To support that, -1 is treated as a special value
    1705             :          * which tells the kernel to clear that limit on this qgroup.
    1706             :          */
    1707          11 :         const u64 CLEAR_VALUE = -1;
    1708             : 
    1709          11 :         mutex_lock(&fs_info->qgroup_ioctl_lock);
    1710          11 :         if (!fs_info->quota_root) {
    1711           0 :                 ret = -ENOTCONN;
    1712           0 :                 goto out;
    1713             :         }
    1714             : 
    1715          11 :         qgroup = find_qgroup_rb(fs_info, qgroupid);
    1716          11 :         if (!qgroup) {
    1717           0 :                 ret = -ENOENT;
    1718           0 :                 goto out;
    1719             :         }
    1720             : 
    1721          11 :         spin_lock(&fs_info->qgroup_lock); /* the in-memory limit fields below are only changed under qgroup_lock */
    1722          11 :         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
    1723           8 :                 if (limit->max_rfer == CLEAR_VALUE) {
    1724           0 :                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
    1725           0 :                         limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; /* drop the bit from @limit too, so the final "lim_flags |= limit->flags" does not set it again */
    1726           0 :                         qgroup->max_rfer = 0;
    1727             :                 } else {
    1728           8 :                         qgroup->max_rfer = limit->max_rfer;
    1729             :                 }
    1730             :         }
    1731          11 :         if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { /* same clear-or-set handling as MAX_RFER above */
    1732           3 :                 if (limit->max_excl == CLEAR_VALUE) {
    1733           0 :                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
    1734           0 :                         limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
    1735           0 :                         qgroup->max_excl = 0;
    1736             :                 } else {
    1737           3 :                         qgroup->max_excl = limit->max_excl;
    1738             :                 }
    1739             :         }
    1740          11 :         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { /* same clear-or-set handling as MAX_RFER above */
    1741           0 :                 if (limit->rsv_rfer == CLEAR_VALUE) {
    1742           0 :                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
    1743           0 :                         limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
    1744           0 :                         qgroup->rsv_rfer = 0;
    1745             :                 } else {
    1746           0 :                         qgroup->rsv_rfer = limit->rsv_rfer;
    1747             :                 }
    1748             :         }
    1749          11 :         if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { /* same clear-or-set handling as MAX_RFER above */
    1750           0 :                 if (limit->rsv_excl == CLEAR_VALUE) {
    1751           0 :                         qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
    1752           0 :                         limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
    1753           0 :                         qgroup->rsv_excl = 0;
    1754             :                 } else {
    1755           0 :                         qgroup->rsv_excl = limit->rsv_excl;
    1756             :                 }
    1757             :         }
    1758          11 :         qgroup->lim_flags |= limit->flags; /* only bits that were not cleared above are still present in limit->flags */
    1759             : 
    1760          11 :         spin_unlock(&fs_info->qgroup_lock);
    1761             : 
    1762          11 :         ret = update_qgroup_limit_item(trans, qgroup); /* called after qgroup_lock is dropped; qgroup_ioctl_lock is still held */
    1763          11 :         if (ret) {
    1764           0 :                 qgroup_mark_inconsistent(fs_info); /* the in-memory limits were already changed above, so flag the mismatch */
    1765           0 :                 btrfs_info(fs_info, "unable to update quota limit for %llu",
    1766             :                        qgroupid);
    1767             :         }
    1768             : 
    1769          11 : out:
    1770          11 :         mutex_unlock(&fs_info->qgroup_ioctl_lock);
    1771          11 :         return ret;
    1772             : }
    1773             : 
    1774      388025 : int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, /* Insert @record into delayed_refs->dirty_extent_root, ordered by record->bytenr. */
    1775             :                                 struct btrfs_delayed_ref_root *delayed_refs,
    1776             :                                 struct btrfs_qgroup_extent_record *record)
    1777             : { /* Returns 0 if @record was linked into the tree, 1 if an entry with the same bytenr already exists (@record is then left unlinked). */
    1778      388025 :         struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
    1779      388025 :         struct rb_node *parent_node = NULL;
    1780      388025 :         struct btrfs_qgroup_extent_record *entry;
    1781      388025 :         u64 bytenr = record->bytenr;
    1782             : 
    1783      388025 :         lockdep_assert_held(&delayed_refs->lock); /* "nolock": the caller must already hold delayed_refs->lock */
    1784      388025 :         trace_btrfs_qgroup_trace_extent(fs_info, record);
    1785             : 
    1786     4215682 :         while (*p) { /* descend the rb-tree looking for record->bytenr or the empty link where it belongs */
    1787     4024144 :                 parent_node = *p;
    1788     4024144 :                 entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
    1789             :                                  node);
    1790     4024144 :                 if (bytenr < entry->bytenr) {
    1791     1601190 :                         p = &(*p)->rb_left;
    1792     2422954 :                 } else if (bytenr > entry->bytenr) {
    1793     2226467 :                         p = &(*p)->rb_right;
    1794             :                 } else {
    1795      196487 :                         if (record->data_rsv && !entry->data_rsv) { /* existing entry: take over the data reservation info only if it has none yet */
    1796           0 :                                 entry->data_rsv = record->data_rsv;
    1797           0 :                                 entry->data_rsv_refroot =
    1798           0 :                                         record->data_rsv_refroot;
    1799             :                         }
    1800      196487 :                         return 1; /* duplicate bytenr: nothing inserted */
    1801             :                 }
    1802             :         }
    1803             : 
    1804      191538 :         rb_link_node(&record->node, parent_node, p); /* *p is the empty link found by the walk above */
    1805      191538 :         rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
    1806      191538 :         return 0;
    1807             : }
    1808             : 
    1809      191538 : int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, /* Look up the roots referencing qrecord->bytenr and store them in qrecord->old_roots. */
    1810             :                                    struct btrfs_qgroup_extent_record *qrecord)
    1811             : { /* Always returns 0: a failed lookup is logged and marks qgroups inconsistent instead of being propagated. */
    1812      191538 :         struct btrfs_backref_walk_ctx ctx = { 0 };
    1813      191538 :         int ret;
    1814             : 
    1815             :         /*
    1816             :          * We are always called in a context where we are already holding a
    1817             :          * transaction handle. Often we are called when adding a data delayed
    1818             :          * reference from btrfs_truncate_inode_items() (truncating or unlinking),
    1819             :          * in which case we will be holding a write lock on extent buffer from a
    1820             :          * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
    1821             :          * acquire fs_info->commit_root_sem, because that is a higher level lock
    1822             :          * that must be acquired before locking any extent buffers.
    1823             :          *
    1824             :          * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
    1825             :          * but we can't pass it a non-NULL transaction handle, because otherwise
    1826             :          * it would not use commit roots and would lock extent buffers, causing
    1827             :          * a deadlock if it ends up trying to read lock the same extent buffer
    1828             :          * that was previously write locked at btrfs_truncate_inode_items().
    1829             :          *
    1830             :          * So pass a NULL transaction handle to btrfs_find_all_roots() and
    1831             :          * explicitly tell it to not acquire the commit_root_sem - if we are
    1832             :          * holding a transaction handle we don't need its protection.
    1833             :          */
    1834      191538 :         ASSERT(trans != NULL);
    1835             : 
    1836      191538 :         if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING) /* NO_ACCOUNTING runtime flag set: skip the lookup, old_roots is left untouched */
    1837             :                 return 0;
    1838             : 
    1839      191532 :         ctx.bytenr = qrecord->bytenr; /* only bytenr and fs_info are set; every other ctx field stays zero from the initializer */
    1840      191532 :         ctx.fs_info = trans->fs_info;
    1841             : 
    1842      191532 :         ret = btrfs_find_all_roots(&ctx, true); /* presumably "true" is the request to skip commit_root_sem described above - confirm against backref.h */
    1843      191510 :         if (ret < 0) {
    1844           0 :                 qgroup_mark_inconsistent(trans->fs_info);
    1845           0 :                 btrfs_warn(trans->fs_info,
    1846             : "error accounting new delayed refs extent (err code: %d), quota inconsistent",
    1847             :                         ret);
    1848           0 :                 return 0; /* deliberately not an error for the caller; the inconsistency was flagged above */
    1849             :         }
    1850             : 
    1851             :         /*
    1852             :          * Here we don't need to get the lock of
    1853             :          * trans->transaction->delayed_refs, since inserted qrecord won't
    1854             :          * be deleted, only qrecord->node may be modified (new qrecord insert)
    1855             :          *
    1856             :          * So modifying qrecord->old_roots is safe here
    1857             :          */
    1858      191510 :         qrecord->old_roots = ctx.roots;
    1859      191510 :         return 0;
    1860             : }
    1861             : 
    1862      253418 : int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, /* Record extent [@bytenr, @num_bytes] in the transaction's dirty extent tree for qgroup accounting. */
    1863             :                               u64 num_bytes)
    1864             : { /* Returns 0 (also when nothing needed recording), -ENOMEM, or the result of btrfs_qgroup_trace_extent_post(). */
    1865      253418 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1866      253418 :         struct btrfs_qgroup_extent_record *record;
    1867      253418 :         struct btrfs_delayed_ref_root *delayed_refs;
    1868      253418 :         int ret;
    1869             : 
    1870      253418 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) /* nothing to do when quotas are disabled or the extent is empty */
    1871      253418 :             || bytenr == 0 || num_bytes == 0)
    1872             :                 return 0;
    1873      165543 :         record = kzalloc(sizeof(*record), GFP_NOFS);
    1874      165543 :         if (!record)
    1875             :                 return -ENOMEM;
    1876             : 
    1877      165543 :         delayed_refs = &trans->transaction->delayed_refs;
    1878      165543 :         record->bytenr = bytenr;
    1879      165543 :         record->num_bytes = num_bytes;
    1880      165543 :         record->old_roots = NULL;
    1881             : 
    1882      165543 :         spin_lock(&delayed_refs->lock); /* the _nolock helper asserts that this lock is held */
    1883      165543 :         ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
    1884      165543 :         spin_unlock(&delayed_refs->lock);
    1885      165543 :         if (ret > 0) { /* bytenr was already recorded: the tree did not take @record, so free it here */
    1886      147904 :                 kfree(record);
    1887      147904 :                 return 0;
    1888             :         }
    1889       17639 :         return btrfs_qgroup_trace_extent_post(trans, record); /* record is now linked in dirty_extent_root; fill in its old roots */
    1890             : }
    1891             : 
    1892       36412 : int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, /* Trace every non-inline file extent item with a non-zero disk bytenr found in leaf @eb. */
    1893             :                                   struct extent_buffer *eb)
    1894             : { /* Returns 0, or the first non-zero result of btrfs_qgroup_trace_extent() (the scan stops there). */
    1895       36412 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    1896       36412 :         int nr = btrfs_header_nritems(eb);
    1897       36412 :         int i, extent_type, ret;
    1898       36412 :         struct btrfs_key key;
    1899       36412 :         struct btrfs_file_extent_item *fi;
    1900       36412 :         u64 bytenr, num_bytes;
    1901             : 
    1902             :         /* We can be called directly from walk_up_proc() */
    1903       36412 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    1904             :                 return 0;
    1905             : 
    1906     1690605 :         for (i = 0; i < nr; i++) {
    1907     1655001 :                 btrfs_item_key_to_cpu(eb, &key, i);
    1908             : 
    1909     1655001 :                 if (key.type != BTRFS_EXTENT_DATA_KEY) /* only file extent items are of interest */
    1910     1324064 :                         continue;
    1911             : 
    1912      330937 :                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
    1913             :                 /* filter out non qgroup-accountable extents  */
    1914      330937 :                 extent_type = btrfs_file_extent_type(eb, fi);
    1915             : 
    1916      330937 :                 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
    1917      165395 :                         continue;
    1918             : 
    1919      165542 :                 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
    1920      165542 :                 if (!bytenr) /* disk bytenr 0 is skipped (presumably a hole - confirm) */
    1921           0 :                         continue;
    1922             : 
    1923      165542 :                 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
    1924             : 
    1925      165542 :                 ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes);
    1926      165542 :                 if (ret)
    1927           0 :                         return ret;
    1928             :         }
    1929       35604 :         cond_resched(); /* reached only after the whole leaf was scanned without error */
    1930       35604 :         return 0;
    1931             : }
    1932             : 
    1933             : /*
    1934             :  * Walk up the tree from the bottom, freeing leaves and any interior
    1935             :  * nodes which have had all slots visited. If a node (leaf or
    1936             :  * interior) is freed, the node above it will have its slot
    1937             :  * incremented. The root node will never be freed.
    1938             :  *
    1939             :  * At the end of this function, we should have a path which has all
    1940             :  * slots incremented to the next position for a search. If we need to
    1941             :  * read a new node it will be NULL and the node above it will have the
    1942             :  * correct slot selected for a later read.
    1943             :  *
    1944             :  * If we increment the root node's slot counter past the number of
    1945             :  * elements, 1 is returned to signal completion of the search.
    1946             :  */
    1947           0 : static int adjust_slots_upwards(struct btrfs_path *path, int root_level) /* Returns 1 when the walk is complete, 0 when @path holds a valid slot to descend from. */
    1948             : {
    1949           0 :         int level = 0;
    1950           0 :         int nr, slot;
    1951           0 :         struct extent_buffer *eb;
    1952             : 
    1953           0 :         if (root_level == 0) /* the root is itself a leaf: there is nothing to walk up, report completion */
    1954             :                 return 1;
    1955             : 
    1956           0 :         while (level <= root_level) {
    1957           0 :                 eb = path->nodes[level];
    1958           0 :                 nr = btrfs_header_nritems(eb);
    1959           0 :                 path->slots[level]++;
    1960           0 :                 slot = path->slots[level];
    1961           0 :                 if (slot >= nr || level == 0) { /* level 0 is always released; higher levels only once all their slots were visited */
    1962             :                         /*
    1963             :                          * Don't free the root -  we will detect this
    1964             :                          * condition after our loop and return a
    1965             :                          * positive value for caller to stop walking the tree.
    1966             :                          */
    1967           0 :                         if (level != root_level) {
    1968           0 :                                 btrfs_tree_unlock_rw(eb, path->locks[level]);
    1969           0 :                                 path->locks[level] = 0;
    1970             : 
    1971           0 :                                 free_extent_buffer(eb);
    1972           0 :                                 path->nodes[level] = NULL; /* NULL marks the level as needing a fresh read from the parent's slot */
    1973           0 :                                 path->slots[level] = 0;
    1974             :                         }
    1975             :                 } else {
    1976             :                         /*
    1977             :                          * We have a valid slot to walk back down
    1978             :                          * from. Stop here so caller can process these
    1979             :                          * new nodes.
    1980             :                          */
    1981             :                         break;
    1982             :                 }
    1983             : 
    1984           0 :                 level++;
    1985             :         }
    1986             : 
    1987           0 :         eb = path->nodes[root_level]; /* the root is never released above, so this pointer is still set */
    1988           0 :         if (path->slots[root_level] >= btrfs_header_nritems(eb))
    1989           0 :                 return 1;
    1990             : 
    1991             :         return 0;
    1992             : }
    1993             : 
    1994             : /*
    1995             :  * Helper function to trace a subtree tree block swap.
    1996             :  *
    1997             :  * The swap will happen in the highest tree block, but there may be a lot of
    1998             :  * tree blocks involved.
    1999             :  *
    2000             :  * For example:
    2001             :  *  OO = Old tree blocks
    2002             :  *  NN = New tree blocks allocated during balance
    2003             :  *
    2004             :  *           File tree (257)                  Reloc tree for 257
    2005             :  * L2              OO                                NN
    2006             :  *               /    \                            /    \
    2007             :  * L1          OO      OO (a)                    OO      NN (a)
    2008             :  *            / \     / \                       / \     / \
    2009             :  * L0       OO   OO OO   OO                   OO   OO NN   NN
    2010             :  *                  (b)  (c)                          (b)  (c)
    2011             :  *
    2012             :  * When calling qgroup_trace_extent_swap(), we will pass:
    2013             :  * @src_eb = OO(a)
    2014             :  * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
    2015             :  * @dst_level = 0
    2016             :  * @root_level = 1
    2017             :  *
    2018             :  * In that case, qgroup_trace_extent_swap() will search from OO(a) to
    2019             :  * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
    2020             :  *
    2021             :  * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
    2022             :  *
    2023             :  * 1) Tree search from @src_eb
    2024             :  *    It should act as a simplified btrfs_search_slot().
    2025             :  *    The key for search can be extracted from @dst_path->nodes[dst_level]
    2026             :  *    (first key).
    2027             :  *
    2028             :  * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
    2029             :  *    NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
    2030             :  *    They should be marked during previous (@dst_level = 1) iteration.
    2031             :  *
    2032             :  * 3) Mark file extents in leaves dirty
    2033             :  *    We don't have a good way to pick out new file extents only.
    2034             :  *    So we still follow the old method by scanning all file extents in
    2035             :  *    the leaf.
    2036             :  *
    2037             :  * This function can free us from keeping two paths, thus later we only need
    2038             :  * to care about how to iterate all new tree blocks in reloc tree.
    2039             :  */
    2040           0 : static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans, /* Returns 0 on success, or -EINVAL (level mismatch), -ENOMEM, -ENOENT (key mismatch) or a callee's error. */
    2041             :                                     struct extent_buffer *src_eb,
    2042             :                                     struct btrfs_path *dst_path,
    2043             :                                     int dst_level, int root_level,
    2044             :                                     bool trace_leaf) /* @trace_leaf: when true and @dst_level is 0, also trace the file extents of both leaves */
    2045             : {
    2046           0 :         struct btrfs_key key;
    2047           0 :         struct btrfs_path *src_path;
    2048           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2049           0 :         u32 nodesize = fs_info->nodesize;
    2050           0 :         int cur_level = root_level;
    2051           0 :         int ret;
    2052             : 
    2053           0 :         BUG_ON(dst_level > root_level);
    2054             :         /* Level mismatch */
    2055           0 :         if (btrfs_header_level(src_eb) != root_level)
    2056             :                 return -EINVAL;
    2057             : 
    2058           0 :         src_path = btrfs_alloc_path();
    2059           0 :         if (!src_path) {
    2060           0 :                 ret = -ENOMEM;
    2061           0 :                 goto out; /* NOTE(review): "out" calls btrfs_free_path() with a NULL path here - relies on it accepting NULL, confirm */
    2062             :         }
    2063             : 
    2064           0 :         if (dst_level) /* NOTE(review): "key" is filled in here but is not read again anywhere in this function */
    2065           0 :                 btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
    2066             :         else
    2067           0 :                 btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
    2068             : 
    2069             :         /* For src_path */
    2070           0 :         atomic_inc(&src_eb->refs); /* extra reference for the src_path slot; presumably dropped by btrfs_free_path() at "out" - confirm */
    2071           0 :         src_path->nodes[root_level] = src_eb;
    2072           0 :         src_path->slots[root_level] = dst_path->slots[root_level];
    2073           0 :         src_path->locks[root_level] = 0; /* src_eb is placed in the path without this function locking it */
    2074             : 
    2075             :         /* A simplified version of btrfs_search_slot() */
    2076           0 :         while (cur_level >= dst_level) {
    2077           0 :                 struct btrfs_key src_key;
    2078           0 :                 struct btrfs_key dst_key;
    2079             : 
    2080           0 :                 if (src_path->nodes[cur_level] == NULL) { /* level not loaded yet: read the child the parent's current slot points to */
    2081           0 :                         struct extent_buffer *eb;
    2082           0 :                         int parent_slot;
    2083             : 
    2084           0 :                         eb = src_path->nodes[cur_level + 1];
    2085           0 :                         parent_slot = src_path->slots[cur_level + 1];
    2086             : 
    2087           0 :                         eb = btrfs_read_node_slot(eb, parent_slot);
    2088           0 :                         if (IS_ERR(eb)) {
    2089           0 :                                 ret = PTR_ERR(eb);
    2090           0 :                                 goto out;
    2091             :                         }
    2092             : 
    2093           0 :                         src_path->nodes[cur_level] = eb;
    2094             : 
    2095           0 :                         btrfs_tree_read_lock(eb);
    2096           0 :                         src_path->locks[cur_level] = BTRFS_READ_LOCK;
    2097             :                 }
    2098             : 
    2099           0 :                 src_path->slots[cur_level] = dst_path->slots[cur_level]; /* reuse dst_path's slot, then check that both sides hold the same key there */
    2100           0 :                 if (cur_level) {
    2101           0 :                         btrfs_node_key_to_cpu(dst_path->nodes[cur_level],
    2102             :                                         &dst_key, dst_path->slots[cur_level]);
    2103           0 :                         btrfs_node_key_to_cpu(src_path->nodes[cur_level],
    2104             :                                         &src_key, src_path->slots[cur_level]);
    2105             :                 } else {
    2106           0 :                         btrfs_item_key_to_cpu(dst_path->nodes[cur_level],
    2107             :                                         &dst_key, dst_path->slots[cur_level]);
    2108           0 :                         btrfs_item_key_to_cpu(src_path->nodes[cur_level],
    2109             :                                         &src_key, src_path->slots[cur_level]);
    2110             :                 }
    2111             :                 /* Content mismatch, something went wrong */
    2112           0 :                 if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
    2113           0 :                         ret = -ENOENT;
    2114           0 :                         goto out;
    2115             :                 }
    2116           0 :                 cur_level--;
    2117             :         }
    2118             : 
    2119             :         /*
    2120             :          * Now both @dst_path and @src_path have been populated, record the tree
    2121             :          * blocks for qgroup accounting.
    2122             :          */
    2123           0 :         ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
    2124             :                                         nodesize);
    2125           0 :         if (ret < 0)
    2126           0 :                 goto out;
    2127           0 :         ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start,
    2128             :                                         nodesize);
    2129           0 :         if (ret < 0)
    2130           0 :                 goto out;
    2131             : 
    2132             :         /* Record leaf file extents */
    2133           0 :         if (dst_level == 0 && trace_leaf) {
    2134           0 :                 ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
    2135           0 :                 if (ret < 0)
    2136           0 :                         goto out;
    2137           0 :                 ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]);
    2138             :         }
    2139           0 : out:
    2140           0 :         btrfs_free_path(src_path); /* only src_path is released here; dst_path belongs to the caller */
    2141           0 :         return ret;
    2142             : }
    2143             : 
    2144             : /*
    2145             :  * Helper function to do recursive generation-aware depth-first search, to
    2146             :  * locate all new tree blocks in a subtree of reloc tree.
    2147             :  *
    2148             :  * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
    2149             :  *         reloc tree
    2150             :  * L2         NN (a)
    2151             :  *          /    \
    2152             :  * L1    OO        NN (b)
    2153             :  *      /  \      /  \
    2154             :  * L0  OO  OO    OO  NN
    2155             :  *               (c) (d)
    2156             :  * If we pass:
    2157             :  * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
    2158             :  * @cur_level = 1
    2159             :  * @root_level = 1
    2160             :  *
    2161             :  * We will iterate through tree blocks NN(b), NN(d) and inform qgroup to trace
    2162             :  * the above tree blocks along with their counterparts in the file tree.
    2163             :  * During the search, old tree block OO(c) will be skipped as the tree block swap
    2164             :  * won't affect OO(c).
    2165             :  */
    2166           0 : static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
    2167             :                                            struct extent_buffer *src_eb,
    2168             :                                            struct btrfs_path *dst_path,
    2169             :                                            int cur_level, int root_level,
    2170             :                                            u64 last_snapshot, bool trace_leaf)
    2171             : {
    2172           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2173           0 :         struct extent_buffer *eb;
    2174           0 :         bool need_cleanup = false;
    2175           0 :         int ret = 0;
    2176           0 :         int i;
    2177             : 
    2178             :         /* Level sanity check */
    2179           0 :         if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
    2180           0 :             root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
    2181           0 :             root_level < cur_level) {
    2182           0 :                 btrfs_err_rl(fs_info,
    2183             :                         "%s: bad levels, cur_level=%d root_level=%d",
    2184             :                         __func__, cur_level, root_level);
    2185           0 :                 return -EUCLEAN;
    2186             :         }
    2187             : 
    2188             :         /* Read the tree block if needed */
    2189           0 :         if (dst_path->nodes[cur_level] == NULL) {
    2190           0 :                 int parent_slot;
    2191           0 :                 u64 child_gen;
    2192             : 
    2193             :                 /*
    2194             :                  * dst_path->nodes[root_level] must be initialized before
    2195             :                  * calling this function.
    2196             :                  */
    2197           0 :                 if (cur_level == root_level) {
    2198           0 :                         btrfs_err_rl(fs_info,
    2199             :         "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
    2200             :                                 __func__, root_level, root_level, cur_level);
    2201           0 :                         return -EUCLEAN;
    2202             :                 }
    2203             : 
    2204             :                 /*
    2205             :                  * We need to get child blockptr/gen from parent before we can
    2206             :                  * read it.
    2207             :                   */
    2208           0 :                 eb = dst_path->nodes[cur_level + 1];
    2209           0 :                 parent_slot = dst_path->slots[cur_level + 1];
    2210           0 :                 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
    2211             : 
    2212             :                 /* This node is old, no need to trace */
    2213           0 :                 if (child_gen < last_snapshot)
    2214           0 :                         goto out;
    2215             : 
    2216           0 :                 eb = btrfs_read_node_slot(eb, parent_slot);
    2217           0 :                 if (IS_ERR(eb)) {
    2218           0 :                         ret = PTR_ERR(eb);
    2219           0 :                         goto out;
    2220             :                 }
    2221             : 
    2222           0 :                 dst_path->nodes[cur_level] = eb;
    2223           0 :                 dst_path->slots[cur_level] = 0;
    2224             : 
    2225           0 :                 btrfs_tree_read_lock(eb);
    2226           0 :                 dst_path->locks[cur_level] = BTRFS_READ_LOCK;
    2227           0 :                 need_cleanup = true;
    2228             :         }
    2229             : 
    2230             :         /* Now record this tree block and its counter part for qgroups */
    2231           0 :         ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
    2232             :                                        root_level, trace_leaf);
    2233           0 :         if (ret < 0)
    2234           0 :                 goto cleanup;
    2235             : 
    2236           0 :         eb = dst_path->nodes[cur_level];
    2237             : 
    2238           0 :         if (cur_level > 0) {
    2239             :                 /* Iterate all child tree blocks */
    2240           0 :                 for (i = 0; i < btrfs_header_nritems(eb); i++) {
    2241             :                         /* Skip old tree blocks as they won't be swapped */
    2242           0 :                         if (btrfs_node_ptr_generation(eb, i) < last_snapshot)
    2243           0 :                                 continue;
    2244           0 :                         dst_path->slots[cur_level] = i;
    2245             : 
    2246             :                         /* Recursive call (at most 7 times) */
    2247           0 :                         ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
    2248             :                                         dst_path, cur_level - 1, root_level,
    2249             :                                         last_snapshot, trace_leaf);
    2250           0 :                         if (ret < 0)
    2251           0 :                                 goto cleanup;
    2252             :                 }
    2253             :         }
    2254             : 
    2255           0 : cleanup:
    2256           0 :         if (need_cleanup) {
    2257             :                 /* Clean up */
    2258           0 :                 btrfs_tree_unlock_rw(dst_path->nodes[cur_level],
    2259           0 :                                      dst_path->locks[cur_level]);
    2260           0 :                 free_extent_buffer(dst_path->nodes[cur_level]);
    2261           0 :                 dst_path->nodes[cur_level] = NULL;
    2262           0 :                 dst_path->slots[cur_level] = 0;
    2263           0 :                 dst_path->locks[cur_level] = 0;
    2264             :         }
    2265           0 : out:
    2266             :         return ret;
    2267             : }
    2268             : 
    2269           0 : static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
    2270             :                                 struct extent_buffer *src_eb,
    2271             :                                 struct extent_buffer *dst_eb,
    2272             :                                 u64 last_snapshot, bool trace_leaf)
    2273             : {
    2274           0 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2275           0 :         struct btrfs_path *dst_path = NULL;
    2276           0 :         int level;
    2277           0 :         int ret;
    2278             : 
    2279           0 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    2280             :                 return 0;
    2281             : 
    2282             :         /* Wrong parameter order */
    2283           0 :         if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
    2284           0 :                 btrfs_err_rl(fs_info,
    2285             :                 "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
    2286             :                              btrfs_header_generation(src_eb),
    2287             :                              btrfs_header_generation(dst_eb));
    2288           0 :                 return -EUCLEAN;
    2289             :         }
    2290             : 
    2291           0 :         if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
    2292           0 :                 ret = -EIO;
    2293           0 :                 goto out;
    2294             :         }
    2295             : 
    2296           0 :         level = btrfs_header_level(dst_eb);
    2297           0 :         dst_path = btrfs_alloc_path();
    2298           0 :         if (!dst_path) {
    2299           0 :                 ret = -ENOMEM;
    2300           0 :                 goto out;
    2301             :         }
    2302             :         /* For dst_path */
    2303           0 :         atomic_inc(&dst_eb->refs);
    2304           0 :         dst_path->nodes[level] = dst_eb;
    2305           0 :         dst_path->slots[level] = 0;
    2306           0 :         dst_path->locks[level] = 0;
    2307             : 
    2308             :         /* Do the generation aware breadth-first search */
    2309           0 :         ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
    2310             :                                               level, last_snapshot, trace_leaf);
    2311           0 :         if (ret < 0)
    2312             :                 goto out;
    2313             :         ret = 0;
    2314             : 
    2315           0 : out:
    2316           0 :         btrfs_free_path(dst_path);
    2317           0 :         if (ret < 0)
    2318           0 :                 qgroup_mark_inconsistent(fs_info);
    2319             :         return ret;
    2320             : }
    2321             : 
    2322       33729 : int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
    2323             :                                struct extent_buffer *root_eb,
    2324             :                                u64 root_gen, int root_level)
    2325             : {
    2326       33729 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2327       33729 :         int ret = 0;
    2328       33729 :         int level;
    2329       33729 :         u8 drop_subptree_thres;
    2330       33729 :         struct extent_buffer *eb = root_eb;
    2331       33729 :         struct btrfs_path *path = NULL;
    2332             : 
    2333       33729 :         BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
    2334       33729 :         BUG_ON(root_eb == NULL);
    2335             : 
    2336       33729 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    2337             :                 return 0;
    2338             : 
    2339       33099 :         spin_lock(&fs_info->qgroup_lock);
    2340       33099 :         drop_subptree_thres = fs_info->qgroup_drop_subtree_thres;
    2341       33099 :         spin_unlock(&fs_info->qgroup_lock);
    2342             : 
    2343             :         /*
    2344             :          * This function only gets called for snapshot drop, if we hit a high
    2345             :          * node here, it means we are going to change ownership for quite a lot
    2346             :          * of extents, which will greatly slow down btrfs_commit_transaction().
    2347             :          *
    2348             :          * So here if we find a high tree here, we just skip the accounting and
    2349             :          * mark qgroup inconsistent.
    2350             :          */
    2351       33099 :         if (root_level >= drop_subptree_thres) {
    2352           0 :                 qgroup_mark_inconsistent(fs_info);
    2353           0 :                 return 0;
    2354             :         }
    2355             : 
    2356       66198 :         if (!extent_buffer_uptodate(root_eb)) {
    2357           0 :                 struct btrfs_tree_parent_check check = {
    2358             :                         .has_first_key = false,
    2359             :                         .transid = root_gen,
    2360             :                         .level = root_level
    2361             :                 };
    2362             : 
    2363           0 :                 ret = btrfs_read_extent_buffer(root_eb, &check);
    2364           0 :                 if (ret)
    2365           0 :                         goto out;
    2366             :         }
    2367             : 
    2368       33099 :         if (root_level == 0) {
    2369       33099 :                 ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
    2370       33099 :                 goto out;
    2371             :         }
    2372             : 
    2373           0 :         path = btrfs_alloc_path();
    2374           0 :         if (!path)
    2375             :                 return -ENOMEM;
    2376             : 
    2377             :         /*
    2378             :          * Walk down the tree.  Missing extent blocks are filled in as
    2379             :          * we go. Metadata is accounted every time we read a new
    2380             :          * extent block.
    2381             :          *
    2382             :          * When we reach a leaf, we account for file extent items in it,
    2383             :          * walk back up the tree (adjusting slot pointers as we go)
    2384             :          * and restart the search process.
    2385             :          */
    2386           0 :         atomic_inc(&root_eb->refs);      /* For path */
    2387           0 :         path->nodes[root_level] = root_eb;
    2388           0 :         path->slots[root_level] = 0;
    2389           0 :         path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
    2390           0 : walk_down:
    2391           0 :         level = root_level;
    2392           0 :         while (level >= 0) {
    2393           0 :                 if (path->nodes[level] == NULL) {
    2394           0 :                         int parent_slot;
    2395           0 :                         u64 child_bytenr;
    2396             : 
    2397             :                         /*
    2398             :                          * We need to get child blockptr from parent before we
    2399             :                          * can read it.
    2400             :                           */
    2401           0 :                         eb = path->nodes[level + 1];
    2402           0 :                         parent_slot = path->slots[level + 1];
    2403           0 :                         child_bytenr = btrfs_node_blockptr(eb, parent_slot);
    2404             : 
    2405           0 :                         eb = btrfs_read_node_slot(eb, parent_slot);
    2406           0 :                         if (IS_ERR(eb)) {
    2407           0 :                                 ret = PTR_ERR(eb);
    2408           0 :                                 goto out;
    2409             :                         }
    2410             : 
    2411           0 :                         path->nodes[level] = eb;
    2412           0 :                         path->slots[level] = 0;
    2413             : 
    2414           0 :                         btrfs_tree_read_lock(eb);
    2415           0 :                         path->locks[level] = BTRFS_READ_LOCK;
    2416             : 
    2417           0 :                         ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
    2418           0 :                                                         fs_info->nodesize);
    2419           0 :                         if (ret)
    2420           0 :                                 goto out;
    2421             :                 }
    2422             : 
    2423           0 :                 if (level == 0) {
    2424           0 :                         ret = btrfs_qgroup_trace_leaf_items(trans,
    2425             :                                                             path->nodes[level]);
    2426           0 :                         if (ret)
    2427           0 :                                 goto out;
    2428             : 
    2429             :                         /* Nonzero return here means we completed our search */
    2430           0 :                         ret = adjust_slots_upwards(path, root_level);
    2431           0 :                         if (ret)
    2432             :                                 break;
    2433             : 
    2434             :                         /* Restart search with new slots */
    2435           0 :                         goto walk_down;
    2436             :                 }
    2437             : 
    2438           0 :                 level--;
    2439             :         }
    2440             : 
    2441             :         ret = 0;
    2442       33099 : out:
    2443       33099 :         btrfs_free_path(path);
    2444             : 
    2445       33099 :         return ret;
    2446             : }
    2447             : 
    2448             : #define UPDATE_NEW      0
    2449             : #define UPDATE_OLD      1
    2450             : /*
    2451             :  * Walk all of the roots that points to the bytenr and adjust their refcnts.
    2452             :  */
    2453      412540 : static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
    2454             :                                 struct ulist *roots, struct ulist *tmp,
    2455             :                                 struct ulist *qgroups, u64 seq, int update_old)
    2456             : {
    2457      412540 :         struct ulist_node *unode;
    2458      412540 :         struct ulist_iterator uiter;
    2459      412540 :         struct ulist_node *tmp_unode;
    2460      412540 :         struct ulist_iterator tmp_uiter;
    2461      412540 :         struct btrfs_qgroup *qg;
    2462      412540 :         int ret = 0;
    2463             : 
    2464      412540 :         if (!roots)
    2465             :                 return 0;
    2466      394932 :         ULIST_ITER_INIT(&uiter);
    2467     5298057 :         while ((unode = ulist_next(roots, &uiter))) {
    2468     4903125 :                 qg = find_qgroup_rb(fs_info, unode->val);
    2469     4903125 :                 if (!qg)
    2470           0 :                         continue;
    2471             : 
    2472     4903125 :                 ulist_reinit(tmp);
    2473     4903125 :                 ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
    2474             :                                 GFP_ATOMIC);
    2475     4903125 :                 if (ret < 0)
    2476           0 :                         return ret;
    2477     4903125 :                 ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
    2478     4903125 :                 if (ret < 0)
    2479           0 :                         return ret;
    2480     4903125 :                 ULIST_ITER_INIT(&tmp_uiter);
    2481     9809650 :                 while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
    2482     4906525 :                         struct btrfs_qgroup_list *glist;
    2483             : 
    2484     4906525 :                         qg = unode_aux_to_qgroup(tmp_unode);
    2485     4906525 :                         if (update_old)
    2486     2536335 :                                 btrfs_qgroup_update_old_refcnt(qg, seq, 1);
    2487             :                         else
    2488     2370190 :                                 btrfs_qgroup_update_new_refcnt(qg, seq, 1);
    2489     4909925 :                         list_for_each_entry(glist, &qg->groups, next_group) {
    2490        3400 :                                 ret = ulist_add(qgroups, glist->group->qgroupid,
    2491             :                                                 qgroup_to_aux(glist->group),
    2492             :                                                 GFP_ATOMIC);
    2493        3400 :                                 if (ret < 0)
    2494           0 :                                         return ret;
    2495        3400 :                                 ret = ulist_add(tmp, glist->group->qgroupid,
    2496             :                                                 qgroup_to_aux(glist->group),
    2497             :                                                 GFP_ATOMIC);
    2498        3400 :                                 if (ret < 0)
    2499           0 :                                         return ret;
    2500             :                         }
    2501             :                 }
    2502             :         }
    2503             :         return 0;
    2504             : }
    2505             : 
    2506             : /*
    2507             :  * Update qgroup rfer/excl counters.
    2508             :  * Rfer update is easy, codes can explain themselves.
    2509             :  *
    2510             :  * Excl update is tricky, the update is split into 2 parts.
    2511             :  * Part 1: Possible exclusive <-> sharing detect:
    2512             :  *      |       A       |       !A      |
    2513             :  *  -------------------------------------
    2514             :  *  B   |       *       |       -       |
    2515             :  *  -------------------------------------
    2516             :  *  !B  |       +       |       **      |
    2517             :  *  -------------------------------------
    2518             :  *
    2519             :  * Conditions:
    2520             :  * A:   cur_old_roots < nr_old_roots (not exclusive before)
    2521             :  * !A:  cur_old_roots == nr_old_roots   (possible exclusive before)
    2522             :  * B:   cur_new_roots < nr_new_roots (not exclusive now)
    2523             :  * !B:  cur_new_roots == nr_new_roots   (possible exclusive now)
    2524             :  *
    2525             :  * Results:
    2526             :  * +: Possible sharing -> exclusive  -: Possible exclusive -> sharing
    2527             :  * *: Definitely not changed.           **: Possible unchanged.
    2528             :  *
    2529             :  * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
    2530             :  *
    2531             :  * To make the logic clear, we first use condition A and B to split
    2532             :  * combination into 4 results.
    2533             :  *
    2534             :  * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
    2535             :  * only on variant maybe 0.
    2536             :  *
    2537             :  * Lastly, check result **, since there are 2 variants maybe 0, split them
    2538             :  * again(2x2).
    2539             :  * But this time we don't need to consider other things, the codes and logic
    2540             :  * is easy to understand now.
    2541             :  */
    2542      206270 : static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
    2543             :                                   struct ulist *qgroups,
    2544             :                                   u64 nr_old_roots,
    2545             :                                   u64 nr_new_roots,
    2546             :                                   u64 num_bytes, u64 seq)
    2547             : {
    2548      206270 :         struct ulist_node *unode;
    2549      206270 :         struct ulist_iterator uiter;
    2550      206270 :         struct btrfs_qgroup *qg;
    2551      206270 :         u64 cur_new_count, cur_old_count;
    2552             : 
    2553      206270 :         ULIST_ITER_INIT(&uiter);
    2554     2806124 :         while ((unode = ulist_next(qgroups, &uiter))) {
    2555     2599854 :                 bool dirty = false;
    2556             : 
    2557     2599854 :                 qg = unode_aux_to_qgroup(unode);
    2558     2599854 :                 cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
    2559     2599854 :                 cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
    2560             : 
    2561     2599854 :                 trace_qgroup_update_counters(fs_info, qg, cur_old_count,
    2562             :                                              cur_new_count);
    2563             : 
    2564             :                 /* Rfer update part */
    2565     2599854 :                 if (cur_old_count == 0 && cur_new_count > 0) {
    2566       63519 :                         qg->rfer += num_bytes;
    2567       63519 :                         qg->rfer_cmpr += num_bytes;
    2568       63519 :                         dirty = true;
    2569             :                 }
    2570     2599854 :                 if (cur_old_count > 0 && cur_new_count == 0) {
    2571      229665 :                         qg->rfer -= num_bytes;
    2572      229665 :                         qg->rfer_cmpr -= num_bytes;
    2573      229665 :                         dirty = true;
    2574             :                 }
    2575             : 
    2576             :                 /* Excl update part */
    2577             :                 /* Exclusive/none -> shared case */
    2578     2599854 :                 if (cur_old_count == nr_old_roots &&
    2579     2599854 :                     cur_new_count < nr_new_roots) {
    2580             :                         /* Exclusive -> shared */
    2581        2931 :                         if (cur_old_count != 0) {
    2582           0 :                                 qg->excl -= num_bytes;
    2583           0 :                                 qg->excl_cmpr -= num_bytes;
    2584           0 :                                 dirty = true;
    2585             :                         }
    2586             :                 }
    2587             : 
    2588             :                 /* Shared -> exclusive/none case */
    2589     2599854 :                 if (cur_old_count < nr_old_roots &&
    2590     2599854 :                     cur_new_count == nr_new_roots) {
    2591             :                         /* Shared->exclusive */
    2592        5519 :                         if (cur_new_count != 0) {
    2593        4966 :                                 qg->excl += num_bytes;
    2594        4966 :                                 qg->excl_cmpr += num_bytes;
    2595        4966 :                                 dirty = true;
    2596             :                         }
    2597             :                 }
    2598             : 
    2599             :                 /* Exclusive/none -> exclusive/none case */
    2600     2599854 :                 if (cur_old_count == nr_old_roots &&
    2601             :                     cur_new_count == nr_new_roots) {
    2602       97807 :                         if (cur_old_count == 0) {
    2603             :                                 /* None -> exclusive/none */
    2604             : 
    2605       60588 :                                 if (cur_new_count != 0) {
    2606             :                                         /* None -> exclusive */
    2607       60588 :                                         qg->excl += num_bytes;
    2608       60588 :                                         qg->excl_cmpr += num_bytes;
    2609       60588 :                                         dirty = true;
    2610             :                                 }
    2611             :                                 /* None -> none, nothing changed */
    2612             :                         } else {
    2613             :                                 /* Exclusive -> exclusive/none */
    2614             : 
    2615       37219 :                                 if (cur_new_count == 0) {
    2616             :                                         /* Exclusive -> none */
    2617       24284 :                                         qg->excl -= num_bytes;
    2618       24284 :                                         qg->excl_cmpr -= num_bytes;
    2619       24284 :                                         dirty = true;
    2620             :                                 }
    2621             :                                 /* Exclusive -> exclusive, nothing changed */
    2622             :                         }
    2623             :                 }
    2624             : 
    2625     2599854 :                 if (dirty)
    2626      298150 :                         qgroup_dirty(fs_info, qg);
    2627             :         }
    2628      206270 :         return 0;
    2629             : }
    2630             : 
    2631             : /*
    2632             :  * Check if the @roots potentially is a list of fs tree roots
    2633             :  *
    2634             :  * Return 0 for definitely not a fs/subvol tree roots ulist
    2635             :  * Return 1 for possible fs/subvol tree roots in the list (considering an empty
    2636             :  *          one as well)
    2637             :  */
    2638      406326 : static int maybe_fs_roots(struct ulist *roots)
    2639             : {
    2640      406326 :         struct ulist_node *unode;
    2641      406326 :         struct ulist_iterator uiter;
    2642             : 
    2643             :         /* Empty one, still possible for fs roots */
    2644      406326 :         if (!roots || roots->nnodes == 0)
    2645             :                 return 1;
    2646             : 
    2647      333844 :         ULIST_ITER_INIT(&uiter);
    2648      333844 :         unode = ulist_next(roots, &uiter);
    2649      333844 :         if (!unode)
    2650             :                 return 1;
    2651             : 
    2652             :         /*
    2653             :          * If it contains fs tree roots, then it must belong to fs/subvol
    2654             :          * trees.
    2655             :          * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
    2656             :          */
    2657      333844 :         return is_fstree(unode->val);
    2658             : }
    2659             : 
    2660      215660 : int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
    2661             :                                 u64 num_bytes, struct ulist *old_roots,
    2662             :                                 struct ulist *new_roots)
    2663             : {
    2664      215660 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2665      215660 :         struct ulist *qgroups = NULL;
    2666      215660 :         struct ulist *tmp = NULL;
    2667      215660 :         u64 seq;
    2668      215660 :         u64 nr_new_roots = 0;
    2669      215660 :         u64 nr_old_roots = 0;
    2670      215660 :         int ret = 0;
    2671             : 
    2672             :         /*
    2673             :          * If quotas get disabled meanwhile, the resources need to be freed and
    2674             :          * we can't just exit here.
    2675             :          */
    2676      215660 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
    2677      214859 :             fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
    2678         801 :                 goto out_free;
    2679             : 
    2680      214859 :         if (new_roots) {
    2681      214859 :                 if (!maybe_fs_roots(new_roots))
    2682        4449 :                         goto out_free;
    2683      210410 :                 nr_new_roots = new_roots->nnodes;
    2684             :         }
    2685      210410 :         if (old_roots) {
    2686      191467 :                 if (!maybe_fs_roots(old_roots))
    2687           0 :                         goto out_free;
    2688      191467 :                 nr_old_roots = old_roots->nnodes;
    2689             :         }
    2690             : 
    2691             :         /* Quick exit, either not fs tree roots, or won't affect any qgroup */
    2692      210410 :         if (nr_old_roots == 0 && nr_new_roots == 0)
    2693        4140 :                 goto out_free;
    2694             : 
    2695      206270 :         BUG_ON(!fs_info->quota_root);
    2696             : 
    2697      206270 :         trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
    2698             :                                         num_bytes, nr_old_roots, nr_new_roots);
    2699             : 
    2700      206270 :         qgroups = ulist_alloc(GFP_NOFS);
    2701      206270 :         if (!qgroups) {
    2702           0 :                 ret = -ENOMEM;
    2703           0 :                 goto out_free;
    2704             :         }
    2705      206270 :         tmp = ulist_alloc(GFP_NOFS);
    2706      206270 :         if (!tmp) {
    2707           0 :                 ret = -ENOMEM;
    2708           0 :                 goto out_free;
    2709             :         }
    2710             : 
    2711      206270 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    2712      206270 :         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
    2713       17612 :                 if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
    2714           0 :                         mutex_unlock(&fs_info->qgroup_rescan_lock);
    2715           0 :                         ret = 0;
    2716           0 :                         goto out_free;
    2717             :                 }
    2718             :         }
    2719      206270 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    2720             : 
    2721      206270 :         spin_lock(&fs_info->qgroup_lock);
    2722      206270 :         seq = fs_info->qgroup_seq;
    2723             : 
    2724             :         /* Update old refcnts using old_roots */
    2725      206270 :         ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
    2726             :                                    UPDATE_OLD);
    2727      206270 :         if (ret < 0)
    2728           0 :                 goto out;
    2729             : 
    2730             :         /* Update new refcnts using new_roots */
    2731      206270 :         ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
    2732             :                                    UPDATE_NEW);
    2733      206270 :         if (ret < 0)
    2734           0 :                 goto out;
    2735             : 
    2736      206270 :         qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
    2737             :                                num_bytes, seq);
    2738             : 
    2739             :         /*
    2740             :          * Bump qgroup_seq to avoid seq overlap
    2741             :          */
    2742      206270 :         fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
    2743      206270 : out:
    2744      206270 :         spin_unlock(&fs_info->qgroup_lock);
    2745      215660 : out_free:
    2746      215660 :         ulist_free(tmp);
    2747      215660 :         ulist_free(qgroups);
    2748      215660 :         ulist_free(old_roots);
    2749      215660 :         ulist_free(new_roots);
    2750      215660 :         return ret;
    2751             : }
    2752             : 
    2753      203435 : int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
    2754             : {
    2755      203435 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2756      203435 :         struct btrfs_qgroup_extent_record *record;
    2757      203435 :         struct btrfs_delayed_ref_root *delayed_refs;
    2758      203435 :         struct ulist *new_roots = NULL;
    2759      203435 :         struct rb_node *node;
    2760      203435 :         u64 num_dirty_extents = 0;
    2761      203435 :         u64 qgroup_to_skip;
    2762      203435 :         int ret = 0;
    2763             : 
    2764      203435 :         delayed_refs = &trans->transaction->delayed_refs;
    2765      203435 :         qgroup_to_skip = delayed_refs->qgroup_to_skip;
    2766      394973 :         while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
    2767      191538 :                 record = rb_entry(node, struct btrfs_qgroup_extent_record,
    2768             :                                   node);
    2769             : 
    2770      191538 :                 num_dirty_extents++;
    2771      191538 :                 trace_btrfs_qgroup_account_extents(fs_info, record);
    2772             : 
    2773      191538 :                 if (!ret && !(fs_info->qgroup_flags &
    2774             :                               BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) {
    2775      191532 :                         struct btrfs_backref_walk_ctx ctx = { 0 };
    2776             : 
    2777      191532 :                         ctx.bytenr = record->bytenr;
    2778      191532 :                         ctx.fs_info = fs_info;
    2779             : 
    2780             :                         /*
    2781             :                          * Old roots should be searched when inserting qgroup
    2782             :                          * extent record.
    2783             :                          *
    2784             :                          * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case,
    2785             :                          * we may have some record inserted during
    2786             :                          * NO_ACCOUNTING (thus no old_roots populated), but
    2787             :                          * later we start rescan, which clears NO_ACCOUNTING,
    2788             :                          * leaving some inserted records without old_roots
    2789             :                          * populated.
    2790             :                          *
    2791             :                          * Those cases are rare and should not cause too much
    2792             :                          * time spent during commit_transaction().
    2793             :                          */
    2794      191532 :                         if (!record->old_roots) {
    2795             :                                 /* Search commit root to find old_roots */
    2796           0 :                                 ret = btrfs_find_all_roots(&ctx, false);
    2797           0 :                                 if (ret < 0)
    2798           0 :                                         goto cleanup;
    2799           0 :                                 record->old_roots = ctx.roots;
    2800           0 :                                 ctx.roots = NULL;
    2801             :                         }
    2802             : 
    2803             :                         /* Free the reserved data space */
    2804      191532 :                         btrfs_qgroup_free_refroot(fs_info,
    2805             :                                         record->data_rsv_refroot,
    2806      191532 :                                         record->data_rsv,
    2807             :                                         BTRFS_QGROUP_RSV_DATA);
    2808             :                         /*
    2809             :                          * Use BTRFS_SEQ_LAST as time_seq to do a special search,
    2810             :                          * which doesn't lock the tree or delayed_refs and searches
    2811             :                          * the current root. It's safe inside commit_transaction().
    2812             :                          */
    2813      191532 :                         ctx.trans = trans;
    2814      191532 :                         ctx.time_seq = BTRFS_SEQ_LAST;
    2815      191532 :                         ret = btrfs_find_all_roots(&ctx, false);
    2816      191532 :                         if (ret < 0)
    2817           0 :                                 goto cleanup;
    2818      191532 :                         new_roots = ctx.roots;
    2819      191532 :                         if (qgroup_to_skip) {
    2820      119426 :                                 ulist_del(new_roots, qgroup_to_skip, 0);
    2821      119426 :                                 ulist_del(record->old_roots, qgroup_to_skip,
    2822             :                                           0);
    2823             :                         }
    2824      191532 :                         ret = btrfs_qgroup_account_extent(trans, record->bytenr,
    2825             :                                                           record->num_bytes,
    2826             :                                                           record->old_roots,
    2827             :                                                           new_roots);
    2828      191532 :                         record->old_roots = NULL;
    2829      191532 :                         new_roots = NULL;
    2830             :                 }
    2831           6 : cleanup:
    2832      191538 :                 ulist_free(record->old_roots);
    2833      191538 :                 ulist_free(new_roots);
    2834      191538 :                 new_roots = NULL;
    2835      191538 :                 rb_erase(node, &delayed_refs->dirty_extent_root);
    2836      191538 :                 kfree(record);
    2837             : 
    2838             :         }
    2839      203435 :         trace_qgroup_num_dirty_extents(fs_info, trans->transid,
    2840             :                                        num_dirty_extents);
    2841      203435 :         return ret;
    2842             : }
    2843             : 
    2844             : /*
    2845             :  * Writes all changed qgroups to disk.
    2846             :  * Called by the transaction commit path and the qgroup assign ioctl.
    2847             :  */
    2848      203441 : int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
    2849             : {
    2850      203441 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2851      203441 :         int ret = 0;
    2852             : 
    2853             :         /*
    2854             :          * In case we are called from the qgroup assign ioctl, assert that we
    2855             :          * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
    2856             :          * disable operation (ioctl) and access a freed quota root.
    2857             :          */
    2858      203441 :         if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
    2859      203441 :                 lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
    2860             : 
    2861      203441 :         if (!fs_info->quota_root)
    2862             :                 return ret;
    2863             : 
    2864        7536 :         spin_lock(&fs_info->qgroup_lock);
    2865       12600 :         while (!list_empty(&fs_info->dirty_qgroups)) {
    2866        5064 :                 struct btrfs_qgroup *qgroup;
    2867        5064 :                 qgroup = list_first_entry(&fs_info->dirty_qgroups,
    2868             :                                           struct btrfs_qgroup, dirty);
    2869        5064 :                 list_del_init(&qgroup->dirty);
    2870        5064 :                 spin_unlock(&fs_info->qgroup_lock);
    2871        5064 :                 ret = update_qgroup_info_item(trans, qgroup);
    2872        5064 :                 if (ret)
    2873           0 :                         qgroup_mark_inconsistent(fs_info);
    2874        5064 :                 ret = update_qgroup_limit_item(trans, qgroup);
    2875        5064 :                 if (ret)
    2876           0 :                         qgroup_mark_inconsistent(fs_info);
    2877        5064 :                 spin_lock(&fs_info->qgroup_lock);
    2878             :         }
    2879       15072 :         if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    2880        7521 :                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
    2881             :         else
    2882          15 :                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
    2883        7536 :         spin_unlock(&fs_info->qgroup_lock);
    2884             : 
    2885        7536 :         ret = update_qgroup_status_item(trans);
    2886        7536 :         if (ret)
    2887           0 :                 qgroup_mark_inconsistent(fs_info);
    2888             : 
    2889             :         return ret;
    2890             : }
    2891             : 
    2892             : /*
    2893             :  * Copy the accounting information between qgroups. This is necessary
    2894             :  * when a snapshot or a subvolume is created. Throwing an error will
    2895             :  * cause a transaction abort so we take extra care here to only error
    2896             :  * when a readonly fs is a reasonable outcome.
    2897             :  */
    2898         426 : int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
    2899             :                          u64 objectid, struct btrfs_qgroup_inherit *inherit)
    2900             : {
    2901         426 :         int ret = 0;
    2902         426 :         int i;
    2903         426 :         u64 *i_qgroups;
    2904         426 :         bool committing = false;
    2905         426 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    2906         426 :         struct btrfs_root *quota_root;
    2907         426 :         struct btrfs_qgroup *srcgroup;
    2908         426 :         struct btrfs_qgroup *dstgroup;
    2909         426 :         bool need_rescan = false;
    2910         426 :         u32 level_size = 0;
    2911         426 :         u64 nums;
    2912             : 
    2913             :         /*
    2914             :          * There are only two callers of this function.
    2915             :          *
    2916             :          * One in create_subvol() in the ioctl context, which needs to hold
    2917             :          * the qgroup_ioctl_lock.
    2918             :          *
    2919             :          * The other one in create_pending_snapshot() where no other qgroup
    2920             :          * code can modify the fs as they all need to either start a new trans
    2921             :          * or hold a trans handle, thus we don't need to hold
    2922             :          * qgroup_ioctl_lock.
    2923             :          * This avoids a long and complex lock chain and keeps lockdep happy.
    2924             :          */
    2925         426 :         spin_lock(&fs_info->trans_lock);
    2926         426 :         if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
    2927         174 :                 committing = true;
    2928         426 :         spin_unlock(&fs_info->trans_lock);
    2929             : 
    2930         426 :         if (!committing)
    2931         252 :                 mutex_lock(&fs_info->qgroup_ioctl_lock);
    2932         426 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    2933         227 :                 goto out;
    2934             : 
    2935         199 :         quota_root = fs_info->quota_root;
    2936         199 :         if (!quota_root) {
    2937           0 :                 ret = -EINVAL;
    2938           0 :                 goto out;
    2939             :         }
    2940             : 
    2941         199 :         if (inherit) {
    2942          13 :                 i_qgroups = (u64 *)(inherit + 1);
    2943          13 :                 nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
    2944          13 :                        2 * inherit->num_excl_copies;
    2945          26 :                 for (i = 0; i < nums; ++i) {
    2946          13 :                         srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
    2947             : 
    2948             :                         /*
    2949             :                          * Zero out invalid groups so we can ignore
    2950             :                          * them later.
    2951             :                          */
    2952          13 :                         if (!srcgroup ||
    2953          12 :                             ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
    2954           2 :                                 *i_qgroups = 0ULL;
    2955             : 
    2956          13 :                         ++i_qgroups;
    2957             :                 }
    2958             :         }
    2959             : 
    2960             :         /*
    2961             :          * create a tracking group for the subvol itself
    2962             :          */
    2963         199 :         ret = add_qgroup_item(trans, quota_root, objectid);
    2964         199 :         if (ret)
    2965           0 :                 goto out;
    2966             : 
    2967             :         /*
    2968             :          * add qgroup to all inherited groups
    2969             :          */
    2970         199 :         if (inherit) {
    2971          13 :                 i_qgroups = (u64 *)(inherit + 1);
    2972          26 :                 for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
    2973          13 :                         if (*i_qgroups == 0)
    2974           2 :                                 continue;
    2975          11 :                         ret = add_qgroup_relation_item(trans, objectid,
    2976             :                                                        *i_qgroups);
    2977          11 :                         if (ret && ret != -EEXIST)
    2978           0 :                                 goto out;
    2979          11 :                         ret = add_qgroup_relation_item(trans, *i_qgroups,
    2980             :                                                        objectid);
    2981          11 :                         if (ret && ret != -EEXIST)
    2982           0 :                                 goto out;
    2983             :                 }
    2984             :                 ret = 0;
    2985             :         }
    2986             : 
    2987             : 
    2988         199 :         spin_lock(&fs_info->qgroup_lock);
    2989             : 
    2990         199 :         dstgroup = add_qgroup_rb(fs_info, objectid);
    2991         199 :         if (IS_ERR(dstgroup)) {
    2992           0 :                 ret = PTR_ERR(dstgroup);
    2993           0 :                 goto unlock;
    2994             :         }
    2995             : 
    2996         199 :         if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
    2997           0 :                 dstgroup->lim_flags = inherit->lim.flags;
    2998           0 :                 dstgroup->max_rfer = inherit->lim.max_rfer;
    2999           0 :                 dstgroup->max_excl = inherit->lim.max_excl;
    3000           0 :                 dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
    3001           0 :                 dstgroup->rsv_excl = inherit->lim.rsv_excl;
    3002             : 
    3003           0 :                 qgroup_dirty(fs_info, dstgroup);
    3004             :         }
    3005             : 
    3006         199 :         if (srcid) {
    3007         174 :                 srcgroup = find_qgroup_rb(fs_info, srcid);
    3008         174 :                 if (!srcgroup)
    3009           0 :                         goto unlock;
    3010             : 
    3011             :                 /*
    3012             :                  * We call inherit after we clone the root in order to make sure
    3013             :                  * our counts don't go crazy, so at this point the only
    3014             :                  * difference between the two roots should be the root node.
    3015             :                  */
    3016         174 :                 level_size = fs_info->nodesize;
    3017         174 :                 dstgroup->rfer = srcgroup->rfer;
    3018         174 :                 dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
    3019         174 :                 dstgroup->excl = level_size;
    3020         174 :                 dstgroup->excl_cmpr = level_size;
    3021         174 :                 srcgroup->excl = level_size;
    3022         174 :                 srcgroup->excl_cmpr = level_size;
    3023             : 
    3024             :                 /* inherit the limit info */
    3025         174 :                 dstgroup->lim_flags = srcgroup->lim_flags;
    3026         174 :                 dstgroup->max_rfer = srcgroup->max_rfer;
    3027         174 :                 dstgroup->max_excl = srcgroup->max_excl;
    3028         174 :                 dstgroup->rsv_rfer = srcgroup->rsv_rfer;
    3029         174 :                 dstgroup->rsv_excl = srcgroup->rsv_excl;
    3030             : 
    3031         174 :                 qgroup_dirty(fs_info, dstgroup);
    3032         174 :                 qgroup_dirty(fs_info, srcgroup);
    3033             :         }
    3034             : 
    3035         199 :         if (!inherit)
    3036         186 :                 goto unlock;
    3037             : 
    3038          13 :         i_qgroups = (u64 *)(inherit + 1);
    3039          26 :         for (i = 0; i < inherit->num_qgroups; ++i) {
    3040          13 :                 if (*i_qgroups) {
    3041          11 :                         ret = add_relation_rb(fs_info, objectid, *i_qgroups);
    3042          11 :                         if (ret)
    3043           0 :                                 goto unlock;
    3044             :                 }
    3045          13 :                 ++i_qgroups;
    3046             : 
    3047             :                 /*
    3048             :                  * If we're doing a snapshot, and adding the snapshot to a new
    3049             :                  * qgroup, the numbers are guaranteed to be incorrect.
    3050             :                  */
    3051          13 :                 if (srcid)
    3052           3 :                         need_rescan = true;
    3053             :         }
    3054             : 
    3055          13 :         for (i = 0; i <  inherit->num_ref_copies; ++i, i_qgroups += 2) {
    3056           0 :                 struct btrfs_qgroup *src;
    3057           0 :                 struct btrfs_qgroup *dst;
    3058             : 
    3059           0 :                 if (!i_qgroups[0] || !i_qgroups[1])
    3060           0 :                         continue;
    3061             : 
    3062           0 :                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
    3063           0 :                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
    3064             : 
    3065           0 :                 if (!src || !dst) {
    3066           0 :                         ret = -EINVAL;
    3067           0 :                         goto unlock;
    3068             :                 }
    3069             : 
    3070           0 :                 dst->rfer = src->rfer - level_size;
    3071           0 :                 dst->rfer_cmpr = src->rfer_cmpr - level_size;
    3072             : 
    3073             :                 /* Manually tweaking numbers certainly needs a rescan */
    3074           0 :                 need_rescan = true;
    3075             :         }
    3076          13 :         for (i = 0; i <  inherit->num_excl_copies; ++i, i_qgroups += 2) {
    3077           0 :                 struct btrfs_qgroup *src;
    3078           0 :                 struct btrfs_qgroup *dst;
    3079             : 
    3080           0 :                 if (!i_qgroups[0] || !i_qgroups[1])
    3081           0 :                         continue;
    3082             : 
    3083           0 :                 src = find_qgroup_rb(fs_info, i_qgroups[0]);
    3084           0 :                 dst = find_qgroup_rb(fs_info, i_qgroups[1]);
    3085             : 
    3086           0 :                 if (!src || !dst) {
    3087           0 :                         ret = -EINVAL;
    3088           0 :                         goto unlock;
    3089             :                 }
    3090             : 
    3091           0 :                 dst->excl = src->excl + level_size;
    3092           0 :                 dst->excl_cmpr = src->excl_cmpr + level_size;
    3093           0 :                 need_rescan = true;
    3094             :         }
    3095             : 
    3096          13 : unlock:
    3097         199 :         spin_unlock(&fs_info->qgroup_lock);
    3098         199 :         if (!ret)
    3099         199 :                 ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
    3100           0 : out:
    3101         426 :         if (!committing)
    3102         252 :                 mutex_unlock(&fs_info->qgroup_ioctl_lock);
    3103         426 :         if (need_rescan)
    3104           3 :                 qgroup_mark_inconsistent(fs_info);
    3105         426 :         return ret;
    3106             : }
    3107             : 
    3108     3872955 : static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
    3109             : {
    3110     3872955 :         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
    3111      105343 :             qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
    3112             :                 return false;
    3113             : 
    3114     3849925 :         if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
    3115     3343560 :             qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
    3116          16 :                 return false;
    3117             : 
    3118             :         return true;
    3119             : }
    3120             : 
    3121     3878039 : static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
    3122             :                           enum btrfs_qgroup_rsv_type type)
    3123             : {
    3124     3878039 :         struct btrfs_qgroup *qgroup;
    3125     3878039 :         struct btrfs_fs_info *fs_info = root->fs_info;
    3126     3878039 :         u64 ref_root = root->root_key.objectid;
    3127     3878039 :         int ret = 0;
    3128     3878039 :         struct ulist_node *unode;
    3129     3878039 :         struct ulist_iterator uiter;
    3130             : 
    3131     3878039 :         if (!is_fstree(ref_root))
    3132             :                 return 0;
    3133             : 
    3134     3878036 :         if (num_bytes == 0)
    3135             :                 return 0;
    3136             : 
    3137     7751684 :         if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
    3138           0 :             capable(CAP_SYS_RESOURCE))
    3139           0 :                 enforce = false;
    3140             : 
    3141     3875842 :         spin_lock(&fs_info->qgroup_lock);
    3142     3875995 :         if (!fs_info->quota_root)
    3143           0 :                 goto out;
    3144             : 
    3145     3875995 :         qgroup = find_qgroup_rb(fs_info, ref_root);
    3146     3875995 :         if (!qgroup)
    3147           0 :                 goto out;
    3148             : 
    3149             :         /*
    3150             :          * As a first step, check all affected qgroups to see whether any
    3151             :          * limit would be exceeded.
    3152             :          */
    3153     3875995 :         ulist_reinit(fs_info->qgroup_ulist);
    3154     3875995 :         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
    3155             :                         qgroup_to_aux(qgroup), GFP_ATOMIC);
    3156     3875995 :         if (ret < 0)
    3157           0 :                 goto out;
    3158     3875995 :         ULIST_ITER_INIT(&uiter);
    3159     7734006 :         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
    3160     3881057 :                 struct btrfs_qgroup *qg;
    3161     3881057 :                 struct btrfs_qgroup_list *glist;
    3162             : 
    3163     3881057 :                 qg = unode_aux_to_qgroup(unode);
    3164             : 
    3165     3881057 :                 if (enforce && !qgroup_check_limits(qg, num_bytes)) {
    3166       23046 :                         ret = -EDQUOT;
    3167       23046 :                         goto out;
    3168             :                 }
    3169             : 
    3170     3863073 :                 list_for_each_entry(glist, &qg->groups, next_group) {
    3171        5062 :                         ret = ulist_add(fs_info->qgroup_ulist,
    3172             :                                         glist->group->qgroupid,
    3173             :                                         qgroup_to_aux(glist->group), GFP_ATOMIC);
    3174        5062 :                         if (ret < 0)
    3175           0 :                                 goto out;
    3176             :                 }
    3177             :         }
    3178     3852949 :         ret = 0;
    3179             :         /*
    3180             :          * no limits exceeded, now record the reservation into all qgroups
    3181             :          */
    3182     3852949 :         ULIST_ITER_INIT(&uiter);
    3183     7710787 :         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
    3184     3857838 :                 struct btrfs_qgroup *qg;
    3185             : 
    3186     3857838 :                 qg = unode_aux_to_qgroup(unode);
    3187             : 
    3188     3857838 :                 qgroup_rsv_add(fs_info, qg, num_bytes, type);
    3189             :         }
    3190             : 
    3191     3852949 : out:
    3192     3875995 :         spin_unlock(&fs_info->qgroup_lock);
    3193     3875995 :         return ret;
    3194             : }
    3195             : 
    3196             : /*
    3197             :  * Free @num_bytes of reserved space of @type for the qgroup of @ref_root
    3198             :  * (normally a level 0 qgroup).
    3199             :  *
    3200             :  * All higher level qgroups it belongs to are handled too.
    3201             :  *
    3202             :  * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
    3203             :  * This special case is only used for META_PERTRANS type.
    3204             :  */
    3205     5027797 : void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
    3206             :                                u64 ref_root, u64 num_bytes,
    3207             :                                enum btrfs_qgroup_rsv_type type)
    3208             : {
    3209     5027797 :         struct btrfs_qgroup *qgroup;
    3210     5027797 :         struct ulist_node *unode;
    3211     5027797 :         struct ulist_iterator uiter;
    3212     5027797 :         int ret = 0;
    3213             : 
    3214     5027797 :         if (!is_fstree(ref_root))
    3215     2459195 :                 return;
    3216             : 
    3217     4849854 :         if (num_bytes == 0)
    3218             :                 return;
    3219             : 
    3220     2568602 :         if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
    3221           0 :                 WARN(1, "%s: Invalid type to free", __func__);
    3222           0 :                 return;
    3223             :         }
    3224     2568602 :         spin_lock(&fs_info->qgroup_lock);
    3225             : 
    3226     2568603 :         if (!fs_info->quota_root)
    3227           0 :                 goto out;
    3228             : 
    3229     2568603 :         qgroup = find_qgroup_rb(fs_info, ref_root);
    3230     2568603 :         if (!qgroup)
    3231           0 :                 goto out;
    3232             : 
    3233     2568603 :         if (num_bytes == (u64)-1)
    3234             :                 /*
    3235             :                  * We're freeing all pertrans rsv, get reserved value from
    3236             :                  * level 0 qgroup as real num_bytes to free.
    3237             :                  */
    3238        7754 :                 num_bytes = qgroup->rsv.values[type];
    3239             : 
    3240     2568603 :         ulist_reinit(fs_info->qgroup_ulist);
    3241     2568603 :         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
    3242             :                         qgroup_to_aux(qgroup), GFP_ATOMIC);
    3243     2568603 :         if (ret < 0)
    3244           0 :                 goto out;
    3245     2568603 :         ULIST_ITER_INIT(&uiter);
    3246     5139926 :         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
    3247     2571323 :                 struct btrfs_qgroup *qg;
    3248     2571323 :                 struct btrfs_qgroup_list *glist;
    3249             : 
    3250     2571323 :                 qg = unode_aux_to_qgroup(unode);
    3251             : 
    3252     2571323 :                 qgroup_rsv_release(fs_info, qg, num_bytes, type);
    3253             : 
    3254     2574043 :                 list_for_each_entry(glist, &qg->groups, next_group) {
    3255        2720 :                         ret = ulist_add(fs_info->qgroup_ulist,
    3256             :                                         glist->group->qgroupid,
    3257             :                                         qgroup_to_aux(glist->group), GFP_ATOMIC);
    3258        2720 :                         if (ret < 0)
    3259           0 :                                 goto out;
    3260             :                 }
    3261             :         }
    3262             : 
    3263     2568603 : out:
    3264     2568603 :         spin_unlock(&fs_info->qgroup_lock);
    3265             : }
    3266             : 
    3267             : /*
    3268             :  * Check if the leaf is the last leaf, which means all node pointers
    3269             :  * are at their last position.
    3270             :  */
    3271         240 : static bool is_last_leaf(struct btrfs_path *path)
    3272             : {
    3273         240 :         int i;
    3274             : 
    3275         257 :         for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
    3276         113 :                 if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
    3277             :                         return false;
    3278             :         }
    3279             :         return true;
    3280             : }
    3281             : 
    3282             : /*
    3283             :  * Returns < 0 on error, 0 when more leaves are to be scanned.
    3284             :  * Returns 1 when done.
    3285             :  */
    3286         241 : static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
    3287             :                               struct btrfs_path *path)
    3288             : {
    3289         241 :         struct btrfs_fs_info *fs_info = trans->fs_info;
    3290         241 :         struct btrfs_root *extent_root;
    3291         241 :         struct btrfs_key found;
    3292         241 :         struct extent_buffer *scratch_leaf = NULL;
    3293         241 :         u64 num_bytes;
    3294         241 :         bool done;
    3295         241 :         int slot;
    3296         241 :         int ret;
    3297             : 
    3298         241 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    3299         241 :         extent_root = btrfs_extent_root(fs_info,
    3300             :                                 fs_info->qgroup_rescan_progress.objectid);
    3301         241 :         ret = btrfs_search_slot_for_read(extent_root,
    3302         241 :                                          &fs_info->qgroup_rescan_progress,
    3303             :                                          path, 1, 0);
    3304             : 
    3305         241 :         btrfs_debug(fs_info,
    3306             :                 "current progress key (%llu %u %llu), search_slot ret %d",
    3307             :                 fs_info->qgroup_rescan_progress.objectid,
    3308             :                 fs_info->qgroup_rescan_progress.type,
    3309             :                 fs_info->qgroup_rescan_progress.offset, ret);
    3310             : 
    3311         241 :         if (ret) {
    3312             :                 /*
    3313             :                  * The rescan is about to end, we will not be scanning any
    3314             :                  * further blocks. We cannot unset the RESCAN flag here, because
    3315             :                  * we want to commit the transaction if everything went well.
    3316             :                  * To make the live accounting work in this phase, we set our
    3317             :                  * scan progress pointer such that every real extent objectid
    3318             :                  * will be smaller.
    3319             :                  */
    3320           1 :                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
    3321           1 :                 btrfs_release_path(path);
    3322           1 :                 mutex_unlock(&fs_info->qgroup_rescan_lock);
    3323           1 :                 return ret;
    3324             :         }
    3325         240 :         done = is_last_leaf(path);
    3326             : 
    3327         240 :         btrfs_item_key_to_cpu(path->nodes[0], &found,
    3328         240 :                               btrfs_header_nritems(path->nodes[0]) - 1);
    3329         240 :         fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
    3330             : 
    3331         240 :         scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
    3332         240 :         if (!scratch_leaf) {
    3333           0 :                 ret = -ENOMEM;
    3334           0 :                 mutex_unlock(&fs_info->qgroup_rescan_lock);
    3335           0 :                 goto out;
    3336             :         }
    3337         240 :         slot = path->slots[0];
    3338         240 :         btrfs_release_path(path);
    3339         240 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3340             : 
    3341       25363 :         for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
    3342       24883 :                 struct btrfs_backref_walk_ctx ctx = { 0 };
    3343             : 
    3344       24883 :                 btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
    3345       24883 :                 if (found.type != BTRFS_EXTENT_ITEM_KEY &&
    3346             :                     found.type != BTRFS_METADATA_ITEM_KEY)
    3347         755 :                         continue;
    3348       24128 :                 if (found.type == BTRFS_METADATA_ITEM_KEY)
    3349       10631 :                         num_bytes = fs_info->nodesize;
    3350             :                 else
    3351       13497 :                         num_bytes = found.offset;
    3352             : 
    3353       24128 :                 ctx.bytenr = found.objectid;
    3354       24128 :                 ctx.fs_info = fs_info;
    3355             : 
    3356       24128 :                 ret = btrfs_find_all_roots(&ctx, false);
    3357       24128 :                 if (ret < 0)
    3358           0 :                         goto out;
    3359             :                 /* For rescan, just pass old_roots as NULL */
    3360       24128 :                 ret = btrfs_qgroup_account_extent(trans, found.objectid,
    3361             :                                                   num_bytes, NULL, ctx.roots);
    3362       24128 :                 if (ret < 0)
    3363           0 :                         goto out;
    3364             :         }
    3365         240 : out:
    3366         240 :         if (scratch_leaf)
    3367         240 :                 free_extent_buffer(scratch_leaf);
    3368             : 
    3369         240 :         if (done && !ret) {
    3370         144 :                 ret = 1;
    3371         144 :                 fs_info->qgroup_rescan_progress.objectid = (u64)-1;
    3372             :         }
    3373             :         return ret;
    3374             : }
    3375             : 
    3376         278 : static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
    3377             : {
    3378         555 :         return btrfs_fs_closing(fs_info) ||
    3379         277 :                 test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
    3380         555 :                 !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
    3381         241 :                           fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
    3382             : }
    3383             : 
    3384         182 : static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
    3385             : {
    3386         182 :         struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
    3387             :                                                      qgroup_rescan_work);
    3388         182 :         struct btrfs_path *path;
    3389         182 :         struct btrfs_trans_handle *trans = NULL;
    3390         182 :         int err = -ENOMEM;
    3391         182 :         int ret = 0;
    3392         182 :         bool stopped = false;
    3393         182 :         bool did_leaf_rescans = false;
    3394             : 
    3395         182 :         path = btrfs_alloc_path();
    3396         182 :         if (!path)
    3397           0 :                 goto out;
    3398             :         /*
    3399             :          * Rescan should only search for commit root, and any later difference
    3400             :          * should be recorded by qgroup
    3401             :          */
    3402         182 :         path->search_commit_root = 1;
    3403         182 :         path->skip_locking = 1;
    3404             : 
    3405         182 :         err = 0;
    3406         423 :         while (!err && !(stopped = rescan_should_stop(fs_info))) {
    3407         241 :                 trans = btrfs_start_transaction(fs_info->fs_root, 0);
    3408         241 :                 if (IS_ERR(trans)) {
    3409           0 :                         err = PTR_ERR(trans);
    3410           0 :                         break;
    3411             :                 }
    3412             : 
    3413         241 :                 err = qgroup_rescan_leaf(trans, path);
    3414         241 :                 did_leaf_rescans = true;
    3415             : 
    3416         241 :                 if (err > 0)
    3417         145 :                         btrfs_commit_transaction(trans);
    3418             :                 else
    3419          96 :                         btrfs_end_transaction(trans);
    3420             :         }
    3421             : 
    3422         182 : out:
    3423         182 :         btrfs_free_path(path);
    3424             : 
    3425         182 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    3426         182 :         if (err > 0 &&
    3427         145 :             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
    3428         124 :                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
    3429          58 :         } else if (err < 0 || stopped) {
    3430          37 :                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
    3431             :         }
    3432         182 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3433             : 
    3434             :         /*
    3435             :          * Only update status, since the previous part has already updated the
    3436             :          * qgroup info, and only if we did any actual work. This also prevents
    3437             :          * race with a concurrent quota disable, which has already set
    3438             :          * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
    3439             :          * btrfs_quota_disable().
    3440             :          */
    3441         182 :         if (did_leaf_rescans) {
    3442         152 :                 trans = btrfs_start_transaction(fs_info->quota_root, 1);
    3443         152 :                 if (IS_ERR(trans)) {
    3444           0 :                         err = PTR_ERR(trans);
    3445           0 :                         trans = NULL;
    3446           0 :                         btrfs_err(fs_info,
    3447             :                                   "fail to start transaction for status update: %d",
    3448             :                                   err);
    3449             :                 }
    3450             :         } else {
    3451             :                 trans = NULL;
    3452             :         }
    3453             : 
    3454         182 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    3455         182 :         if (!stopped ||
    3456          37 :             fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN)
    3457         145 :                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
    3458         182 :         if (trans) {
    3459         152 :                 ret = update_qgroup_status_item(trans);
    3460         152 :                 if (ret < 0) {
    3461           0 :                         err = ret;
    3462           0 :                         btrfs_err(fs_info, "fail to update qgroup status: %d",
    3463             :                                   err);
    3464             :                 }
    3465             :         }
    3466         182 :         fs_info->qgroup_rescan_running = false;
    3467         182 :         fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
    3468         182 :         complete_all(&fs_info->qgroup_rescan_completion);
    3469         182 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3470             : 
    3471         182 :         if (!trans)
    3472             :                 return;
    3473             : 
    3474         152 :         btrfs_end_transaction(trans);
    3475             : 
    3476         152 :         if (stopped) {
    3477           7 :                 btrfs_info(fs_info, "qgroup scan paused");
    3478         145 :         } else if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) {
    3479           0 :                 btrfs_info(fs_info, "qgroup scan cancelled");
    3480         145 :         } else if (err >= 0) {
    3481         145 :                 btrfs_info(fs_info, "qgroup scan completed%s",
    3482             :                         err > 0 ? " (inconsistency flag cleared)" : "");
    3483             :         } else {
    3484           0 :                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
    3485             :         }
    3486             : }
    3487             : 
    3488             : /*
    3489             :  * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
    3490             :  * memory required for the rescan context.
    3491             :  */
    3492             : static int
    3493         184 : qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
    3494             :                    int init_flags)
    3495             : {
    3496         184 :         int ret = 0;
    3497             : 
    3498         184 :         if (!init_flags) {
    3499             :                 /* we're resuming qgroup rescan at mount time */
    3500           1 :                 if (!(fs_info->qgroup_flags &
    3501             :                       BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
    3502           0 :                         btrfs_warn(fs_info,
    3503             :                         "qgroup rescan init failed, qgroup rescan is not queued");
    3504           0 :                         ret = -EINVAL;
    3505           1 :                 } else if (!(fs_info->qgroup_flags &
    3506             :                              BTRFS_QGROUP_STATUS_FLAG_ON)) {
    3507           0 :                         btrfs_warn(fs_info,
    3508             :                         "qgroup rescan init failed, qgroup is not enabled");
    3509           0 :                         ret = -EINVAL;
    3510             :                 }
    3511             : 
    3512           0 :                 if (ret)
    3513           0 :                         return ret;
    3514             :         }
    3515             : 
    3516         184 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    3517             : 
    3518         184 :         if (init_flags) {
    3519         183 :                 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
    3520           2 :                         btrfs_warn(fs_info,
    3521             :                                    "qgroup rescan is already in progress");
    3522           2 :                         ret = -EINPROGRESS;
    3523         181 :                 } else if (!(fs_info->qgroup_flags &
    3524             :                              BTRFS_QGROUP_STATUS_FLAG_ON)) {
    3525           0 :                         btrfs_warn(fs_info,
    3526             :                         "qgroup rescan init failed, qgroup is not enabled");
    3527           0 :                         ret = -EINVAL;
    3528         181 :                 } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
    3529             :                         /* Quota disable is in progress */
    3530             :                         ret = -EBUSY;
    3531             :                 }
    3532             : 
    3533           2 :                 if (ret) {
    3534           2 :                         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3535           2 :                         return ret;
    3536             :                 }
    3537         181 :                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
    3538             :         }
    3539             : 
    3540         182 :         memset(&fs_info->qgroup_rescan_progress, 0,
    3541             :                 sizeof(fs_info->qgroup_rescan_progress));
    3542         182 :         fs_info->qgroup_flags &= ~(BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
    3543             :                                    BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
    3544         182 :         fs_info->qgroup_rescan_progress.objectid = progress_objectid;
    3545         182 :         init_completion(&fs_info->qgroup_rescan_completion);
    3546         182 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3547             : 
    3548         182 :         btrfs_init_work(&fs_info->qgroup_rescan_work,
    3549             :                         btrfs_qgroup_rescan_worker, NULL, NULL);
    3550         182 :         return 0;
    3551             : }
    3552             : 
    3553             : static void
    3554         181 : qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
    3555             : {
    3556         181 :         struct rb_node *n;
    3557         181 :         struct btrfs_qgroup *qgroup;
    3558             : 
    3559         181 :         spin_lock(&fs_info->qgroup_lock);
    3560             :         /* clear all current qgroup tracking information */
    3561         402 :         for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
    3562         221 :                 qgroup = rb_entry(n, struct btrfs_qgroup, node);
    3563         221 :                 qgroup->rfer = 0;
    3564         221 :                 qgroup->rfer_cmpr = 0;
    3565         221 :                 qgroup->excl = 0;
    3566         221 :                 qgroup->excl_cmpr = 0;
    3567         221 :                 qgroup_dirty(fs_info, qgroup);
    3568             :         }
    3569         181 :         spin_unlock(&fs_info->qgroup_lock);
    3570         181 : }
    3571             : 
    3572             : int
    3573          25 : btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
    3574             : {
    3575          25 :         int ret = 0;
    3576          25 :         struct btrfs_trans_handle *trans;
    3577             : 
    3578          25 :         ret = qgroup_rescan_init(fs_info, 0, 1);
    3579          25 :         if (ret)
    3580             :                 return ret;
    3581             : 
    3582             :         /*
    3583             :          * We have set the rescan_progress to 0, which means no more
    3584             :          * delayed refs will be accounted by btrfs_qgroup_account_ref.
    3585             :          * However, btrfs_qgroup_account_ref may be right after its call
    3586             :          * to btrfs_find_all_roots, in which case it would still do the
    3587             :          * accounting.
    3588             :          * To solve this, we're committing the transaction, which will
    3589             :          * ensure we run all delayed refs and only after that, we are
    3590             :          * going to clear all tracking information for a clean start.
    3591             :          */
    3592             : 
    3593          23 :         trans = btrfs_join_transaction(fs_info->fs_root);
    3594          23 :         if (IS_ERR(trans)) {
    3595           0 :                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
    3596           0 :                 return PTR_ERR(trans);
    3597             :         }
    3598          23 :         ret = btrfs_commit_transaction(trans);
    3599          23 :         if (ret) {
    3600           0 :                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
    3601           0 :                 return ret;
    3602             :         }
    3603             : 
    3604          23 :         qgroup_rescan_zero_tracking(fs_info);
    3605             : 
    3606          23 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    3607          23 :         fs_info->qgroup_rescan_running = true;
    3608          23 :         btrfs_queue_work(fs_info->qgroup_rescan_workers,
    3609             :                          &fs_info->qgroup_rescan_work);
    3610          23 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3611             : 
    3612          23 :         return 0;
    3613             : }
    3614             : 
    3615        3366 : int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
    3616             :                                      bool interruptible)
    3617             : {
    3618        3366 :         int running;
    3619        3366 :         int ret = 0;
    3620             : 
    3621        3366 :         mutex_lock(&fs_info->qgroup_rescan_lock);
    3622        3366 :         running = fs_info->qgroup_rescan_running;
    3623        3366 :         mutex_unlock(&fs_info->qgroup_rescan_lock);
    3624             : 
    3625        3366 :         if (!running)
    3626             :                 return 0;
    3627             : 
    3628         137 :         if (interruptible)
    3629          23 :                 ret = wait_for_completion_interruptible(
    3630             :                                         &fs_info->qgroup_rescan_completion);
    3631             :         else
    3632         114 :                 wait_for_completion(&fs_info->qgroup_rescan_completion);
    3633             : 
    3634             :         return ret;
    3635             : }
    3636             : 
    3637             : /*
    3638             :  * this is only called from open_ctree where we're still single threaded, thus
    3639             :  * locking is omitted here.
    3640             :  */
    3641             : void
    3642        3175 : btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
    3643             : {
    3644        3175 :         if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
    3645           1 :                 mutex_lock(&fs_info->qgroup_rescan_lock);
    3646           1 :                 fs_info->qgroup_rescan_running = true;
    3647           1 :                 btrfs_queue_work(fs_info->qgroup_rescan_workers,
    3648             :                                  &fs_info->qgroup_rescan_work);
    3649           1 :                 mutex_unlock(&fs_info->qgroup_rescan_lock);
    3650             :         }
    3651        3175 : }
    3652             : 
    3653             : #define rbtree_iterate_from_safe(node, next, start)                             \
    3654             :        for (node = start; node && ({ next = rb_next(node); 1;}); node = next)
    3655             : 
    3656        4708 : static int qgroup_unreserve_range(struct btrfs_inode *inode,
    3657             :                                   struct extent_changeset *reserved, u64 start,
    3658             :                                   u64 len)
    3659             : {
    3660        4708 :         struct rb_node *node;
    3661        4708 :         struct rb_node *next;
    3662        4708 :         struct ulist_node *entry;
    3663        4708 :         int ret = 0;
    3664             : 
    3665        4708 :         node = reserved->range_changed.root.rb_node;
    3666        4708 :         if (!node)
    3667             :                 return 0;
    3668        9460 :         while (node) {
    3669        4752 :                 entry = rb_entry(node, struct ulist_node, rb_node);
    3670        4752 :                 if (entry->val < start)
    3671          44 :                         node = node->rb_right;
    3672             :                 else
    3673        4708 :                         node = node->rb_left;
    3674             :         }
    3675             : 
    3676        4708 :         if (entry->val > start && rb_prev(&entry->rb_node))
    3677           0 :                 entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
    3678             :                                  rb_node);
    3679             : 
    3680        9402 :         rbtree_iterate_from_safe(node, next, &entry->rb_node) {
    3681        4712 :                 u64 entry_start;
    3682        4712 :                 u64 entry_end;
    3683        4712 :                 u64 entry_len;
    3684        4712 :                 int clear_ret;
    3685             : 
    3686        4712 :                 entry = rb_entry(node, struct ulist_node, rb_node);
    3687        4712 :                 entry_start = entry->val;
    3688        4712 :                 entry_end = entry->aux;
    3689        4712 :                 entry_len = entry_end - entry_start + 1;
    3690             : 
    3691        4712 :                 if (entry_start >= start + len)
    3692             :                         break;
    3693        4712 :                 if (entry_start + entry_len <= start)
    3694           0 :                         continue;
    3695             :                 /*
    3696             :                  * Now the entry is in [start, start + len), revert the
    3697             :                  * EXTENT_QGROUP_RESERVED bit.
    3698             :                  */
    3699        4712 :                 clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
    3700             :                                               entry_end, EXTENT_QGROUP_RESERVED);
    3701        4709 :                 if (!ret && clear_ret < 0)
    3702           0 :                         ret = clear_ret;
    3703             : 
    3704        4709 :                 ulist_del(&reserved->range_changed, entry->val, entry->aux);
    3705        4694 :                 if (likely(reserved->bytes_changed >= entry_len)) {
    3706        4694 :                         reserved->bytes_changed -= entry_len;
    3707             :                 } else {
    3708           0 :                         WARN_ON(1);
    3709           0 :                         reserved->bytes_changed = 0;
    3710             :                 }
    3711             :         }
    3712             : 
    3713             :         return ret;
    3714             : }
    3715             : 
    3716             : /*
    3717             :  * Try to free some space for qgroup.
    3718             :  *
    3719             :  * For qgroup, there are only 3 ways to free qgroup space:
    3720             :  * - Flush nodatacow write
    3721             :  *   Any nodatacow write will free its reserved data space at run_delalloc_range().
    3722             :  *   In theory, we should only flush nodatacow inodes, but it's not yet
    3723             :  *   possible, so we need to flush the whole root.
    3724             :  *
    3725             :  * - Wait for ordered extents
    3726             :  *   When ordered extents are finished, their reserved metadata is finally
    3727             :  *   converted to per_trans status, which can be freed by later commit
    3728             :  *   transaction.
    3729             :  *
    3730             :  * - Commit transaction
    3731             :  *   This would free the meta_per_trans space.
    3732             :  *   In theory this shouldn't provide much space, but any more qgroup space
    3733             :  *   is needed.
    3734             :  */
    3735       11501 : static int try_flush_qgroup(struct btrfs_root *root)
    3736             : {
    3737       11501 :         struct btrfs_trans_handle *trans;
    3738       11501 :         int ret;
    3739             : 
    3740             :         /* Can't hold an open transaction or we run the risk of deadlocking. */
    3741       11501 :         ASSERT(current->journal_info == NULL);
    3742       11501 :         if (WARN_ON(current->journal_info))
    3743             :                 return 0;
    3744             : 
    3745             :         /*
    3746             :          * We don't want to run flush again and again, so if there is a running
    3747             :          * one, we won't try to start a new flush, but exit directly.
    3748             :          */
    3749       11501 :         if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
    3750       71410 :                 wait_event(root->qgroup_flush_wait,
    3751             :                         !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
    3752        6309 :                 return 0;
    3753             :         }
    3754             : 
    3755        5195 :         ret = btrfs_start_delalloc_snapshot(root, true);
    3756        5194 :         if (ret < 0)
    3757           0 :                 goto out;
    3758        5194 :         btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
    3759             : 
    3760        5195 :         trans = btrfs_join_transaction(root);
    3761        5195 :         if (IS_ERR(trans)) {
    3762           0 :                 ret = PTR_ERR(trans);
    3763           0 :                 goto out;
    3764             :         }
    3765             : 
    3766        5195 :         ret = btrfs_commit_transaction(trans);
    3767        5192 : out:
    3768        5192 :         clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
    3769        5195 :         wake_up(&root->qgroup_flush_wait);
    3770        5195 :         return ret;
    3771             : }
    3772             : 
    3773    39594689 : static int qgroup_reserve_data(struct btrfs_inode *inode,
    3774             :                         struct extent_changeset **reserved_ret, u64 start,
    3775             :                         u64 len)
    3776             : {
    3777    39594689 :         struct btrfs_root *root = inode->root;
    3778    39594689 :         struct extent_changeset *reserved;
    3779    39594689 :         bool new_reserved = false;
    3780    39594689 :         u64 orig_reserved;
    3781    39594689 :         u64 to_reserve;
    3782    39594689 :         int ret;
    3783             : 
    3784    39594689 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
    3785     3428276 :             !is_fstree(root->root_key.objectid) || len == 0)
    3786             :                 return 0;
    3787             : 
    3788             :         /* @reserved parameter is mandatory for qgroup */
    3789     1911462 :         if (WARN_ON(!reserved_ret))
    3790             :                 return -EINVAL;
    3791     1911462 :         if (!*reserved_ret) {
    3792     1902162 :                 new_reserved = true;
    3793     1902162 :                 *reserved_ret = extent_changeset_alloc();
    3794     1902130 :                 if (!*reserved_ret)
    3795             :                         return -ENOMEM;
    3796             :         }
    3797     1911430 :         reserved = *reserved_ret;
    3798             :         /* Record already reserved space */
    3799     1911430 :         orig_reserved = reserved->bytes_changed;
    3800     1911430 :         ret = set_record_extent_bits(&inode->io_tree, start,
    3801     1911430 :                         start + len -1, EXTENT_QGROUP_RESERVED, reserved);
    3802             : 
    3803             :         /* Newly reserved space */
    3804     1911436 :         to_reserve = reserved->bytes_changed - orig_reserved;
    3805     1911436 :         trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len,
    3806             :                                         to_reserve, QGROUP_RESERVE);
    3807     1911415 :         if (ret < 0)
    3808           0 :                 goto out;
    3809     1911415 :         ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
    3810     1911470 :         if (ret < 0)
    3811        4708 :                 goto cleanup;
    3812             : 
    3813             :         return ret;
    3814             : 
    3815             : cleanup:
    3816        4708 :         qgroup_unreserve_range(inode, reserved, start, len);
    3817        4677 : out:
    3818        4677 :         if (new_reserved) {
    3819        4573 :                 extent_changeset_free(reserved);
    3820        4561 :                 *reserved_ret = NULL;
    3821             :         }
    3822             :         return ret;
    3823             : }
    3824             : 
    3825             : /*
    3826             :  * Reserve qgroup space for range [start, start + len).
    3827             :  *
    3828             :  * This function will either reserve space from related qgroups or do nothing
    3829             :  * if the range is already reserved.
    3830             :  *
    3831             :  * Return 0 for successful reservation
    3832             :  * Return <0 for error (including -EQUOT)
    3833             :  *
    3834             :  * NOTE: This function may sleep for memory allocation, dirty page flushing and
    3835             :  *       commit transaction. So caller should not hold any dirty page locked.
    3836             :  */
    3837    39594659 : int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
    3838             :                         struct extent_changeset **reserved_ret, u64 start,
    3839             :                         u64 len)
    3840             : {
    3841    39594659 :         int ret;
    3842             : 
    3843    39594659 :         ret = qgroup_reserve_data(inode, reserved_ret, start, len);
    3844    39587416 :         if (ret <= 0 && ret != -EDQUOT)
    3845             :                 return ret;
    3846             : 
    3847        2723 :         ret = try_flush_qgroup(inode->root);
    3848        2723 :         if (ret < 0)
    3849             :                 return ret;
    3850        2723 :         return qgroup_reserve_data(inode, reserved_ret, start, len);
    3851             : }
    3852             : 
    3853             : /* Free ranges specified by @reserved, normally in error path */
    3854         927 : static int qgroup_free_reserved_data(struct btrfs_inode *inode,
    3855             :                         struct extent_changeset *reserved, u64 start, u64 len)
    3856             : {
    3857         927 :         struct btrfs_root *root = inode->root;
    3858         927 :         struct ulist_node *unode;
    3859         927 :         struct ulist_iterator uiter;
    3860         927 :         struct extent_changeset changeset;
    3861         927 :         int freed = 0;
    3862         927 :         int ret;
    3863             : 
    3864         927 :         extent_changeset_init(&changeset);
    3865         927 :         len = round_up(start + len, root->fs_info->sectorsize);
    3866         927 :         start = round_down(start, root->fs_info->sectorsize);
    3867             : 
    3868         927 :         ULIST_ITER_INIT(&uiter);
    3869        1866 :         while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
    3870         939 :                 u64 range_start = unode->val;
    3871             :                 /* unode->aux is the inclusive end */
    3872         939 :                 u64 range_len = unode->aux - range_start + 1;
    3873         939 :                 u64 free_start;
    3874         939 :                 u64 free_len;
    3875             : 
    3876         939 :                 extent_changeset_release(&changeset);
    3877             : 
    3878             :                 /* Only free range in range [start, start + len) */
    3879         939 :                 if (range_start >= start + len ||
    3880         939 :                     range_start + range_len <= start)
    3881           5 :                         continue;
    3882         934 :                 free_start = max(range_start, start);
    3883         934 :                 free_len = min(start + len, range_start + range_len) -
    3884             :                            free_start;
    3885             :                 /*
    3886             :                  * TODO: To also modify reserved->ranges_reserved to reflect
    3887             :                  * the modification.
    3888             :                  *
    3889             :                  * However as long as we free qgroup reserved according to
    3890             :                  * EXTENT_QGROUP_RESERVED, we won't double free.
    3891             :                  * So not need to rush.
    3892             :                  */
    3893         934 :                 ret = clear_record_extent_bits(&inode->io_tree, free_start,
    3894             :                                 free_start + free_len - 1,
    3895             :                                 EXTENT_QGROUP_RESERVED, &changeset);
    3896         934 :                 if (ret < 0)
    3897           0 :                         goto out;
    3898         934 :                 freed += changeset.bytes_changed;
    3899             :         }
    3900         927 :         btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed,
    3901             :                                   BTRFS_QGROUP_RSV_DATA);
    3902         927 :         ret = freed;
    3903         927 : out:
    3904         927 :         extent_changeset_release(&changeset);
    3905         927 :         return ret;
    3906             : }
    3907             : 
    3908    89713671 : static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
    3909             :                         struct extent_changeset *reserved, u64 start, u64 len,
    3910             :                         int free)
    3911             : {
    3912    89713671 :         struct extent_changeset changeset;
    3913    89713671 :         int trace_op = QGROUP_RELEASE;
    3914    89713671 :         int ret;
    3915             : 
    3916    89713671 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags))
    3917             :                 return 0;
    3918             : 
    3919             :         /* In release case, we shouldn't have @reserved */
    3920     2944809 :         WARN_ON(!free && reserved);
    3921     2944809 :         if (free && reserved)
    3922         927 :                 return qgroup_free_reserved_data(inode, reserved, start, len);
    3923     2943882 :         extent_changeset_init(&changeset);
    3924     2943978 :         ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
    3925             :                                        EXTENT_QGROUP_RESERVED, &changeset);
    3926     2943730 :         if (ret < 0)
    3927           0 :                 goto out;
    3928             : 
    3929     2943730 :         if (free)
    3930     2922048 :                 trace_op = QGROUP_FREE;
    3931     2943730 :         trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len,
    3932             :                                         changeset.bytes_changed, trace_op);
    3933     2943707 :         if (free)
    3934     2922042 :                 btrfs_qgroup_free_refroot(inode->root->fs_info,
    3935     2922042 :                                 inode->root->root_key.objectid,
    3936             :                                 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
    3937     2943924 :         ret = changeset.bytes_changed;
    3938     2943924 : out:
    3939     2943924 :         extent_changeset_release(&changeset);
    3940     2943924 :         return ret;
    3941             : }
    3942             : 
    3943             : /*
    3944             :  * Free a reserved space range from io_tree and related qgroups
    3945             :  *
    3946             :  * Should be called when a range of pages get invalidated before reaching disk.
    3947             :  * Or for error cleanup case.
    3948             :  * if @reserved is given, only reserved range in [@start, @start + @len) will
    3949             :  * be freed.
    3950             :  *
    3951             :  * For data written to disk, use btrfs_qgroup_release_data().
    3952             :  *
    3953             :  * NOTE: This function may sleep for memory allocation.
    3954             :  */
    3955    86154543 : int btrfs_qgroup_free_data(struct btrfs_inode *inode,
    3956             :                         struct extent_changeset *reserved, u64 start, u64 len)
    3957             : {
    3958    86154543 :         return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
    3959             : }
    3960             : 
    3961             : /*
    3962             :  * Release a reserved space range from io_tree only.
    3963             :  *
    3964             :  * Should be called when a range of pages get written to disk and corresponding
    3965             :  * FILE_EXTENT is inserted into corresponding root.
    3966             :  *
    3967             :  * Since new qgroup accounting framework will only update qgroup numbers at
    3968             :  * commit_transaction() time, its reserved space shouldn't be freed from
    3969             :  * related qgroups.
    3970             :  *
    3971             :  * But we should release the range from io_tree, to allow further write to be
    3972             :  * COWed.
    3973             :  *
    3974             :  * NOTE: This function may sleep for memory allocation.
    3975             :  */
    3976     3559959 : int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
    3977             : {
    3978     3559959 :         return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
    3979             : }
    3980             : 
    3981     1948381 : static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
    3982             :                               enum btrfs_qgroup_rsv_type type)
    3983             : {
    3984     1948381 :         if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
    3985             :             type != BTRFS_QGROUP_RSV_META_PERTRANS)
    3986             :                 return;
    3987     1948381 :         if (num_bytes == 0)
    3988             :                 return;
    3989             : 
    3990     1948381 :         spin_lock(&root->qgroup_meta_rsv_lock);
    3991     1948381 :         if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
    3992     1907164 :                 root->qgroup_meta_rsv_prealloc += num_bytes;
    3993             :         else
    3994       41217 :                 root->qgroup_meta_rsv_pertrans += num_bytes;
    3995     1948381 :         spin_unlock(&root->qgroup_meta_rsv_lock);
    3996             : }
    3997             : 
    3998     1975521 : static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
    3999             :                              enum btrfs_qgroup_rsv_type type)
    4000             : {
    4001     1975521 :         if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
    4002             :             type != BTRFS_QGROUP_RSV_META_PERTRANS)
    4003             :                 return 0;
    4004     1975521 :         if (num_bytes == 0)
    4005             :                 return 0;
    4006             : 
    4007     1944241 :         spin_lock(&root->qgroup_meta_rsv_lock);
    4008     1944276 :         if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
    4009     1944276 :                 num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
    4010             :                                   num_bytes);
    4011     1944276 :                 root->qgroup_meta_rsv_prealloc -= num_bytes;
    4012             :         } else {
    4013           0 :                 num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
    4014             :                                   num_bytes);
    4015           0 :                 root->qgroup_meta_rsv_pertrans -= num_bytes;
    4016             :         }
    4017     1944276 :         spin_unlock(&root->qgroup_meta_rsv_lock);
    4018     1944276 :         return num_bytes;
    4019             : }
    4020             : 
    4021    69007072 : int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
    4022             :                               enum btrfs_qgroup_rsv_type type, bool enforce)
    4023             : {
    4024    69007072 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4025    69007072 :         int ret;
    4026             : 
    4027    69007072 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
    4028     3910685 :             !is_fstree(root->root_key.objectid) || num_bytes == 0)
    4029             :                 return 0;
    4030             : 
    4031     1966617 :         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
    4032     1966617 :         trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
    4033     1966601 :         ret = qgroup_reserve(root, num_bytes, enforce, type);
    4034     1966717 :         if (ret < 0)
    4035             :                 return ret;
    4036             :         /*
    4037             :          * Record what we have reserved into root.
    4038             :          *
    4039             :          * To avoid quota disabled->enabled underflow.
    4040             :          * In that case, we may try to free space we haven't reserved
    4041             :          * (since quota was disabled), so record what we reserved into root.
    4042             :          * And ensure later release won't underflow this number.
    4043             :          */
    4044     1948381 :         add_root_meta_rsv(root, num_bytes, type);
    4045     1948381 :         return ret;
    4046             : }
    4047             : 
    4048    66735053 : int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
    4049             :                                 enum btrfs_qgroup_rsv_type type, bool enforce,
    4050             :                                 bool noflush)
    4051             : {
    4052    66735053 :         int ret;
    4053             : 
    4054    66735053 :         ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
    4055    66707790 :         if ((ret <= 0 && ret != -EDQUOT) || noflush)
    4056             :                 return ret;
    4057             : 
    4058        8779 :         ret = try_flush_qgroup(root);
    4059        8745 :         if (ret < 0)
    4060             :                 return ret;
    4061        8745 :         return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
    4062             : }
    4063             : 
    4064      152038 : void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
    4065             : {
    4066      152038 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4067             : 
    4068      152038 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
    4069        8162 :             !is_fstree(root->root_key.objectid))
    4070             :                 return;
    4071             : 
    4072             :         /* TODO: Update trace point to handle such free */
    4073        7754 :         trace_qgroup_meta_free_all_pertrans(root);
    4074             :         /* Special value -1 means to free all reserved space */
    4075        7754 :         btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1,
    4076             :                                   BTRFS_QGROUP_RSV_META_PERTRANS);
    4077             : }
    4078             : 
    4079    40398349 : void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
    4080             :                               enum btrfs_qgroup_rsv_type type)
    4081             : {
    4082    40398349 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4083             : 
    4084    40398349 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
    4085     2304398 :             !is_fstree(root->root_key.objectid))
    4086             :                 return;
    4087             : 
    4088             :         /*
    4089             :          * reservation for META_PREALLOC can happen before quota is enabled,
    4090             :          * which can lead to underflow.
    4091             :          * Here ensure we will only free what we really have reserved.
    4092             :          */
    4093     1905492 :         num_bytes = sub_root_meta_rsv(root, num_bytes, type);
    4094     1905490 :         BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
    4095     1905490 :         trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
    4096     1905484 :         btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
    4097             :                                   num_bytes, type);
    4098             : }
    4099             : 
    4100       70076 : static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
    4101             :                                 int num_bytes)
    4102             : {
    4103       70076 :         struct btrfs_qgroup *qgroup;
    4104       70076 :         struct ulist_node *unode;
    4105       70076 :         struct ulist_iterator uiter;
    4106       70076 :         int ret = 0;
    4107             : 
    4108       70076 :         if (num_bytes == 0)
    4109       53509 :                 return;
    4110       16567 :         if (!fs_info->quota_root)
    4111             :                 return;
    4112             : 
    4113       16567 :         spin_lock(&fs_info->qgroup_lock);
    4114       16568 :         qgroup = find_qgroup_rb(fs_info, ref_root);
    4115       16568 :         if (!qgroup)
    4116           0 :                 goto out;
    4117       16568 :         ulist_reinit(fs_info->qgroup_ulist);
    4118       16568 :         ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
    4119             :                        qgroup_to_aux(qgroup), GFP_ATOMIC);
    4120       16568 :         if (ret < 0)
    4121           0 :                 goto out;
    4122       16568 :         ULIST_ITER_INIT(&uiter);
    4123       35549 :         while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
    4124       18981 :                 struct btrfs_qgroup *qg;
    4125       18981 :                 struct btrfs_qgroup_list *glist;
    4126             : 
    4127       18981 :                 qg = unode_aux_to_qgroup(unode);
    4128             : 
    4129       18981 :                 qgroup_rsv_release(fs_info, qg, num_bytes,
    4130             :                                 BTRFS_QGROUP_RSV_META_PREALLOC);
    4131       18981 :                 qgroup_rsv_add(fs_info, qg, num_bytes,
    4132             :                                 BTRFS_QGROUP_RSV_META_PERTRANS);
    4133       21394 :                 list_for_each_entry(glist, &qg->groups, next_group) {
    4134        2413 :                         ret = ulist_add(fs_info->qgroup_ulist,
    4135             :                                         glist->group->qgroupid,
    4136             :                                         qgroup_to_aux(glist->group), GFP_ATOMIC);
    4137        2413 :                         if (ret < 0)
    4138           0 :                                 goto out;
    4139             :                 }
    4140             :         }
    4141       16568 : out:
    4142       16568 :         spin_unlock(&fs_info->qgroup_lock);
    4143             : }
    4144             : 
    4145    29213826 : void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
    4146             : {
    4147    29213826 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4148             : 
    4149    29213826 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
    4150       74385 :             !is_fstree(root->root_key.objectid))
    4151             :                 return;
    4152             :         /* Same as btrfs_qgroup_free_meta_prealloc() */
    4153       70057 :         num_bytes = sub_root_meta_rsv(root, num_bytes,
    4154             :                                       BTRFS_QGROUP_RSV_META_PREALLOC);
    4155       70075 :         trace_qgroup_meta_convert(root, num_bytes);
    4156       70074 :         qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
    4157             : }
    4158             : 
    4159             : /*
    4160             :  * Check qgroup reserved space leaking, normally at destroy inode
    4161             :  * time
    4162             :  */
    4163     3851696 : void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
    4164             : {
    4165     3851696 :         struct extent_changeset changeset;
    4166     3851696 :         struct ulist_node *unode;
    4167     3851696 :         struct ulist_iterator iter;
    4168     3851696 :         int ret;
    4169             : 
    4170     3851696 :         extent_changeset_init(&changeset);
    4171     3851633 :         ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
    4172             :                         EXTENT_QGROUP_RESERVED, &changeset);
    4173             : 
    4174     3851618 :         WARN_ON(ret < 0);
    4175     3851618 :         if (WARN_ON(changeset.bytes_changed)) {
    4176           0 :                 ULIST_ITER_INIT(&iter);
    4177           0 :                 while ((unode = ulist_next(&changeset.range_changed, &iter))) {
    4178           0 :                         btrfs_warn(inode->root->fs_info,
    4179             :                 "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu",
    4180             :                                 btrfs_ino(inode), unode->val, unode->aux);
    4181             :                 }
    4182           0 :                 btrfs_qgroup_free_refroot(inode->root->fs_info,
    4183           0 :                                 inode->root->root_key.objectid,
    4184             :                                 changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
    4185             : 
    4186             :         }
    4187     3851618 :         extent_changeset_release(&changeset);
    4188     3851578 : }
    4189             : 
    4190       65223 : void btrfs_qgroup_init_swapped_blocks(
    4191             :         struct btrfs_qgroup_swapped_blocks *swapped_blocks)
    4192             : {
    4193       65223 :         int i;
    4194             : 
    4195       65223 :         spin_lock_init(&swapped_blocks->lock);
    4196      652230 :         for (i = 0; i < BTRFS_MAX_LEVEL; i++)
    4197      521784 :                 swapped_blocks->blocks[i] = RB_ROOT;
    4198       65223 :         swapped_blocks->swapped = false;
    4199       65223 : }
    4200             : 
    4201             : /*
    4202             :  * Delete all swapped blocks record of @root.
    4203             :  * Every record here means we skipped a full subtree scan for qgroup.
    4204             :  *
    4205             :  * Gets called when committing one transaction.
    4206             :  */
    4207      998174 : void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
    4208             : {
    4209      998174 :         struct btrfs_qgroup_swapped_blocks *swapped_blocks;
    4210      998174 :         int i;
    4211             : 
    4212      998174 :         swapped_blocks = &root->swapped_blocks;
    4213             : 
    4214      998174 :         spin_lock(&swapped_blocks->lock);
    4215      998174 :         if (!swapped_blocks->swapped)
    4216      998150 :                 goto out;
    4217         216 :         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
    4218         192 :                 struct rb_root *cur_root = &swapped_blocks->blocks[i];
    4219         192 :                 struct btrfs_qgroup_swapped_block *entry;
    4220         192 :                 struct btrfs_qgroup_swapped_block *next;
    4221             : 
    4222         413 :                 rbtree_postorder_for_each_entry_safe(entry, next, cur_root,
    4223             :                                                      node)
    4224          29 :                         kfree(entry);
    4225         192 :                 swapped_blocks->blocks[i] = RB_ROOT;
    4226             :         }
    4227          24 :         swapped_blocks->swapped = false;
    4228      998174 : out:
    4229      998174 :         spin_unlock(&swapped_blocks->lock);
    4230      998174 : }
    4231             : 
    4232             : /*
    4233             :  * Add subtree roots record into @subvol_root.
    4234             :  *
    4235             :  * @subvol_root:        tree root of the subvolume tree get swapped
    4236             :  * @bg:                 block group under balance
    4237             :  * @subvol_parent/slot: pointer to the subtree root in subvolume tree
    4238             :  * @reloc_parent/slot:  pointer to the subtree root in reloc tree
    4239             :  *                      BOTH POINTERS ARE BEFORE TREE SWAP
    4240             :  * @last_snapshot:      last snapshot generation of the subvolume tree
    4241             :  */
    4242       43324 : int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
    4243             :                 struct btrfs_root *subvol_root,
    4244             :                 struct btrfs_block_group *bg,
    4245             :                 struct extent_buffer *subvol_parent, int subvol_slot,
    4246             :                 struct extent_buffer *reloc_parent, int reloc_slot,
    4247             :                 u64 last_snapshot)
    4248             : {
    4249       43324 :         struct btrfs_fs_info *fs_info = subvol_root->fs_info;
    4250       43324 :         struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
    4251       43324 :         struct btrfs_qgroup_swapped_block *block;
    4252       43324 :         struct rb_node **cur;
    4253       43324 :         struct rb_node *parent = NULL;
    4254       43324 :         int level = btrfs_header_level(subvol_parent) - 1;
    4255       43324 :         int ret = 0;
    4256             : 
    4257       43324 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    4258             :                 return 0;
    4259             : 
    4260          58 :         if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
    4261             :             btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
    4262           0 :                 btrfs_err_rl(fs_info,
    4263             :                 "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
    4264             :                         __func__,
    4265             :                         btrfs_node_ptr_generation(subvol_parent, subvol_slot),
    4266             :                         btrfs_node_ptr_generation(reloc_parent, reloc_slot));
    4267           0 :                 return -EUCLEAN;
    4268             :         }
    4269             : 
    4270          29 :         block = kmalloc(sizeof(*block), GFP_NOFS);
    4271          29 :         if (!block) {
    4272           0 :                 ret = -ENOMEM;
    4273           0 :                 goto out;
    4274             :         }
    4275             : 
    4276             :         /*
    4277             :          * @reloc_parent/slot is still before swap, while @block is going to
    4278             :          * record the bytenr after swap, so we do the swap here.
    4279             :          */
    4280          29 :         block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot);
    4281          29 :         block->subvol_generation = btrfs_node_ptr_generation(reloc_parent,
    4282             :                                                              reloc_slot);
    4283          29 :         block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot);
    4284          29 :         block->reloc_generation = btrfs_node_ptr_generation(subvol_parent,
    4285             :                                                             subvol_slot);
    4286          29 :         block->last_snapshot = last_snapshot;
    4287          29 :         block->level = level;
    4288             : 
    4289             :         /*
    4290             :          * If we have bg == NULL, we're called from btrfs_recover_relocation(),
    4291             :          * no one else can modify tree blocks thus we qgroup will not change
    4292             :          * no matter the value of trace_leaf.
    4293             :          */
    4294          29 :         if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
    4295          29 :                 block->trace_leaf = true;
    4296             :         else
    4297           0 :                 block->trace_leaf = false;
    4298          29 :         btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot);
    4299             : 
    4300             :         /* Insert @block into @blocks */
    4301          29 :         spin_lock(&blocks->lock);
    4302          29 :         cur = &blocks->blocks[level].rb_node;
    4303          36 :         while (*cur) {
    4304           7 :                 struct btrfs_qgroup_swapped_block *entry;
    4305             : 
    4306           7 :                 parent = *cur;
    4307           7 :                 entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
    4308             :                                  node);
    4309             : 
    4310           7 :                 if (entry->subvol_bytenr < block->subvol_bytenr) {
    4311           7 :                         cur = &(*cur)->rb_left;
    4312           0 :                 } else if (entry->subvol_bytenr > block->subvol_bytenr) {
    4313           0 :                         cur = &(*cur)->rb_right;
    4314             :                 } else {
    4315           0 :                         if (entry->subvol_generation !=
    4316           0 :                                         block->subvol_generation ||
    4317           0 :                             entry->reloc_bytenr != block->reloc_bytenr ||
    4318           0 :                             entry->reloc_generation !=
    4319           0 :                                         block->reloc_generation) {
    4320             :                                 /*
    4321             :                                  * Duplicated but mismatch entry found.
    4322             :                                  * Shouldn't happen.
    4323             :                                  *
    4324             :                                  * Marking qgroup inconsistent should be enough
    4325             :                                  * for end users.
    4326             :                                  */
    4327           0 :                                 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
    4328           0 :                                 ret = -EEXIST;
    4329             :                         }
    4330           0 :                         kfree(block);
    4331           0 :                         goto out_unlock;
    4332             :                 }
    4333             :         }
    4334          29 :         rb_link_node(&block->node, parent, cur);
    4335          29 :         rb_insert_color(&block->node, &blocks->blocks[level]);
    4336          29 :         blocks->swapped = true;
    4337          29 : out_unlock:
    4338          29 :         spin_unlock(&blocks->lock);
    4339             : out:
    4340          29 :         if (ret < 0)
    4341           0 :                 qgroup_mark_inconsistent(fs_info);
    4342             :         return ret;
    4343             : }
    4344             : 
    4345             : /*
    4346             :  * Check if the tree block is a subtree root, and if so do the needed
    4347             :  * delayed subtree trace for qgroup.
    4348             :  *
    4349             :  * This is called during btrfs_cow_block().
    4350             :  */
    4351     8419473 : int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
    4352             :                                          struct btrfs_root *root,
    4353             :                                          struct extent_buffer *subvol_eb)
    4354             : {
    4355     8419473 :         struct btrfs_fs_info *fs_info = root->fs_info;
    4356     8419473 :         struct btrfs_tree_parent_check check = { 0 };
    4357     8419473 :         struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
    4358     8419473 :         struct btrfs_qgroup_swapped_block *block;
    4359     8419473 :         struct extent_buffer *reloc_eb = NULL;
    4360     8419473 :         struct rb_node *node;
    4361     8419473 :         bool found = false;
    4362     8419473 :         bool swapped = false;
    4363     8419473 :         int level = btrfs_header_level(subvol_eb);
    4364     8419473 :         int ret = 0;
    4365     8419473 :         int i;
    4366             : 
    4367     8419473 :         if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
    4368             :                 return 0;
    4369       82730 :         if (!is_fstree(root->root_key.objectid) || !root->reloc_root)
    4370             :                 return 0;
    4371             : 
    4372        3507 :         spin_lock(&blocks->lock);
    4373        3507 :         if (!blocks->swapped) {
    4374        3507 :                 spin_unlock(&blocks->lock);
    4375        3507 :                 return 0;
    4376             :         }
    4377           0 :         node = blocks->blocks[level].rb_node;
    4378             : 
    4379           0 :         while (node) {
    4380           0 :                 block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
    4381           0 :                 if (block->subvol_bytenr < subvol_eb->start) {
    4382           0 :                         node = node->rb_left;
    4383           0 :                 } else if (block->subvol_bytenr > subvol_eb->start) {
    4384           0 :                         node = node->rb_right;
    4385             :                 } else {
    4386             :                         found = true;
    4387             :                         break;
    4388             :                 }
    4389             :         }
    4390           0 :         if (!found) {
    4391           0 :                 spin_unlock(&blocks->lock);
    4392           0 :                 goto out;
    4393             :         }
    4394             :         /* Found one, remove it from @blocks first and update blocks->swapped */
    4395           0 :         rb_erase(&block->node, &blocks->blocks[level]);
    4396           0 :         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
    4397           0 :                 if (RB_EMPTY_ROOT(&blocks->blocks[i])) {
    4398             :                         swapped = true;
    4399             :                         break;
    4400             :                 }
    4401             :         }
    4402           0 :         blocks->swapped = swapped;
    4403           0 :         spin_unlock(&blocks->lock);
    4404             : 
    4405           0 :         check.level = block->level;
    4406           0 :         check.transid = block->reloc_generation;
    4407           0 :         check.has_first_key = true;
    4408           0 :         memcpy(&check.first_key, &block->first_key, sizeof(check.first_key));
    4409             : 
    4410             :         /* Read out reloc subtree root */
    4411           0 :         reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check);
    4412           0 :         if (IS_ERR(reloc_eb)) {
    4413           0 :                 ret = PTR_ERR(reloc_eb);
    4414           0 :                 reloc_eb = NULL;
    4415           0 :                 goto free_out;
    4416             :         }
    4417           0 :         if (!extent_buffer_uptodate(reloc_eb)) {
    4418           0 :                 ret = -EIO;
    4419           0 :                 goto free_out;
    4420             :         }
    4421             : 
    4422           0 :         ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
    4423           0 :                         block->last_snapshot, block->trace_leaf);
    4424           0 : free_out:
    4425           0 :         kfree(block);
    4426           0 :         free_extent_buffer(reloc_eb);
    4427             : out:
    4428           0 :         if (ret < 0) {
    4429           0 :                 btrfs_err_rl(fs_info,
    4430             :                              "failed to account subtree at bytenr %llu: %d",
    4431             :                              subvol_eb->start, ret);
    4432           0 :                 qgroup_mark_inconsistent(fs_info);
    4433             :         }
    4434             :         return ret;
    4435             : }
    4436             : 
    4437           2 : void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
    4438             : {
    4439           2 :         struct btrfs_qgroup_extent_record *entry;
    4440           2 :         struct btrfs_qgroup_extent_record *next;
    4441           2 :         struct rb_root *root;
    4442             : 
    4443           2 :         root = &trans->delayed_refs.dirty_extent_root;
    4444           4 :         rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
    4445           0 :                 ulist_free(entry->old_roots);
    4446           0 :                 kfree(entry);
    4447             :         }
    4448           2 :         *root = RB_ROOT;
    4449           2 : }

Generated by: LCOV version 1.14