Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2011 STRATO. All rights reserved.
4 : */
5 :
6 : #include <linux/sched.h>
7 : #include <linux/pagemap.h>
8 : #include <linux/writeback.h>
9 : #include <linux/blkdev.h>
10 : #include <linux/rbtree.h>
11 : #include <linux/slab.h>
12 : #include <linux/workqueue.h>
13 : #include <linux/btrfs.h>
14 : #include <linux/sched/mm.h>
15 :
16 : #include "ctree.h"
17 : #include "transaction.h"
18 : #include "disk-io.h"
19 : #include "locking.h"
20 : #include "ulist.h"
21 : #include "backref.h"
22 : #include "extent_io.h"
23 : #include "qgroup.h"
24 : #include "block-group.h"
25 : #include "sysfs.h"
26 : #include "tree-mod-log.h"
27 : #include "fs.h"
28 : #include "accessors.h"
29 : #include "extent-tree.h"
30 : #include "root-tree.h"
31 : #include "tree-checker.h"
32 :
33 : /*
34 : * Helpers to access qgroup reservation
35 : *
36 : * Callers should ensure the lock context and type are valid
37 : */
38 :
39 : static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
40 : {
41 : u64 ret = 0;
42 : int i;
43 :
44 13837812 : for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
45 10378359 : ret += qgroup->rsv.values[i];
46 :
47 3459453 : return ret;
48 : }
49 :
50 : #ifdef CONFIG_BTRFS_DEBUG
51 : static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
52 : {
53 : if (type == BTRFS_QGROUP_RSV_DATA)
54 : return "data";
55 : if (type == BTRFS_QGROUP_RSV_META_PERTRANS)
56 : return "meta_pertrans";
57 : if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
58 : return "meta_prealloc";
59 : return NULL;
60 : }
61 : #endif
62 :
63 3851219 : static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
64 : struct btrfs_qgroup *qgroup, u64 num_bytes,
65 : enum btrfs_qgroup_rsv_type type)
66 : {
67 3851219 : trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
68 3871121 : qgroup->rsv.values[type] += num_bytes;
69 3851219 : }
70 :
71 2583718 : static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
72 : struct btrfs_qgroup *qgroup, u64 num_bytes,
73 : enum btrfs_qgroup_rsv_type type)
74 : {
75 2583718 : trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
76 2583718 : if (qgroup->rsv.values[type] >= num_bytes) {
77 2583718 : qgroup->rsv.values[type] -= num_bytes;
78 2583718 : return;
79 : }
80 : #ifdef CONFIG_BTRFS_DEBUG
81 : WARN_RATELIMIT(1,
82 : "qgroup %llu %s reserved space underflow, have %llu to free %llu",
83 : qgroup->qgroupid, qgroup_rsv_type_str(type),
84 : qgroup->rsv.values[type], num_bytes);
85 : #endif
86 0 : qgroup->rsv.values[type] = 0;
87 : }
88 :
89 4 : static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
90 : struct btrfs_qgroup *dest,
91 : struct btrfs_qgroup *src)
92 : {
93 4 : int i;
94 :
95 16 : for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
96 12 : qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
97 4 : }
98 :
99 0 : static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
100 : struct btrfs_qgroup *dest,
101 : struct btrfs_qgroup *src)
102 : {
103 0 : int i;
104 :
105 0 : for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
106 0 : qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
107 0 : }
108 :
109 : static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
110 : int mod)
111 : {
112 2441649 : if (qg->old_refcnt < seq)
113 2441649 : qg->old_refcnt = seq;
114 2441649 : qg->old_refcnt += mod;
115 2441649 : }
116 :
117 : static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
118 : int mod)
119 : {
120 2279145 : if (qg->new_refcnt < seq)
121 2279144 : qg->new_refcnt = seq;
122 2279145 : qg->new_refcnt += mod;
123 2279145 : }
124 :
125 : static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
126 : {
127 2510446 : if (qg->old_refcnt < seq)
128 : return 0;
129 2441649 : return qg->old_refcnt - seq;
130 : }
131 :
132 : static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
133 : {
134 2510446 : if (qg->new_refcnt < seq)
135 : return 0;
136 2279144 : return qg->new_refcnt - seq;
137 : }
138 :
139 : /*
140 : * glue structure to represent the relations between qgroups.
141 : */
142 : struct btrfs_qgroup_list {
143 : struct list_head next_group;
144 : struct list_head next_member;
145 : struct btrfs_qgroup *group;
146 : struct btrfs_qgroup *member;
147 : };
148 :
149 : static inline u64 qgroup_to_aux(struct btrfs_qgroup *qg)
150 : {
151 11200032 : return (u64)(uintptr_t)qg;
152 : }
153 :
154 : static inline struct btrfs_qgroup* unode_aux_to_qgroup(struct ulist_node *n)
155 : {
156 17558349 : return (struct btrfs_qgroup *)(uintptr_t)n->aux;
157 : }
158 :
159 : static int
160 : qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
161 : int init_flags);
162 : static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
163 :
164 : /* must be called with qgroup_ioctl_lock held */
165 11183974 : static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
166 : u64 qgroupid)
167 : {
168 11183974 : struct rb_node *n = fs_info->qgroup_tree.rb_node;
169 11183974 : struct btrfs_qgroup *qgroup;
170 :
171 37745061 : while (n) {
172 37744787 : qgroup = rb_entry(n, struct btrfs_qgroup, node);
173 37744787 : if (qgroup->qgroupid < qgroupid)
174 20388964 : n = n->rb_left;
175 17355823 : else if (qgroup->qgroupid > qgroupid)
176 6172123 : n = n->rb_right;
177 : else
178 11183700 : return qgroup;
179 : }
180 : return NULL;
181 : }
182 :
183 : /* must be called with qgroup_lock held */
184 593 : static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
185 : u64 qgroupid)
186 : {
187 593 : struct rb_node **p = &fs_info->qgroup_tree.rb_node;
188 593 : struct rb_node *parent = NULL;
189 593 : struct btrfs_qgroup *qgroup;
190 :
191 3053 : while (*p) {
192 2460 : parent = *p;
193 2460 : qgroup = rb_entry(parent, struct btrfs_qgroup, node);
194 :
195 2460 : if (qgroup->qgroupid < qgroupid)
196 2436 : p = &(*p)->rb_left;
197 24 : else if (qgroup->qgroupid > qgroupid)
198 24 : p = &(*p)->rb_right;
199 : else
200 0 : return qgroup;
201 : }
202 :
203 593 : qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
204 593 : if (!qgroup)
205 : return ERR_PTR(-ENOMEM);
206 :
207 593 : qgroup->qgroupid = qgroupid;
208 593 : INIT_LIST_HEAD(&qgroup->groups);
209 593 : INIT_LIST_HEAD(&qgroup->members);
210 593 : INIT_LIST_HEAD(&qgroup->dirty);
211 :
212 593 : rb_link_node(&qgroup->node, parent, p);
213 593 : rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
214 :
215 593 : return qgroup;
216 : }
217 :
218 593 : static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
219 : struct btrfs_qgroup *qgroup)
220 : {
221 593 : struct btrfs_qgroup_list *list;
222 :
223 593 : list_del(&qgroup->dirty);
224 593 : while (!list_empty(&qgroup->groups)) {
225 0 : list = list_first_entry(&qgroup->groups,
226 : struct btrfs_qgroup_list, next_group);
227 0 : list_del(&list->next_group);
228 0 : list_del(&list->next_member);
229 0 : kfree(list);
230 : }
231 :
232 619 : while (!list_empty(&qgroup->members)) {
233 26 : list = list_first_entry(&qgroup->members,
234 : struct btrfs_qgroup_list, next_member);
235 26 : list_del(&list->next_group);
236 26 : list_del(&list->next_member);
237 26 : kfree(list);
238 : }
239 593 : }
240 :
241 : /* must be called with qgroup_lock held */
242 38 : static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
243 : {
244 38 : struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
245 :
246 38 : if (!qgroup)
247 : return -ENOENT;
248 :
249 38 : rb_erase(&qgroup->node, &fs_info->qgroup_tree);
250 38 : __del_qgroup_rb(fs_info, qgroup);
251 38 : return 0;
252 : }
253 :
254 : /*
255 : * Add relation specified by two qgroups.
256 : *
257 : * Must be called with qgroup_lock held.
258 : *
259 : * Return: 0 on success
260 : * -ENOENT if one of the qgroups is NULL
261 : * <0 other errors
262 : */
263 27 : static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
264 : {
265 27 : struct btrfs_qgroup_list *list;
266 :
267 27 : if (!member || !parent)
268 : return -ENOENT;
269 :
270 27 : list = kzalloc(sizeof(*list), GFP_ATOMIC);
271 27 : if (!list)
272 : return -ENOMEM;
273 :
274 27 : list->group = parent;
275 27 : list->member = member;
276 27 : list_add_tail(&list->next_group, &member->groups);
277 27 : list_add_tail(&list->next_member, &parent->members);
278 :
279 27 : return 0;
280 : }
281 :
282 : /*
283 : * Add relation specified by two qgroup ids.
284 : *
285 : * Must be called with qgroup_lock held.
286 : *
287 : * Return: 0 on success
288 : * -ENOENT if one of the ids does not exist
289 : * <0 other errors
290 : */
291 22 : static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
292 : {
293 22 : struct btrfs_qgroup *member;
294 22 : struct btrfs_qgroup *parent;
295 :
296 22 : member = find_qgroup_rb(fs_info, memberid);
297 22 : parent = find_qgroup_rb(fs_info, parentid);
298 :
299 22 : return __add_relation_rb(member, parent);
300 : }
301 :
302 : /* Must be called with qgroup_lock held */
303 1 : static int del_relation_rb(struct btrfs_fs_info *fs_info,
304 : u64 memberid, u64 parentid)
305 : {
306 1 : struct btrfs_qgroup *member;
307 1 : struct btrfs_qgroup *parent;
308 1 : struct btrfs_qgroup_list *list;
309 :
310 1 : member = find_qgroup_rb(fs_info, memberid);
311 1 : parent = find_qgroup_rb(fs_info, parentid);
312 1 : if (!member || !parent)
313 : return -ENOENT;
314 :
315 1 : list_for_each_entry(list, &member->groups, next_group) {
316 1 : if (list->group == parent) {
317 1 : list_del(&list->next_group);
318 1 : list_del(&list->next_member);
319 1 : kfree(list);
320 1 : return 0;
321 : }
322 : }
323 : return -ENOENT;
324 : }
325 :
326 : #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
327 : int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
328 : u64 rfer, u64 excl)
329 : {
330 : struct btrfs_qgroup *qgroup;
331 :
332 : qgroup = find_qgroup_rb(fs_info, qgroupid);
333 : if (!qgroup)
334 : return -EINVAL;
335 : if (qgroup->rfer != rfer || qgroup->excl != excl)
336 : return -EINVAL;
337 : return 0;
338 : }
339 : #endif
340 :
341 : static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info)
342 : {
343 3 : fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
344 : BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
345 : BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
346 3 : }
347 :
348 : /*
349 : * The full config is read in one go, only called from open_ctree()
350 : * It doesn't use any locking, as at this point we're still single-threaded
351 : */
352 3217 : int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
353 : {
354 3217 : struct btrfs_key key;
355 3217 : struct btrfs_key found_key;
356 3217 : struct btrfs_root *quota_root = fs_info->quota_root;
357 3217 : struct btrfs_path *path = NULL;
358 3217 : struct extent_buffer *l;
359 3217 : int slot;
360 3217 : int ret = 0;
361 3217 : u64 flags = 0;
362 3217 : u64 rescan_progress = 0;
363 :
364 3217 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
365 : return 0;
366 :
367 22 : fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
368 22 : if (!fs_info->qgroup_ulist) {
369 0 : ret = -ENOMEM;
370 0 : goto out;
371 : }
372 :
373 22 : path = btrfs_alloc_path();
374 22 : if (!path) {
375 0 : ret = -ENOMEM;
376 0 : goto out;
377 : }
378 :
379 22 : ret = btrfs_sysfs_add_qgroups(fs_info);
380 22 : if (ret < 0)
381 0 : goto out;
382 : /* default this to quota off, in case no status key is found */
383 22 : fs_info->qgroup_flags = 0;
384 :
385 : /*
386 : * pass 1: read status, all qgroup infos and limits
387 : */
388 22 : key.objectid = 0;
389 22 : key.type = 0;
390 22 : key.offset = 0;
391 22 : ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
392 22 : if (ret)
393 0 : goto out;
394 :
395 356 : while (1) {
396 356 : struct btrfs_qgroup *qgroup;
397 :
398 356 : slot = path->slots[0];
399 356 : l = path->nodes[0];
400 356 : btrfs_item_key_to_cpu(l, &found_key, slot);
401 :
402 356 : if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
403 22 : struct btrfs_qgroup_status_item *ptr;
404 :
405 22 : ptr = btrfs_item_ptr(l, slot,
406 : struct btrfs_qgroup_status_item);
407 :
408 22 : if (btrfs_qgroup_status_version(l, ptr) !=
409 : BTRFS_QGROUP_STATUS_VERSION) {
410 0 : btrfs_err(fs_info,
411 : "old qgroup version, quota disabled");
412 0 : goto out;
413 : }
414 22 : if (btrfs_qgroup_status_generation(l, ptr) !=
415 22 : fs_info->generation) {
416 0 : qgroup_mark_inconsistent(fs_info);
417 0 : btrfs_err(fs_info,
418 : "qgroup generation mismatch, marked as inconsistent");
419 : }
420 22 : fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
421 : ptr);
422 22 : rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
423 22 : goto next1;
424 : }
425 :
426 334 : if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
427 : found_key.type != BTRFS_QGROUP_LIMIT_KEY)
428 22 : goto next1;
429 :
430 312 : qgroup = find_qgroup_rb(fs_info, found_key.offset);
431 312 : if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
432 156 : (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
433 0 : btrfs_err(fs_info, "inconsistent qgroup config");
434 0 : qgroup_mark_inconsistent(fs_info);
435 : }
436 312 : if (!qgroup) {
437 156 : qgroup = add_qgroup_rb(fs_info, found_key.offset);
438 156 : if (IS_ERR(qgroup)) {
439 0 : ret = PTR_ERR(qgroup);
440 0 : goto out;
441 : }
442 : }
443 312 : ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
444 312 : if (ret < 0)
445 0 : goto out;
446 :
447 312 : switch (found_key.type) {
448 : case BTRFS_QGROUP_INFO_KEY: {
449 156 : struct btrfs_qgroup_info_item *ptr;
450 :
451 156 : ptr = btrfs_item_ptr(l, slot,
452 : struct btrfs_qgroup_info_item);
453 156 : qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
454 156 : qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
455 156 : qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
456 156 : qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
457 : /* generation currently unused */
458 156 : break;
459 : }
460 : case BTRFS_QGROUP_LIMIT_KEY: {
461 156 : struct btrfs_qgroup_limit_item *ptr;
462 :
463 156 : ptr = btrfs_item_ptr(l, slot,
464 : struct btrfs_qgroup_limit_item);
465 156 : qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
466 156 : qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
467 156 : qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
468 156 : qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
469 156 : qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
470 156 : break;
471 : }
472 : }
473 356 : next1:
474 356 : ret = btrfs_next_item(quota_root, path);
475 356 : if (ret < 0)
476 0 : goto out;
477 356 : if (ret)
478 : break;
479 : }
480 22 : btrfs_release_path(path);
481 :
482 : /*
483 : * pass 2: read all qgroup relations
484 : */
485 22 : key.objectid = 0;
486 22 : key.type = BTRFS_QGROUP_RELATION_KEY;
487 22 : key.offset = 0;
488 22 : ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
489 22 : if (ret)
490 20 : goto out;
491 22 : while (1) {
492 22 : slot = path->slots[0];
493 22 : l = path->nodes[0];
494 22 : btrfs_item_key_to_cpu(l, &found_key, slot);
495 :
496 22 : if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
497 0 : goto next2;
498 :
499 22 : if (found_key.objectid > found_key.offset) {
500 : /* parent <- member, not needed to build config */
501 : /* FIXME should we omit the key completely? */
502 11 : goto next2;
503 : }
504 :
505 11 : ret = add_relation_rb(fs_info, found_key.objectid,
506 : found_key.offset);
507 11 : if (ret == -ENOENT) {
508 0 : btrfs_warn(fs_info,
509 : "orphan qgroup relation 0x%llx->0x%llx",
510 : found_key.objectid, found_key.offset);
511 0 : ret = 0; /* ignore the error */
512 : }
513 11 : if (ret)
514 0 : goto out;
515 11 : next2:
516 22 : ret = btrfs_next_item(quota_root, path);
517 22 : if (ret < 0)
518 0 : goto out;
519 22 : if (ret)
520 : break;
521 : }
522 2 : out:
523 22 : btrfs_free_path(path);
524 22 : fs_info->qgroup_flags |= flags;
525 22 : if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
526 0 : clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
527 22 : else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
528 : ret >= 0)
529 1 : ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
530 :
531 22 : if (ret < 0) {
532 0 : ulist_free(fs_info->qgroup_ulist);
533 0 : fs_info->qgroup_ulist = NULL;
534 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
535 0 : btrfs_sysfs_del_qgroups(fs_info);
536 : }
537 :
538 22 : return ret < 0 ? ret : 0;
539 : }
540 :
541 : /*
542 : * Called in close_ctree() when quota is still enabled. This verifies we don't
543 : * leak some reserved space.
544 : *
545 : * Return false if no reserved space is left.
546 : * Return true if some reserved space is leaked.
547 : */
548 3217 : bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
549 : {
550 3217 : struct rb_node *node;
551 3217 : bool ret = false;
552 :
553 3217 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
554 : return ret;
555 : /*
556 : * Since we're unmounting, there is no race and no need to grab qgroup
557 : * lock. And here we don't go post-order to provide a more user
558 : * friendly sorted result.
559 : */
560 462 : for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) {
561 : struct btrfs_qgroup *qgroup;
562 : int i;
563 :
564 : qgroup = rb_entry(node, struct btrfs_qgroup, node);
565 1632 : for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) {
566 1224 : if (qgroup->rsv.values[i]) {
567 0 : ret = true;
568 0 : btrfs_warn(fs_info,
569 : "qgroup %hu/%llu has unreleased space, type %d rsv %llu",
570 : btrfs_qgroup_level(qgroup->qgroupid),
571 : btrfs_qgroup_subvolid(qgroup->qgroupid),
572 : i, qgroup->rsv.values[i]);
573 : }
574 : }
575 : }
576 : return ret;
577 : }
578 :
579 : /*
580 : * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
581 : * first two are in single-threaded paths.And for the third one, we have set
582 : * quota_root to be null with qgroup_lock held before, so it is safe to clean
583 : * up the in-memory structures without qgroup_lock held.
584 : */
585 3349 : void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
586 : {
587 3349 : struct rb_node *n;
588 3349 : struct btrfs_qgroup *qgroup;
589 :
590 3904 : while ((n = rb_first(&fs_info->qgroup_tree))) {
591 555 : qgroup = rb_entry(n, struct btrfs_qgroup, node);
592 555 : rb_erase(n, &fs_info->qgroup_tree);
593 555 : __del_qgroup_rb(fs_info, qgroup);
594 555 : btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
595 555 : kfree(qgroup);
596 : }
597 : /*
598 : * We call btrfs_free_qgroup_config() when unmounting
599 : * filesystem and disabling quota, so we set qgroup_ulist
600 : * to be null here to avoid double free.
601 : */
602 3349 : ulist_free(fs_info->qgroup_ulist);
603 3349 : fs_info->qgroup_ulist = NULL;
604 3349 : btrfs_sysfs_del_qgroups(fs_info);
605 3349 : }
606 :
607 32 : static int add_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
608 : u64 dst)
609 : {
610 32 : int ret;
611 32 : struct btrfs_root *quota_root = trans->fs_info->quota_root;
612 32 : struct btrfs_path *path;
613 32 : struct btrfs_key key;
614 :
615 32 : path = btrfs_alloc_path();
616 32 : if (!path)
617 : return -ENOMEM;
618 :
619 32 : key.objectid = src;
620 32 : key.type = BTRFS_QGROUP_RELATION_KEY;
621 32 : key.offset = dst;
622 :
623 32 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
624 :
625 32 : btrfs_mark_buffer_dirty(path->nodes[0]);
626 :
627 32 : btrfs_free_path(path);
628 32 : return ret;
629 : }
630 :
631 2 : static int del_qgroup_relation_item(struct btrfs_trans_handle *trans, u64 src,
632 : u64 dst)
633 : {
634 2 : int ret;
635 2 : struct btrfs_root *quota_root = trans->fs_info->quota_root;
636 2 : struct btrfs_path *path;
637 2 : struct btrfs_key key;
638 :
639 2 : path = btrfs_alloc_path();
640 2 : if (!path)
641 : return -ENOMEM;
642 :
643 2 : key.objectid = src;
644 2 : key.type = BTRFS_QGROUP_RELATION_KEY;
645 2 : key.offset = dst;
646 :
647 2 : ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
648 2 : if (ret < 0)
649 0 : goto out;
650 :
651 2 : if (ret > 0) {
652 0 : ret = -ENOENT;
653 0 : goto out;
654 : }
655 :
656 2 : ret = btrfs_del_item(trans, quota_root, path);
657 2 : out:
658 2 : btrfs_free_path(path);
659 2 : return ret;
660 : }
661 :
662 437 : static int add_qgroup_item(struct btrfs_trans_handle *trans,
663 : struct btrfs_root *quota_root, u64 qgroupid)
664 : {
665 437 : int ret;
666 437 : struct btrfs_path *path;
667 437 : struct btrfs_qgroup_info_item *qgroup_info;
668 437 : struct btrfs_qgroup_limit_item *qgroup_limit;
669 437 : struct extent_buffer *leaf;
670 437 : struct btrfs_key key;
671 :
672 437 : if (btrfs_is_testing(quota_root->fs_info))
673 : return 0;
674 :
675 437 : path = btrfs_alloc_path();
676 437 : if (!path)
677 : return -ENOMEM;
678 :
679 437 : key.objectid = 0;
680 437 : key.type = BTRFS_QGROUP_INFO_KEY;
681 437 : key.offset = qgroupid;
682 :
683 : /*
684 : * Avoid a transaction abort by catching -EEXIST here. In that
685 : * case, we proceed by re-initializing the existing structure
686 : * on disk.
687 : */
688 :
689 437 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
690 : sizeof(*qgroup_info));
691 437 : if (ret && ret != -EEXIST)
692 0 : goto out;
693 :
694 437 : leaf = path->nodes[0];
695 437 : qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
696 : struct btrfs_qgroup_info_item);
697 437 : btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
698 437 : btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
699 437 : btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
700 437 : btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
701 437 : btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);
702 :
703 437 : btrfs_mark_buffer_dirty(leaf);
704 :
705 437 : btrfs_release_path(path);
706 :
707 437 : key.type = BTRFS_QGROUP_LIMIT_KEY;
708 437 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
709 : sizeof(*qgroup_limit));
710 437 : if (ret && ret != -EEXIST)
711 0 : goto out;
712 :
713 437 : leaf = path->nodes[0];
714 437 : qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
715 : struct btrfs_qgroup_limit_item);
716 437 : btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
717 437 : btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
718 437 : btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
719 437 : btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
720 437 : btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);
721 :
722 437 : btrfs_mark_buffer_dirty(leaf);
723 :
724 437 : ret = 0;
725 437 : out:
726 437 : btrfs_free_path(path);
727 437 : return ret;
728 : }
729 :
730 38 : static int del_qgroup_item(struct btrfs_trans_handle *trans, u64 qgroupid)
731 : {
732 38 : int ret;
733 38 : struct btrfs_root *quota_root = trans->fs_info->quota_root;
734 38 : struct btrfs_path *path;
735 38 : struct btrfs_key key;
736 :
737 38 : path = btrfs_alloc_path();
738 38 : if (!path)
739 : return -ENOMEM;
740 :
741 38 : key.objectid = 0;
742 38 : key.type = BTRFS_QGROUP_INFO_KEY;
743 38 : key.offset = qgroupid;
744 38 : ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
745 38 : if (ret < 0)
746 0 : goto out;
747 :
748 38 : if (ret > 0) {
749 0 : ret = -ENOENT;
750 0 : goto out;
751 : }
752 :
753 38 : ret = btrfs_del_item(trans, quota_root, path);
754 38 : if (ret)
755 0 : goto out;
756 :
757 38 : btrfs_release_path(path);
758 :
759 38 : key.type = BTRFS_QGROUP_LIMIT_KEY;
760 38 : ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
761 38 : if (ret < 0)
762 0 : goto out;
763 :
764 38 : if (ret > 0) {
765 0 : ret = -ENOENT;
766 0 : goto out;
767 : }
768 :
769 38 : ret = btrfs_del_item(trans, quota_root, path);
770 :
771 38 : out:
772 38 : btrfs_free_path(path);
773 38 : return ret;
774 : }
775 :
776 6637 : static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
777 : struct btrfs_qgroup *qgroup)
778 : {
779 6637 : struct btrfs_root *quota_root = trans->fs_info->quota_root;
780 6637 : struct btrfs_path *path;
781 6637 : struct btrfs_key key;
782 6637 : struct extent_buffer *l;
783 6637 : struct btrfs_qgroup_limit_item *qgroup_limit;
784 6637 : int ret;
785 6637 : int slot;
786 :
787 6637 : key.objectid = 0;
788 6637 : key.type = BTRFS_QGROUP_LIMIT_KEY;
789 6637 : key.offset = qgroup->qgroupid;
790 :
791 6637 : path = btrfs_alloc_path();
792 6637 : if (!path)
793 : return -ENOMEM;
794 :
795 6637 : ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
796 6637 : if (ret > 0)
797 : ret = -ENOENT;
798 :
799 6637 : if (ret)
800 0 : goto out;
801 :
802 6637 : l = path->nodes[0];
803 6637 : slot = path->slots[0];
804 6637 : qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
805 6637 : btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
806 6637 : btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
807 6637 : btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
808 6637 : btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
809 6637 : btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
810 :
811 6637 : btrfs_mark_buffer_dirty(l);
812 :
813 6637 : out:
814 6637 : btrfs_free_path(path);
815 6637 : return ret;
816 : }
817 :
818 6626 : static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
819 : struct btrfs_qgroup *qgroup)
820 : {
821 6626 : struct btrfs_fs_info *fs_info = trans->fs_info;
822 6626 : struct btrfs_root *quota_root = fs_info->quota_root;
823 6626 : struct btrfs_path *path;
824 6626 : struct btrfs_key key;
825 6626 : struct extent_buffer *l;
826 6626 : struct btrfs_qgroup_info_item *qgroup_info;
827 6626 : int ret;
828 6626 : int slot;
829 :
830 6626 : if (btrfs_is_testing(fs_info))
831 : return 0;
832 :
833 6626 : key.objectid = 0;
834 6626 : key.type = BTRFS_QGROUP_INFO_KEY;
835 6626 : key.offset = qgroup->qgroupid;
836 :
837 6626 : path = btrfs_alloc_path();
838 6626 : if (!path)
839 : return -ENOMEM;
840 :
841 6626 : ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
842 6626 : if (ret > 0)
843 : ret = -ENOENT;
844 :
845 6626 : if (ret)
846 0 : goto out;
847 :
848 6626 : l = path->nodes[0];
849 6626 : slot = path->slots[0];
850 6626 : qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
851 6626 : btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
852 6626 : btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
853 6626 : btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
854 6626 : btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
855 6626 : btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
856 :
857 6626 : btrfs_mark_buffer_dirty(l);
858 :
859 6626 : out:
860 6626 : btrfs_free_path(path);
861 6626 : return ret;
862 : }
863 :
864 11551 : static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
865 : {
866 11551 : struct btrfs_fs_info *fs_info = trans->fs_info;
867 11551 : struct btrfs_root *quota_root = fs_info->quota_root;
868 11551 : struct btrfs_path *path;
869 11551 : struct btrfs_key key;
870 11551 : struct extent_buffer *l;
871 11551 : struct btrfs_qgroup_status_item *ptr;
872 11551 : int ret;
873 11551 : int slot;
874 :
875 11551 : key.objectid = 0;
876 11551 : key.type = BTRFS_QGROUP_STATUS_KEY;
877 11551 : key.offset = 0;
878 :
879 11551 : path = btrfs_alloc_path();
880 11551 : if (!path)
881 : return -ENOMEM;
882 :
883 11551 : ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
884 11551 : if (ret > 0)
885 : ret = -ENOENT;
886 :
887 11551 : if (ret)
888 0 : goto out;
889 :
890 11551 : l = path->nodes[0];
891 11551 : slot = path->slots[0];
892 11551 : ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
893 11551 : btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags &
894 : BTRFS_QGROUP_STATUS_FLAGS_MASK);
895 11551 : btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
896 11551 : btrfs_set_qgroup_status_rescan(l, ptr,
897 : fs_info->qgroup_rescan_progress.objectid);
898 :
899 11551 : btrfs_mark_buffer_dirty(l);
900 :
901 11551 : out:
902 11551 : btrfs_free_path(path);
903 11551 : return ret;
904 : }
905 :
906 : /*
907 : * called with qgroup_lock held
908 : */
909 132 : static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
910 : struct btrfs_root *root)
911 : {
912 132 : struct btrfs_path *path;
913 132 : struct btrfs_key key;
914 132 : struct extent_buffer *leaf = NULL;
915 132 : int ret;
916 132 : int nr = 0;
917 :
918 132 : path = btrfs_alloc_path();
919 132 : if (!path)
920 : return -ENOMEM;
921 :
922 132 : key.objectid = 0;
923 132 : key.offset = 0;
924 132 : key.type = 0;
925 :
926 396 : while (1) {
927 264 : ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
928 264 : if (ret < 0)
929 0 : goto out;
930 264 : leaf = path->nodes[0];
931 264 : nr = btrfs_header_nritems(leaf);
932 264 : if (!nr)
933 : break;
934 : /*
935 : * delete the leaf one by one
936 : * since the whole tree is going
937 : * to be deleted.
938 : */
939 132 : path->slots[0] = 0;
940 132 : ret = btrfs_del_items(trans, root, path, 0, nr);
941 132 : if (ret)
942 0 : goto out;
943 :
944 132 : btrfs_release_path(path);
945 : }
946 : ret = 0;
947 132 : out:
948 132 : btrfs_free_path(path);
949 132 : return ret;
950 : }
951 :
952 252 : int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
953 : {
954 252 : struct btrfs_root *quota_root;
955 252 : struct btrfs_root *tree_root = fs_info->tree_root;
956 252 : struct btrfs_path *path = NULL;
957 252 : struct btrfs_qgroup_status_item *ptr;
958 252 : struct extent_buffer *leaf;
959 252 : struct btrfs_key key;
960 252 : struct btrfs_key found_key;
961 252 : struct btrfs_qgroup *qgroup = NULL;
962 252 : struct btrfs_trans_handle *trans = NULL;
963 252 : struct ulist *ulist = NULL;
964 252 : int ret = 0;
965 252 : int slot;
966 :
967 : /*
968 : * We need to have subvol_sem write locked, to prevent races between
969 : * concurrent tasks trying to enable quotas, because we will unlock
970 : * and relock qgroup_ioctl_lock before setting fs_info->quota_root
971 : * and before setting BTRFS_FS_QUOTA_ENABLED.
972 : */
973 252 : lockdep_assert_held_write(&fs_info->subvol_sem);
974 :
975 252 : if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
976 0 : btrfs_err(fs_info,
977 : "qgroups are currently unsupported in extent tree v2");
978 0 : return -EINVAL;
979 : }
980 :
981 252 : mutex_lock(&fs_info->qgroup_ioctl_lock);
982 252 : if (fs_info->quota_root)
983 88 : goto out;
984 :
985 164 : ulist = ulist_alloc(GFP_KERNEL);
986 164 : if (!ulist) {
987 0 : ret = -ENOMEM;
988 0 : goto out;
989 : }
990 :
991 164 : ret = btrfs_sysfs_add_qgroups(fs_info);
992 164 : if (ret < 0)
993 0 : goto out;
994 :
995 : /*
996 : * Unlock qgroup_ioctl_lock before starting the transaction. This is to
997 : * avoid lock acquisition inversion problems (reported by lockdep) between
998 : * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
999 : * start a transaction.
1000 : * After we started the transaction lock qgroup_ioctl_lock again and
1001 : * check if someone else created the quota root in the meanwhile. If so,
1002 : * just return success and release the transaction handle.
1003 : *
1004 : * Also we don't need to worry about someone else calling
1005 : * btrfs_sysfs_add_qgroups() after we unlock and getting an error because
1006 : * that function returns 0 (success) when the sysfs entries already exist.
1007 : */
1008 164 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1009 :
1010 : /*
1011 : * 1 for quota root item
1012 : * 1 for BTRFS_QGROUP_STATUS item
1013 : *
1014 : * Yet we also need 2*n items for a QGROUP_INFO/QGROUP_LIMIT items
1015 : * per subvolume. However those are not currently reserved since it
1016 : * would be a lot of overkill.
1017 : */
1018 164 : trans = btrfs_start_transaction(tree_root, 2);
1019 :
1020 164 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1021 164 : if (IS_ERR(trans)) {
1022 0 : ret = PTR_ERR(trans);
1023 0 : trans = NULL;
1024 0 : goto out;
1025 : }
1026 :
1027 164 : if (fs_info->quota_root)
1028 0 : goto out;
1029 :
1030 164 : fs_info->qgroup_ulist = ulist;
1031 164 : ulist = NULL;
1032 :
1033 : /*
1034 : * initially create the quota tree
1035 : */
1036 164 : quota_root = btrfs_create_tree(trans, BTRFS_QUOTA_TREE_OBJECTID);
1037 164 : if (IS_ERR(quota_root)) {
1038 0 : ret = PTR_ERR(quota_root);
1039 0 : btrfs_abort_transaction(trans, ret);
1040 0 : goto out;
1041 : }
1042 :
1043 164 : path = btrfs_alloc_path();
1044 164 : if (!path) {
1045 0 : ret = -ENOMEM;
1046 0 : btrfs_abort_transaction(trans, ret);
1047 0 : goto out_free_root;
1048 : }
1049 :
1050 164 : key.objectid = 0;
1051 164 : key.type = BTRFS_QGROUP_STATUS_KEY;
1052 164 : key.offset = 0;
1053 :
1054 164 : ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
1055 : sizeof(*ptr));
1056 164 : if (ret) {
1057 0 : btrfs_abort_transaction(trans, ret);
1058 0 : goto out_free_path;
1059 : }
1060 :
1061 164 : leaf = path->nodes[0];
1062 164 : ptr = btrfs_item_ptr(leaf, path->slots[0],
1063 : struct btrfs_qgroup_status_item);
1064 164 : btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
1065 164 : btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
1066 164 : fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
1067 : BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1068 164 : btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags &
1069 : BTRFS_QGROUP_STATUS_FLAGS_MASK);
1070 164 : btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
1071 :
1072 164 : btrfs_mark_buffer_dirty(leaf);
1073 :
1074 164 : key.objectid = 0;
1075 164 : key.type = BTRFS_ROOT_REF_KEY;
1076 164 : key.offset = 0;
1077 :
1078 164 : btrfs_release_path(path);
1079 164 : ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
1080 164 : if (ret > 0)
1081 0 : goto out_add_root;
1082 164 : if (ret < 0) {
1083 0 : btrfs_abort_transaction(trans, ret);
1084 0 : goto out_free_path;
1085 : }
1086 :
1087 2035 : while (1) {
1088 2035 : slot = path->slots[0];
1089 2035 : leaf = path->nodes[0];
1090 2035 : btrfs_item_key_to_cpu(leaf, &found_key, slot);
1091 :
1092 2035 : if (found_key.type == BTRFS_ROOT_REF_KEY) {
1093 :
1094 : /* Release locks on tree_root before we access quota_root */
1095 16 : btrfs_release_path(path);
1096 :
1097 16 : ret = add_qgroup_item(trans, quota_root,
1098 : found_key.offset);
1099 16 : if (ret) {
1100 0 : btrfs_abort_transaction(trans, ret);
1101 0 : goto out_free_path;
1102 : }
1103 :
1104 16 : qgroup = add_qgroup_rb(fs_info, found_key.offset);
1105 16 : if (IS_ERR(qgroup)) {
1106 0 : ret = PTR_ERR(qgroup);
1107 0 : btrfs_abort_transaction(trans, ret);
1108 0 : goto out_free_path;
1109 : }
1110 16 : ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
1111 16 : if (ret < 0) {
1112 0 : btrfs_abort_transaction(trans, ret);
1113 0 : goto out_free_path;
1114 : }
1115 16 : ret = btrfs_search_slot_for_read(tree_root, &found_key,
1116 : path, 1, 0);
1117 16 : if (ret < 0) {
1118 0 : btrfs_abort_transaction(trans, ret);
1119 0 : goto out_free_path;
1120 : }
1121 16 : if (ret > 0) {
1122 : /*
1123 : * Shouldn't happen, but in case it does we
1124 : * don't need to do the btrfs_next_item, just
1125 : * continue.
1126 : */
1127 0 : continue;
1128 : }
1129 : }
1130 2035 : ret = btrfs_next_item(tree_root, path);
1131 2035 : if (ret < 0) {
1132 0 : btrfs_abort_transaction(trans, ret);
1133 0 : goto out_free_path;
1134 : }
1135 2035 : if (ret)
1136 : break;
1137 : }
1138 :
1139 164 : out_add_root:
1140 164 : btrfs_release_path(path);
1141 164 : ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
1142 164 : if (ret) {
1143 0 : btrfs_abort_transaction(trans, ret);
1144 0 : goto out_free_path;
1145 : }
1146 :
1147 164 : qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
1148 164 : if (IS_ERR(qgroup)) {
1149 0 : ret = PTR_ERR(qgroup);
1150 0 : btrfs_abort_transaction(trans, ret);
1151 0 : goto out_free_path;
1152 : }
1153 164 : ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
1154 164 : if (ret < 0) {
1155 0 : btrfs_abort_transaction(trans, ret);
1156 0 : goto out_free_path;
1157 : }
1158 :
1159 164 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1160 : /*
1161 : * Commit the transaction while not holding qgroup_ioctl_lock, to avoid
1162 : * a deadlock with tasks concurrently doing other qgroup operations, such
1163 : * adding/removing qgroups or adding/deleting qgroup relations for example,
1164 : * because all qgroup operations first start or join a transaction and then
1165 : * lock the qgroup_ioctl_lock mutex.
1166 : * We are safe from a concurrent task trying to enable quotas, by calling
1167 : * this function, since we are serialized by fs_info->subvol_sem.
1168 : */
1169 164 : ret = btrfs_commit_transaction(trans);
1170 164 : trans = NULL;
1171 164 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1172 164 : if (ret)
1173 0 : goto out_free_path;
1174 :
1175 : /*
1176 : * Set quota enabled flag after committing the transaction, to avoid
1177 : * deadlocks on fs_info->qgroup_ioctl_lock with concurrent snapshot
1178 : * creation.
1179 : */
1180 164 : spin_lock(&fs_info->qgroup_lock);
1181 164 : fs_info->quota_root = quota_root;
1182 164 : set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1183 164 : spin_unlock(&fs_info->qgroup_lock);
1184 :
1185 164 : ret = qgroup_rescan_init(fs_info, 0, 1);
1186 164 : if (!ret) {
1187 164 : qgroup_rescan_zero_tracking(fs_info);
1188 164 : fs_info->qgroup_rescan_running = true;
1189 164 : btrfs_queue_work(fs_info->qgroup_rescan_workers,
1190 : &fs_info->qgroup_rescan_work);
1191 : } else {
1192 : /*
1193 : * We have set both BTRFS_FS_QUOTA_ENABLED and
1194 : * BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
1195 : * -EINPROGRESS. That can happen because someone started the
1196 : * rescan worker by calling quota rescan ioctl before we
1197 : * attempted to initialize the rescan worker. Failure due to
1198 : * quotas disabled in the meanwhile is not possible, because
1199 : * we are holding a write lock on fs_info->subvol_sem, which
1200 : * is also acquired when disabling quotas.
1201 : * Ignore such error, and any other error would need to undo
1202 : * everything we did in the transaction we just committed.
1203 : */
1204 : ASSERT(ret == -EINPROGRESS);
1205 : ret = 0;
1206 : }
1207 :
1208 164 : out_free_path:
1209 164 : btrfs_free_path(path);
1210 : out_free_root:
1211 164 : if (ret)
1212 0 : btrfs_put_root(quota_root);
1213 164 : out:
1214 252 : if (ret) {
1215 0 : ulist_free(fs_info->qgroup_ulist);
1216 0 : fs_info->qgroup_ulist = NULL;
1217 0 : btrfs_sysfs_del_qgroups(fs_info);
1218 : }
1219 252 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1220 252 : if (ret && trans)
1221 0 : btrfs_end_transaction(trans);
1222 252 : else if (trans)
1223 0 : ret = btrfs_end_transaction(trans);
1224 252 : ulist_free(ulist);
1225 252 : return ret;
1226 : }
1227 :
1228 221 : int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
1229 : {
1230 221 : struct btrfs_root *quota_root;
1231 221 : struct btrfs_trans_handle *trans = NULL;
1232 221 : int ret = 0;
1233 :
1234 : /*
1235 : * We need to have subvol_sem write locked to prevent races with
1236 : * snapshot creation.
1237 : */
1238 221 : lockdep_assert_held_write(&fs_info->subvol_sem);
1239 :
1240 : /*
1241 : * Lock the cleaner mutex to prevent races with concurrent relocation,
1242 : * because relocation may be building backrefs for blocks of the quota
1243 : * root while we are deleting the root. This is like dropping fs roots
1244 : * of deleted snapshots/subvolumes, we need the same protection.
1245 : *
1246 : * This also prevents races between concurrent tasks trying to disable
1247 : * quotas, because we will unlock and relock qgroup_ioctl_lock across
1248 : * BTRFS_FS_QUOTA_ENABLED changes.
1249 : */
1250 221 : mutex_lock(&fs_info->cleaner_mutex);
1251 :
1252 221 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1253 221 : if (!fs_info->quota_root)
1254 89 : goto out;
1255 :
1256 : /*
1257 : * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to
1258 : * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs
1259 : * to lock that mutex while holding a transaction handle and the rescan
1260 : * worker needs to commit a transaction.
1261 : */
1262 132 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1263 :
1264 : /*
1265 : * Request qgroup rescan worker to complete and wait for it. This wait
1266 : * must be done before transaction start for quota disable since it may
1267 : * deadlock with transaction by the qgroup rescan worker.
1268 : */
1269 132 : clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1270 132 : btrfs_qgroup_wait_for_completion(fs_info, false);
1271 :
1272 : /*
1273 : * 1 For the root item
1274 : *
1275 : * We should also reserve enough items for the quota tree deletion in
1276 : * btrfs_clean_quota_tree but this is not done.
1277 : *
1278 : * Also, we must always start a transaction without holding the mutex
1279 : * qgroup_ioctl_lock, see btrfs_quota_enable().
1280 : */
1281 132 : trans = btrfs_start_transaction(fs_info->tree_root, 1);
1282 :
1283 132 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1284 132 : if (IS_ERR(trans)) {
1285 0 : ret = PTR_ERR(trans);
1286 0 : trans = NULL;
1287 0 : set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
1288 0 : goto out;
1289 : }
1290 :
1291 132 : if (!fs_info->quota_root)
1292 0 : goto out;
1293 :
1294 132 : spin_lock(&fs_info->qgroup_lock);
1295 132 : quota_root = fs_info->quota_root;
1296 132 : fs_info->quota_root = NULL;
1297 132 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
1298 132 : fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
1299 132 : spin_unlock(&fs_info->qgroup_lock);
1300 :
1301 132 : btrfs_free_qgroup_config(fs_info);
1302 :
1303 132 : ret = btrfs_clean_quota_tree(trans, quota_root);
1304 132 : if (ret) {
1305 0 : btrfs_abort_transaction(trans, ret);
1306 0 : goto out;
1307 : }
1308 :
1309 132 : ret = btrfs_del_root(trans, "a_root->root_key);
1310 132 : if (ret) {
1311 0 : btrfs_abort_transaction(trans, ret);
1312 0 : goto out;
1313 : }
1314 :
1315 132 : spin_lock(&fs_info->trans_lock);
1316 132 : list_del("a_root->dirty_list);
1317 132 : spin_unlock(&fs_info->trans_lock);
1318 :
1319 132 : btrfs_tree_lock(quota_root->node);
1320 132 : btrfs_clear_buffer_dirty(trans, quota_root->node);
1321 132 : btrfs_tree_unlock(quota_root->node);
1322 132 : btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
1323 : quota_root->node, 0, 1);
1324 :
1325 132 : btrfs_put_root(quota_root);
1326 :
1327 221 : out:
1328 221 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1329 221 : if (ret && trans)
1330 0 : btrfs_end_transaction(trans);
1331 221 : else if (trans)
1332 132 : ret = btrfs_end_transaction(trans);
1333 221 : mutex_unlock(&fs_info->cleaner_mutex);
1334 :
1335 221 : return ret;
1336 : }
1337 :
1338 305513 : static void qgroup_dirty(struct btrfs_fs_info *fs_info,
1339 : struct btrfs_qgroup *qgroup)
1340 : {
1341 305513 : if (list_empty(&qgroup->dirty))
1342 6673 : list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
1343 305513 : }
1344 :
1345 : /*
1346 : * The easy accounting, we're updating qgroup relationship whose child qgroup
1347 : * only has exclusive extents.
1348 : *
1349 : * In this case, all exclusive extents will also be exclusive for parent, so
1350 : * excl/rfer just get added/removed.
1351 : *
1352 : * So is qgroup reservation space, which should also be added/removed to
1353 : * parent.
1354 : * Or when child tries to release reservation space, parent will underflow its
1355 : * reservation (for relationship adding case).
1356 : *
1357 : * Caller should hold fs_info->qgroup_lock.
1358 : */
1359 4 : static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
1360 : struct ulist *tmp, u64 ref_root,
1361 : struct btrfs_qgroup *src, int sign)
1362 : {
1363 4 : struct btrfs_qgroup *qgroup;
1364 4 : struct btrfs_qgroup_list *glist;
1365 4 : struct ulist_node *unode;
1366 4 : struct ulist_iterator uiter;
1367 4 : u64 num_bytes = src->excl;
1368 4 : int ret = 0;
1369 :
1370 4 : qgroup = find_qgroup_rb(fs_info, ref_root);
1371 4 : if (!qgroup)
1372 0 : goto out;
1373 :
1374 4 : qgroup->rfer += sign * num_bytes;
1375 4 : qgroup->rfer_cmpr += sign * num_bytes;
1376 :
1377 8 : WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1378 4 : qgroup->excl += sign * num_bytes;
1379 4 : qgroup->excl_cmpr += sign * num_bytes;
1380 :
1381 4 : if (sign > 0)
1382 4 : qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
1383 : else
1384 0 : qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
1385 :
1386 4 : qgroup_dirty(fs_info, qgroup);
1387 :
1388 : /* Get all of the parent groups that contain this qgroup */
1389 4 : list_for_each_entry(glist, &qgroup->groups, next_group) {
1390 0 : ret = ulist_add(tmp, glist->group->qgroupid,
1391 : qgroup_to_aux(glist->group), GFP_ATOMIC);
1392 0 : if (ret < 0)
1393 0 : goto out;
1394 : }
1395 :
1396 : /* Iterate all of the parents and adjust their reference counts */
1397 4 : ULIST_ITER_INIT(&uiter);
1398 4 : while ((unode = ulist_next(tmp, &uiter))) {
1399 0 : qgroup = unode_aux_to_qgroup(unode);
1400 0 : qgroup->rfer += sign * num_bytes;
1401 0 : qgroup->rfer_cmpr += sign * num_bytes;
1402 0 : WARN_ON(sign < 0 && qgroup->excl < num_bytes);
1403 0 : qgroup->excl += sign * num_bytes;
1404 0 : if (sign > 0)
1405 0 : qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
1406 : else
1407 0 : qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
1408 0 : qgroup->excl_cmpr += sign * num_bytes;
1409 0 : qgroup_dirty(fs_info, qgroup);
1410 :
1411 : /* Add any parents of the parents */
1412 0 : list_for_each_entry(glist, &qgroup->groups, next_group) {
1413 0 : ret = ulist_add(tmp, glist->group->qgroupid,
1414 : qgroup_to_aux(glist->group), GFP_ATOMIC);
1415 0 : if (ret < 0)
1416 0 : goto out;
1417 : }
1418 : }
1419 : ret = 0;
1420 4 : out:
1421 4 : return ret;
1422 : }
1423 :
1424 :
1425 : /*
1426 : * Quick path for updating qgroup with only excl refs.
1427 : *
1428 : * In that case, just update all parent will be enough.
1429 : * Or we needs to do a full rescan.
1430 : * Caller should also hold fs_info->qgroup_lock.
1431 : *
1432 : * Return 0 for quick update, return >0 for need to full rescan
1433 : * and mark INCONSISTENT flag.
1434 : * Return < 0 for other error.
1435 : */
1436 6 : static int quick_update_accounting(struct btrfs_fs_info *fs_info,
1437 : struct ulist *tmp, u64 src, u64 dst,
1438 : int sign)
1439 : {
1440 6 : struct btrfs_qgroup *qgroup;
1441 6 : int ret = 1;
1442 6 : int err = 0;
1443 :
1444 6 : qgroup = find_qgroup_rb(fs_info, src);
1445 6 : if (!qgroup)
1446 0 : goto out;
1447 6 : if (qgroup->excl == qgroup->rfer) {
1448 4 : ret = 0;
1449 4 : err = __qgroup_excl_accounting(fs_info, tmp, dst,
1450 : qgroup, sign);
1451 4 : if (err < 0) {
1452 0 : ret = err;
1453 0 : goto out;
1454 : }
1455 : }
1456 6 : out:
1457 0 : if (ret)
1458 2 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
1459 6 : return ret;
1460 : }
1461 :
1462 5 : int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1463 : u64 dst)
1464 : {
1465 5 : struct btrfs_fs_info *fs_info = trans->fs_info;
1466 5 : struct btrfs_qgroup *parent;
1467 5 : struct btrfs_qgroup *member;
1468 5 : struct btrfs_qgroup_list *list;
1469 5 : struct ulist *tmp;
1470 5 : unsigned int nofs_flag;
1471 5 : int ret = 0;
1472 :
1473 : /* Check the level of src and dst first */
1474 5 : if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
1475 : return -EINVAL;
1476 :
1477 : /* We hold a transaction handle open, must do a NOFS allocation. */
1478 5 : nofs_flag = memalloc_nofs_save();
1479 5 : tmp = ulist_alloc(GFP_KERNEL);
1480 5 : memalloc_nofs_restore(nofs_flag);
1481 5 : if (!tmp)
1482 : return -ENOMEM;
1483 :
1484 5 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1485 5 : if (!fs_info->quota_root) {
1486 0 : ret = -ENOTCONN;
1487 0 : goto out;
1488 : }
1489 5 : member = find_qgroup_rb(fs_info, src);
1490 5 : parent = find_qgroup_rb(fs_info, dst);
1491 5 : if (!member || !parent) {
1492 0 : ret = -EINVAL;
1493 0 : goto out;
1494 : }
1495 :
1496 : /* check if such qgroup relation exist firstly */
1497 5 : list_for_each_entry(list, &member->groups, next_group) {
1498 0 : if (list->group == parent) {
1499 0 : ret = -EEXIST;
1500 0 : goto out;
1501 : }
1502 : }
1503 :
1504 5 : ret = add_qgroup_relation_item(trans, src, dst);
1505 5 : if (ret)
1506 0 : goto out;
1507 :
1508 5 : ret = add_qgroup_relation_item(trans, dst, src);
1509 5 : if (ret) {
1510 0 : del_qgroup_relation_item(trans, src, dst);
1511 0 : goto out;
1512 : }
1513 :
1514 5 : spin_lock(&fs_info->qgroup_lock);
1515 5 : ret = __add_relation_rb(member, parent);
1516 5 : if (ret < 0) {
1517 0 : spin_unlock(&fs_info->qgroup_lock);
1518 0 : goto out;
1519 : }
1520 5 : ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
1521 5 : spin_unlock(&fs_info->qgroup_lock);
1522 5 : out:
1523 5 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1524 5 : ulist_free(tmp);
1525 5 : return ret;
1526 : }
1527 :
1528 1 : static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1529 : u64 dst)
1530 : {
1531 1 : struct btrfs_fs_info *fs_info = trans->fs_info;
1532 1 : struct btrfs_qgroup *parent;
1533 1 : struct btrfs_qgroup *member;
1534 1 : struct btrfs_qgroup_list *list;
1535 1 : struct ulist *tmp;
1536 1 : bool found = false;
1537 1 : unsigned int nofs_flag;
1538 1 : int ret = 0;
1539 1 : int ret2;
1540 :
1541 : /* We hold a transaction handle open, must do a NOFS allocation. */
1542 1 : nofs_flag = memalloc_nofs_save();
1543 1 : tmp = ulist_alloc(GFP_KERNEL);
1544 1 : memalloc_nofs_restore(nofs_flag);
1545 1 : if (!tmp)
1546 : return -ENOMEM;
1547 :
1548 1 : if (!fs_info->quota_root) {
1549 0 : ret = -ENOTCONN;
1550 0 : goto out;
1551 : }
1552 :
1553 1 : member = find_qgroup_rb(fs_info, src);
1554 1 : parent = find_qgroup_rb(fs_info, dst);
1555 : /*
1556 : * The parent/member pair doesn't exist, then try to delete the dead
1557 : * relation items only.
1558 : */
1559 1 : if (!member || !parent)
1560 0 : goto delete_item;
1561 :
1562 : /* check if such qgroup relation exist firstly */
1563 1 : list_for_each_entry(list, &member->groups, next_group) {
1564 1 : if (list->group == parent) {
1565 : found = true;
1566 : break;
1567 : }
1568 : }
1569 :
1570 0 : delete_item:
1571 1 : ret = del_qgroup_relation_item(trans, src, dst);
1572 1 : if (ret < 0 && ret != -ENOENT)
1573 0 : goto out;
1574 1 : ret2 = del_qgroup_relation_item(trans, dst, src);
1575 1 : if (ret2 < 0 && ret2 != -ENOENT)
1576 0 : goto out;
1577 :
1578 : /* At least one deletion succeeded, return 0 */
1579 1 : if (!ret || !ret2)
1580 1 : ret = 0;
1581 :
1582 1 : if (found) {
1583 1 : spin_lock(&fs_info->qgroup_lock);
1584 1 : del_relation_rb(fs_info, src, dst);
1585 1 : ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
1586 1 : spin_unlock(&fs_info->qgroup_lock);
1587 : }
1588 0 : out:
1589 1 : ulist_free(tmp);
1590 1 : return ret;
1591 : }
1592 :
1593 1 : int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
1594 : u64 dst)
1595 : {
1596 1 : struct btrfs_fs_info *fs_info = trans->fs_info;
1597 1 : int ret = 0;
1598 :
1599 1 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1600 1 : ret = __del_qgroup_relation(trans, src, dst);
1601 1 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1602 :
1603 1 : return ret;
1604 : }
1605 :
1606 205 : int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
1607 : {
1608 205 : struct btrfs_fs_info *fs_info = trans->fs_info;
1609 205 : struct btrfs_root *quota_root;
1610 205 : struct btrfs_qgroup *qgroup;
1611 205 : int ret = 0;
1612 :
1613 205 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1614 205 : if (!fs_info->quota_root) {
1615 116 : ret = -ENOTCONN;
1616 116 : goto out;
1617 : }
1618 89 : quota_root = fs_info->quota_root;
1619 89 : qgroup = find_qgroup_rb(fs_info, qgroupid);
1620 89 : if (qgroup) {
1621 31 : ret = -EEXIST;
1622 31 : goto out;
1623 : }
1624 :
1625 58 : ret = add_qgroup_item(trans, quota_root, qgroupid);
1626 58 : if (ret)
1627 0 : goto out;
1628 :
1629 58 : spin_lock(&fs_info->qgroup_lock);
1630 58 : qgroup = add_qgroup_rb(fs_info, qgroupid);
1631 58 : spin_unlock(&fs_info->qgroup_lock);
1632 :
1633 58 : if (IS_ERR(qgroup)) {
1634 0 : ret = PTR_ERR(qgroup);
1635 0 : goto out;
1636 : }
1637 58 : ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
1638 205 : out:
1639 205 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1640 205 : return ret;
1641 : }
1642 :
1643 200 : int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
1644 : {
1645 200 : struct btrfs_fs_info *fs_info = trans->fs_info;
1646 200 : struct btrfs_qgroup *qgroup;
1647 200 : struct btrfs_qgroup_list *list;
1648 200 : int ret = 0;
1649 :
1650 200 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1651 200 : if (!fs_info->quota_root) {
1652 103 : ret = -ENOTCONN;
1653 103 : goto out;
1654 : }
1655 :
1656 97 : qgroup = find_qgroup_rb(fs_info, qgroupid);
1657 97 : if (!qgroup) {
1658 59 : ret = -ENOENT;
1659 59 : goto out;
1660 : }
1661 :
1662 : /* Check if there are no children of this qgroup */
1663 38 : if (!list_empty(&qgroup->members)) {
1664 0 : ret = -EBUSY;
1665 0 : goto out;
1666 : }
1667 :
1668 38 : ret = del_qgroup_item(trans, qgroupid);
1669 38 : if (ret && ret != -ENOENT)
1670 0 : goto out;
1671 :
1672 38 : while (!list_empty(&qgroup->groups)) {
1673 0 : list = list_first_entry(&qgroup->groups,
1674 : struct btrfs_qgroup_list, next_group);
1675 0 : ret = __del_qgroup_relation(trans, qgroupid,
1676 0 : list->group->qgroupid);
1677 0 : if (ret)
1678 0 : goto out;
1679 : }
1680 :
1681 38 : spin_lock(&fs_info->qgroup_lock);
1682 38 : del_qgroup_rb(fs_info, qgroupid);
1683 38 : spin_unlock(&fs_info->qgroup_lock);
1684 :
1685 : /*
1686 : * Remove the qgroup from sysfs now without holding the qgroup_lock
1687 : * spinlock, since the sysfs_remove_group() function needs to take
1688 : * the mutex kernfs_mutex through kernfs_remove_by_name_ns().
1689 : */
1690 38 : btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
1691 38 : kfree(qgroup);
1692 200 : out:
1693 200 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1694 200 : return ret;
1695 : }
1696 :
1697 11 : int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
1698 : struct btrfs_qgroup_limit *limit)
1699 : {
1700 11 : struct btrfs_fs_info *fs_info = trans->fs_info;
1701 11 : struct btrfs_qgroup *qgroup;
1702 11 : int ret = 0;
1703 : /* Sometimes we would want to clear the limit on this qgroup.
1704 : * To meet this requirement, we treat the -1 as a special value
1705 : * which tell kernel to clear the limit on this qgroup.
1706 : */
1707 11 : const u64 CLEAR_VALUE = -1;
1708 :
1709 11 : mutex_lock(&fs_info->qgroup_ioctl_lock);
1710 11 : if (!fs_info->quota_root) {
1711 0 : ret = -ENOTCONN;
1712 0 : goto out;
1713 : }
1714 :
1715 11 : qgroup = find_qgroup_rb(fs_info, qgroupid);
1716 11 : if (!qgroup) {
1717 0 : ret = -ENOENT;
1718 0 : goto out;
1719 : }
1720 :
1721 11 : spin_lock(&fs_info->qgroup_lock);
1722 11 : if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
1723 8 : if (limit->max_rfer == CLEAR_VALUE) {
1724 0 : qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1725 0 : limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
1726 0 : qgroup->max_rfer = 0;
1727 : } else {
1728 8 : qgroup->max_rfer = limit->max_rfer;
1729 : }
1730 : }
1731 11 : if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
1732 3 : if (limit->max_excl == CLEAR_VALUE) {
1733 0 : qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1734 0 : limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
1735 0 : qgroup->max_excl = 0;
1736 : } else {
1737 3 : qgroup->max_excl = limit->max_excl;
1738 : }
1739 : }
1740 11 : if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
1741 0 : if (limit->rsv_rfer == CLEAR_VALUE) {
1742 0 : qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1743 0 : limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
1744 0 : qgroup->rsv_rfer = 0;
1745 : } else {
1746 0 : qgroup->rsv_rfer = limit->rsv_rfer;
1747 : }
1748 : }
1749 11 : if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
1750 0 : if (limit->rsv_excl == CLEAR_VALUE) {
1751 0 : qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1752 0 : limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
1753 0 : qgroup->rsv_excl = 0;
1754 : } else {
1755 0 : qgroup->rsv_excl = limit->rsv_excl;
1756 : }
1757 : }
1758 11 : qgroup->lim_flags |= limit->flags;
1759 :
1760 11 : spin_unlock(&fs_info->qgroup_lock);
1761 :
1762 11 : ret = update_qgroup_limit_item(trans, qgroup);
1763 11 : if (ret) {
1764 0 : qgroup_mark_inconsistent(fs_info);
1765 0 : btrfs_info(fs_info, "unable to update quota limit for %llu",
1766 : qgroupid);
1767 : }
1768 :
1769 11 : out:
1770 11 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
1771 11 : return ret;
1772 : }
1773 :
1774 388425 : int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
1775 : struct btrfs_delayed_ref_root *delayed_refs,
1776 : struct btrfs_qgroup_extent_record *record)
1777 : {
1778 388425 : struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
1779 388425 : struct rb_node *parent_node = NULL;
1780 388425 : struct btrfs_qgroup_extent_record *entry;
1781 388425 : u64 bytenr = record->bytenr;
1782 :
1783 388425 : lockdep_assert_held(&delayed_refs->lock);
1784 388425 : trace_btrfs_qgroup_trace_extent(fs_info, record);
1785 :
1786 4161302 : while (*p) {
1787 3968435 : parent_node = *p;
1788 3968435 : entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
1789 : node);
1790 3968435 : if (bytenr < entry->bytenr) {
1791 1564134 : p = &(*p)->rb_left;
1792 2404301 : } else if (bytenr > entry->bytenr) {
1793 2208743 : p = &(*p)->rb_right;
1794 : } else {
1795 195558 : if (record->data_rsv && !entry->data_rsv) {
1796 0 : entry->data_rsv = record->data_rsv;
1797 0 : entry->data_rsv_refroot =
1798 0 : record->data_rsv_refroot;
1799 : }
1800 195558 : return 1;
1801 : }
1802 : }
1803 :
1804 192867 : rb_link_node(&record->node, parent_node, p);
1805 192867 : rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
1806 192867 : return 0;
1807 : }
1808 :
1809 192867 : int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
1810 : struct btrfs_qgroup_extent_record *qrecord)
1811 : {
1812 192867 : struct btrfs_backref_walk_ctx ctx = { 0 };
1813 192867 : int ret;
1814 :
1815 : /*
1816 : * We are always called in a context where we are already holding a
1817 : * transaction handle. Often we are called when adding a data delayed
1818 : * reference from btrfs_truncate_inode_items() (truncating or unlinking),
1819 : * in which case we will be holding a write lock on extent buffer from a
1820 : * subvolume tree. In this case we can't allow btrfs_find_all_roots() to
1821 : * acquire fs_info->commit_root_sem, because that is a higher level lock
1822 : * that must be acquired before locking any extent buffers.
1823 : *
1824 : * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
1825 : * but we can't pass it a non-NULL transaction handle, because otherwise
1826 : * it would not use commit roots and would lock extent buffers, causing
1827 : * a deadlock if it ends up trying to read lock the same extent buffer
1828 : * that was previously write locked at btrfs_truncate_inode_items().
1829 : *
1830 : * So pass a NULL transaction handle to btrfs_find_all_roots() and
1831 : * explicitly tell it to not acquire the commit_root_sem - if we are
1832 : * holding a transaction handle we don't need its protection.
1833 : */
1834 192867 : ASSERT(trans != NULL);
1835 :
1836 192867 : if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
1837 : return 0;
1838 :
1839 192861 : ctx.bytenr = qrecord->bytenr;
1840 192861 : ctx.fs_info = trans->fs_info;
1841 :
1842 192861 : ret = btrfs_find_all_roots(&ctx, true);
1843 192832 : if (ret < 0) {
1844 0 : qgroup_mark_inconsistent(trans->fs_info);
1845 0 : btrfs_warn(trans->fs_info,
1846 : "error accounting new delayed refs extent (err code: %d), quota inconsistent",
1847 : ret);
1848 0 : return 0;
1849 : }
1850 :
1851 : /*
1852 : * Here we don't need to get the lock of
1853 : * trans->transaction->delayed_refs, since inserted qrecord won't
1854 : * be deleted, only qrecord->node may be modified (new qrecord insert)
1855 : *
1856 : * So modifying qrecord->old_roots is safe here
1857 : */
1858 192832 : qrecord->old_roots = ctx.roots;
1859 192832 : return 0;
1860 : }
1861 :
1862 260164 : int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
1863 : u64 num_bytes)
1864 : {
1865 260164 : struct btrfs_fs_info *fs_info = trans->fs_info;
1866 260164 : struct btrfs_qgroup_extent_record *record;
1867 260164 : struct btrfs_delayed_ref_root *delayed_refs;
1868 260164 : int ret;
1869 :
1870 260164 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
1871 260164 : || bytenr == 0 || num_bytes == 0)
1872 : return 0;
1873 164960 : record = kzalloc(sizeof(*record), GFP_NOFS);
1874 164960 : if (!record)
1875 : return -ENOMEM;
1876 :
1877 164960 : delayed_refs = &trans->transaction->delayed_refs;
1878 164960 : record->bytenr = bytenr;
1879 164960 : record->num_bytes = num_bytes;
1880 164960 : record->old_roots = NULL;
1881 :
1882 164960 : spin_lock(&delayed_refs->lock);
1883 164960 : ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
1884 164960 : spin_unlock(&delayed_refs->lock);
1885 164960 : if (ret > 0) {
1886 147744 : kfree(record);
1887 147744 : return 0;
1888 : }
1889 17216 : return btrfs_qgroup_trace_extent_post(trans, record);
1890 : }
1891 :
1892 36268 : int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
1893 : struct extent_buffer *eb)
1894 : {
1895 36268 : struct btrfs_fs_info *fs_info = trans->fs_info;
1896 36268 : int nr = btrfs_header_nritems(eb);
1897 36268 : int i, extent_type, ret;
1898 36268 : struct btrfs_key key;
1899 36268 : struct btrfs_file_extent_item *fi;
1900 36268 : u64 bytenr, num_bytes;
1901 :
1902 : /* We can be called directly from walk_up_proc() */
1903 36268 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
1904 : return 0;
1905 :
1906 1683138 : for (i = 0; i < nr; i++) {
1907 1647646 : btrfs_item_key_to_cpu(eb, &key, i);
1908 :
1909 1647646 : if (key.type != BTRFS_EXTENT_DATA_KEY)
1910 1318202 : continue;
1911 :
1912 329444 : fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
1913 : /* filter out non qgroup-accountable extents */
1914 329444 : extent_type = btrfs_file_extent_type(eb, fi);
1915 :
1916 329444 : if (extent_type == BTRFS_FILE_EXTENT_INLINE)
1917 164485 : continue;
1918 :
1919 164959 : bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
1920 164959 : if (!bytenr)
1921 0 : continue;
1922 :
1923 164959 : num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
1924 :
1925 164959 : ret = btrfs_qgroup_trace_extent(trans, bytenr, num_bytes);
1926 164959 : if (ret)
1927 0 : return ret;
1928 : }
1929 35492 : cond_resched();
1930 35492 : return 0;
1931 : }
1932 :
1933 : /*
1934 : * Walk up the tree from the bottom, freeing leaves and any interior
1935 : * nodes which have had all slots visited. If a node (leaf or
1936 : * interior) is freed, the node above it will have it's slot
1937 : * incremented. The root node will never be freed.
1938 : *
1939 : * At the end of this function, we should have a path which has all
1940 : * slots incremented to the next position for a search. If we need to
1941 : * read a new node it will be NULL and the node above it will have the
1942 : * correct slot selected for a later read.
1943 : *
1944 : * If we increment the root nodes slot counter past the number of
1945 : * elements, 1 is returned to signal completion of the search.
1946 : */
1947 0 : static int adjust_slots_upwards(struct btrfs_path *path, int root_level)
1948 : {
1949 0 : int level = 0;
1950 0 : int nr, slot;
1951 0 : struct extent_buffer *eb;
1952 :
1953 0 : if (root_level == 0)
1954 : return 1;
1955 :
1956 0 : while (level <= root_level) {
1957 0 : eb = path->nodes[level];
1958 0 : nr = btrfs_header_nritems(eb);
1959 0 : path->slots[level]++;
1960 0 : slot = path->slots[level];
1961 0 : if (slot >= nr || level == 0) {
1962 : /*
1963 : * Don't free the root - we will detect this
1964 : * condition after our loop and return a
1965 : * positive value for caller to stop walking the tree.
1966 : */
1967 0 : if (level != root_level) {
1968 0 : btrfs_tree_unlock_rw(eb, path->locks[level]);
1969 0 : path->locks[level] = 0;
1970 :
1971 0 : free_extent_buffer(eb);
1972 0 : path->nodes[level] = NULL;
1973 0 : path->slots[level] = 0;
1974 : }
1975 : } else {
1976 : /*
1977 : * We have a valid slot to walk back down
1978 : * from. Stop here so caller can process these
1979 : * new nodes.
1980 : */
1981 : break;
1982 : }
1983 :
1984 0 : level++;
1985 : }
1986 :
1987 0 : eb = path->nodes[root_level];
1988 0 : if (path->slots[root_level] >= btrfs_header_nritems(eb))
1989 0 : return 1;
1990 :
1991 : return 0;
1992 : }
1993 :
1994 : /*
1995 : * Helper function to trace a subtree tree block swap.
1996 : *
1997 : * The swap will happen in highest tree block, but there may be a lot of
1998 : * tree blocks involved.
1999 : *
2000 : * For example:
2001 : * OO = Old tree blocks
2002 : * NN = New tree blocks allocated during balance
2003 : *
2004 : * File tree (257) Reloc tree for 257
2005 : * L2 OO NN
2006 : * / \ / \
2007 : * L1 OO OO (a) OO NN (a)
2008 : * / \ / \ / \ / \
2009 : * L0 OO OO OO OO OO OO NN NN
2010 : * (b) (c) (b) (c)
2011 : *
2012 : * When calling qgroup_trace_extent_swap(), we will pass:
2013 : * @src_eb = OO(a)
2014 : * @dst_path = [ nodes[1] = NN(a), nodes[0] = NN(c) ]
2015 : * @dst_level = 0
2016 : * @root_level = 1
2017 : *
2018 : * In that case, qgroup_trace_extent_swap() will search from OO(a) to
2019 : * reach OO(c), then mark both OO(c) and NN(c) as qgroup dirty.
2020 : *
2021 : * The main work of qgroup_trace_extent_swap() can be split into 3 parts:
2022 : *
2023 : * 1) Tree search from @src_eb
2024 : * It should acts as a simplified btrfs_search_slot().
2025 : * The key for search can be extracted from @dst_path->nodes[dst_level]
2026 : * (first key).
2027 : *
2028 : * 2) Mark the final tree blocks in @src_path and @dst_path qgroup dirty
2029 : * NOTE: In above case, OO(a) and NN(a) won't be marked qgroup dirty.
2030 : * They should be marked during previous (@dst_level = 1) iteration.
2031 : *
2032 : * 3) Mark file extents in leaves dirty
2033 : * We don't have good way to pick out new file extents only.
2034 : * So we still follow the old method by scanning all file extents in
2035 : * the leave.
2036 : *
2037 : * This function can free us from keeping two paths, thus later we only need
2038 : * to care about how to iterate all new tree blocks in reloc tree.
2039 : */
2040 0 : static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
2041 : struct extent_buffer *src_eb,
2042 : struct btrfs_path *dst_path,
2043 : int dst_level, int root_level,
2044 : bool trace_leaf)
2045 : {
2046 0 : struct btrfs_key key;
2047 0 : struct btrfs_path *src_path;
2048 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
2049 0 : u32 nodesize = fs_info->nodesize;
2050 0 : int cur_level = root_level;
2051 0 : int ret;
2052 :
2053 0 : BUG_ON(dst_level > root_level);
2054 : /* Level mismatch */
2055 0 : if (btrfs_header_level(src_eb) != root_level)
2056 : return -EINVAL;
2057 :
2058 0 : src_path = btrfs_alloc_path();
2059 0 : if (!src_path) {
2060 0 : ret = -ENOMEM;
2061 0 : goto out;
2062 : }
2063 :
2064 0 : if (dst_level)
2065 0 : btrfs_node_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
2066 : else
2067 0 : btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
2068 :
2069 : /* For src_path */
2070 0 : atomic_inc(&src_eb->refs);
2071 0 : src_path->nodes[root_level] = src_eb;
2072 0 : src_path->slots[root_level] = dst_path->slots[root_level];
2073 0 : src_path->locks[root_level] = 0;
2074 :
2075 : /* A simplified version of btrfs_search_slot() */
2076 0 : while (cur_level >= dst_level) {
2077 0 : struct btrfs_key src_key;
2078 0 : struct btrfs_key dst_key;
2079 :
2080 0 : if (src_path->nodes[cur_level] == NULL) {
2081 0 : struct extent_buffer *eb;
2082 0 : int parent_slot;
2083 :
2084 0 : eb = src_path->nodes[cur_level + 1];
2085 0 : parent_slot = src_path->slots[cur_level + 1];
2086 :
2087 0 : eb = btrfs_read_node_slot(eb, parent_slot);
2088 0 : if (IS_ERR(eb)) {
2089 0 : ret = PTR_ERR(eb);
2090 0 : goto out;
2091 : }
2092 :
2093 0 : src_path->nodes[cur_level] = eb;
2094 :
2095 0 : btrfs_tree_read_lock(eb);
2096 0 : src_path->locks[cur_level] = BTRFS_READ_LOCK;
2097 : }
2098 :
2099 0 : src_path->slots[cur_level] = dst_path->slots[cur_level];
2100 0 : if (cur_level) {
2101 0 : btrfs_node_key_to_cpu(dst_path->nodes[cur_level],
2102 : &dst_key, dst_path->slots[cur_level]);
2103 0 : btrfs_node_key_to_cpu(src_path->nodes[cur_level],
2104 : &src_key, src_path->slots[cur_level]);
2105 : } else {
2106 0 : btrfs_item_key_to_cpu(dst_path->nodes[cur_level],
2107 : &dst_key, dst_path->slots[cur_level]);
2108 0 : btrfs_item_key_to_cpu(src_path->nodes[cur_level],
2109 : &src_key, src_path->slots[cur_level]);
2110 : }
2111 : /* Content mismatch, something went wrong */
2112 0 : if (btrfs_comp_cpu_keys(&dst_key, &src_key)) {
2113 0 : ret = -ENOENT;
2114 0 : goto out;
2115 : }
2116 0 : cur_level--;
2117 : }
2118 :
2119 : /*
2120 : * Now both @dst_path and @src_path have been populated, record the tree
2121 : * blocks for qgroup accounting.
2122 : */
2123 0 : ret = btrfs_qgroup_trace_extent(trans, src_path->nodes[dst_level]->start,
2124 : nodesize);
2125 0 : if (ret < 0)
2126 0 : goto out;
2127 0 : ret = btrfs_qgroup_trace_extent(trans, dst_path->nodes[dst_level]->start,
2128 : nodesize);
2129 0 : if (ret < 0)
2130 0 : goto out;
2131 :
2132 : /* Record leaf file extents */
2133 0 : if (dst_level == 0 && trace_leaf) {
2134 0 : ret = btrfs_qgroup_trace_leaf_items(trans, src_path->nodes[0]);
2135 0 : if (ret < 0)
2136 0 : goto out;
2137 0 : ret = btrfs_qgroup_trace_leaf_items(trans, dst_path->nodes[0]);
2138 : }
2139 0 : out:
2140 0 : btrfs_free_path(src_path);
2141 0 : return ret;
2142 : }
2143 :
2144 : /*
2145 : * Helper function to do recursive generation-aware depth-first search, to
2146 : * locate all new tree blocks in a subtree of reloc tree.
2147 : *
2148 : * E.g. (OO = Old tree blocks, NN = New tree blocks, whose gen == last_snapshot)
2149 : * reloc tree
2150 : * L2 NN (a)
2151 : * / \
2152 : * L1 OO NN (b)
2153 : * / \ / \
2154 : * L0 OO OO OO NN
2155 : * (c) (d)
2156 : * If we pass:
2157 : * @dst_path = [ nodes[1] = NN(b), nodes[0] = NULL ],
2158 : * @cur_level = 1
2159 : * @root_level = 1
2160 : *
2161 : * We will iterate through tree blocks NN(b), NN(d) and info qgroup to trace
2162 : * above tree blocks along with their counter parts in file tree.
2163 : * While during search, old tree blocks OO(c) will be skipped as tree block swap
2164 : * won't affect OO(c).
2165 : */
2166 0 : static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans,
2167 : struct extent_buffer *src_eb,
2168 : struct btrfs_path *dst_path,
2169 : int cur_level, int root_level,
2170 : u64 last_snapshot, bool trace_leaf)
2171 : {
2172 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
2173 0 : struct extent_buffer *eb;
2174 0 : bool need_cleanup = false;
2175 0 : int ret = 0;
2176 0 : int i;
2177 :
2178 : /* Level sanity check */
2179 0 : if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 ||
2180 0 : root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 ||
2181 0 : root_level < cur_level) {
2182 0 : btrfs_err_rl(fs_info,
2183 : "%s: bad levels, cur_level=%d root_level=%d",
2184 : __func__, cur_level, root_level);
2185 0 : return -EUCLEAN;
2186 : }
2187 :
2188 : /* Read the tree block if needed */
2189 0 : if (dst_path->nodes[cur_level] == NULL) {
2190 0 : int parent_slot;
2191 0 : u64 child_gen;
2192 :
2193 : /*
2194 : * dst_path->nodes[root_level] must be initialized before
2195 : * calling this function.
2196 : */
2197 0 : if (cur_level == root_level) {
2198 0 : btrfs_err_rl(fs_info,
2199 : "%s: dst_path->nodes[%d] not initialized, root_level=%d cur_level=%d",
2200 : __func__, root_level, root_level, cur_level);
2201 0 : return -EUCLEAN;
2202 : }
2203 :
2204 : /*
2205 : * We need to get child blockptr/gen from parent before we can
2206 : * read it.
2207 : */
2208 0 : eb = dst_path->nodes[cur_level + 1];
2209 0 : parent_slot = dst_path->slots[cur_level + 1];
2210 0 : child_gen = btrfs_node_ptr_generation(eb, parent_slot);
2211 :
2212 : /* This node is old, no need to trace */
2213 0 : if (child_gen < last_snapshot)
2214 0 : goto out;
2215 :
2216 0 : eb = btrfs_read_node_slot(eb, parent_slot);
2217 0 : if (IS_ERR(eb)) {
2218 0 : ret = PTR_ERR(eb);
2219 0 : goto out;
2220 : }
2221 :
2222 0 : dst_path->nodes[cur_level] = eb;
2223 0 : dst_path->slots[cur_level] = 0;
2224 :
2225 0 : btrfs_tree_read_lock(eb);
2226 0 : dst_path->locks[cur_level] = BTRFS_READ_LOCK;
2227 0 : need_cleanup = true;
2228 : }
2229 :
2230 : /* Now record this tree block and its counter part for qgroups */
2231 0 : ret = qgroup_trace_extent_swap(trans, src_eb, dst_path, cur_level,
2232 : root_level, trace_leaf);
2233 0 : if (ret < 0)
2234 0 : goto cleanup;
2235 :
2236 0 : eb = dst_path->nodes[cur_level];
2237 :
2238 0 : if (cur_level > 0) {
2239 : /* Iterate all child tree blocks */
2240 0 : for (i = 0; i < btrfs_header_nritems(eb); i++) {
2241 : /* Skip old tree blocks as they won't be swapped */
2242 0 : if (btrfs_node_ptr_generation(eb, i) < last_snapshot)
2243 0 : continue;
2244 0 : dst_path->slots[cur_level] = i;
2245 :
2246 : /* Recursive call (at most 7 times) */
2247 0 : ret = qgroup_trace_new_subtree_blocks(trans, src_eb,
2248 : dst_path, cur_level - 1, root_level,
2249 : last_snapshot, trace_leaf);
2250 0 : if (ret < 0)
2251 0 : goto cleanup;
2252 : }
2253 : }
2254 :
2255 0 : cleanup:
2256 0 : if (need_cleanup) {
2257 : /* Clean up */
2258 0 : btrfs_tree_unlock_rw(dst_path->nodes[cur_level],
2259 0 : dst_path->locks[cur_level]);
2260 0 : free_extent_buffer(dst_path->nodes[cur_level]);
2261 0 : dst_path->nodes[cur_level] = NULL;
2262 0 : dst_path->slots[cur_level] = 0;
2263 0 : dst_path->locks[cur_level] = 0;
2264 : }
2265 0 : out:
2266 : return ret;
2267 : }
2268 :
2269 0 : static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
2270 : struct extent_buffer *src_eb,
2271 : struct extent_buffer *dst_eb,
2272 : u64 last_snapshot, bool trace_leaf)
2273 : {
2274 0 : struct btrfs_fs_info *fs_info = trans->fs_info;
2275 0 : struct btrfs_path *dst_path = NULL;
2276 0 : int level;
2277 0 : int ret;
2278 :
2279 0 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2280 : return 0;
2281 :
2282 : /* Wrong parameter order */
2283 0 : if (btrfs_header_generation(src_eb) > btrfs_header_generation(dst_eb)) {
2284 0 : btrfs_err_rl(fs_info,
2285 : "%s: bad parameter order, src_gen=%llu dst_gen=%llu", __func__,
2286 : btrfs_header_generation(src_eb),
2287 : btrfs_header_generation(dst_eb));
2288 0 : return -EUCLEAN;
2289 : }
2290 :
2291 0 : if (!extent_buffer_uptodate(src_eb) || !extent_buffer_uptodate(dst_eb)) {
2292 0 : ret = -EIO;
2293 0 : goto out;
2294 : }
2295 :
2296 0 : level = btrfs_header_level(dst_eb);
2297 0 : dst_path = btrfs_alloc_path();
2298 0 : if (!dst_path) {
2299 0 : ret = -ENOMEM;
2300 0 : goto out;
2301 : }
2302 : /* For dst_path */
2303 0 : atomic_inc(&dst_eb->refs);
2304 0 : dst_path->nodes[level] = dst_eb;
2305 0 : dst_path->slots[level] = 0;
2306 0 : dst_path->locks[level] = 0;
2307 :
2308 : /* Do the generation aware breadth-first search */
2309 0 : ret = qgroup_trace_new_subtree_blocks(trans, src_eb, dst_path, level,
2310 : level, last_snapshot, trace_leaf);
2311 0 : if (ret < 0)
2312 : goto out;
2313 : ret = 0;
2314 :
2315 0 : out:
2316 0 : btrfs_free_path(dst_path);
2317 0 : if (ret < 0)
2318 0 : qgroup_mark_inconsistent(fs_info);
2319 : return ret;
2320 : }
2321 :
2322 33697 : int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
2323 : struct extent_buffer *root_eb,
2324 : u64 root_gen, int root_level)
2325 : {
2326 33697 : struct btrfs_fs_info *fs_info = trans->fs_info;
2327 33697 : int ret = 0;
2328 33697 : int level;
2329 33697 : u8 drop_subptree_thres;
2330 33697 : struct extent_buffer *eb = root_eb;
2331 33697 : struct btrfs_path *path = NULL;
2332 :
2333 33697 : BUG_ON(root_level < 0 || root_level >= BTRFS_MAX_LEVEL);
2334 33697 : BUG_ON(root_eb == NULL);
2335 :
2336 33697 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2337 : return 0;
2338 :
2339 33067 : spin_lock(&fs_info->qgroup_lock);
2340 33067 : drop_subptree_thres = fs_info->qgroup_drop_subtree_thres;
2341 33067 : spin_unlock(&fs_info->qgroup_lock);
2342 :
2343 : /*
2344 : * This function only gets called for snapshot drop, if we hit a high
2345 : * node here, it means we are going to change ownership for quite a lot
2346 : * of extents, which will greatly slow down btrfs_commit_transaction().
2347 : *
2348 : * So here if we find a high tree here, we just skip the accounting and
2349 : * mark qgroup inconsistent.
2350 : */
2351 33067 : if (root_level >= drop_subptree_thres) {
2352 0 : qgroup_mark_inconsistent(fs_info);
2353 0 : return 0;
2354 : }
2355 :
2356 66134 : if (!extent_buffer_uptodate(root_eb)) {
2357 0 : struct btrfs_tree_parent_check check = {
2358 : .has_first_key = false,
2359 : .transid = root_gen,
2360 : .level = root_level
2361 : };
2362 :
2363 0 : ret = btrfs_read_extent_buffer(root_eb, &check);
2364 0 : if (ret)
2365 0 : goto out;
2366 : }
2367 :
2368 33067 : if (root_level == 0) {
2369 33067 : ret = btrfs_qgroup_trace_leaf_items(trans, root_eb);
2370 33067 : goto out;
2371 : }
2372 :
2373 0 : path = btrfs_alloc_path();
2374 0 : if (!path)
2375 : return -ENOMEM;
2376 :
2377 : /*
2378 : * Walk down the tree. Missing extent blocks are filled in as
2379 : * we go. Metadata is accounted every time we read a new
2380 : * extent block.
2381 : *
2382 : * When we reach a leaf, we account for file extent items in it,
2383 : * walk back up the tree (adjusting slot pointers as we go)
2384 : * and restart the search process.
2385 : */
2386 0 : atomic_inc(&root_eb->refs); /* For path */
2387 0 : path->nodes[root_level] = root_eb;
2388 0 : path->slots[root_level] = 0;
2389 0 : path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
2390 0 : walk_down:
2391 0 : level = root_level;
2392 0 : while (level >= 0) {
2393 0 : if (path->nodes[level] == NULL) {
2394 0 : int parent_slot;
2395 0 : u64 child_bytenr;
2396 :
2397 : /*
2398 : * We need to get child blockptr from parent before we
2399 : * can read it.
2400 : */
2401 0 : eb = path->nodes[level + 1];
2402 0 : parent_slot = path->slots[level + 1];
2403 0 : child_bytenr = btrfs_node_blockptr(eb, parent_slot);
2404 :
2405 0 : eb = btrfs_read_node_slot(eb, parent_slot);
2406 0 : if (IS_ERR(eb)) {
2407 0 : ret = PTR_ERR(eb);
2408 0 : goto out;
2409 : }
2410 :
2411 0 : path->nodes[level] = eb;
2412 0 : path->slots[level] = 0;
2413 :
2414 0 : btrfs_tree_read_lock(eb);
2415 0 : path->locks[level] = BTRFS_READ_LOCK;
2416 :
2417 0 : ret = btrfs_qgroup_trace_extent(trans, child_bytenr,
2418 0 : fs_info->nodesize);
2419 0 : if (ret)
2420 0 : goto out;
2421 : }
2422 :
2423 0 : if (level == 0) {
2424 0 : ret = btrfs_qgroup_trace_leaf_items(trans,
2425 : path->nodes[level]);
2426 0 : if (ret)
2427 0 : goto out;
2428 :
2429 : /* Nonzero return here means we completed our search */
2430 0 : ret = adjust_slots_upwards(path, root_level);
2431 0 : if (ret)
2432 : break;
2433 :
2434 : /* Restart search with new slots */
2435 0 : goto walk_down;
2436 : }
2437 :
2438 0 : level--;
2439 : }
2440 :
2441 : ret = 0;
2442 33067 : out:
2443 33067 : btrfs_free_path(path);
2444 :
2445 33067 : return ret;
2446 : }
2447 :
2448 : #define UPDATE_NEW 0
2449 : #define UPDATE_OLD 1
2450 : /*
2451 : * Walk all of the roots that points to the bytenr and adjust their refcnts.
2452 : */
2453 421048 : static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
2454 : struct ulist *roots, struct ulist *tmp,
2455 : struct ulist *qgroups, u64 seq, int update_old)
2456 : {
2457 421048 : struct ulist_node *unode;
2458 421048 : struct ulist_iterator uiter;
2459 421048 : struct ulist_node *tmp_unode;
2460 421048 : struct ulist_iterator tmp_uiter;
2461 421048 : struct btrfs_qgroup *qg;
2462 421048 : int ret = 0;
2463 :
2464 421048 : if (!roots)
2465 : return 0;
2466 400442 : ULIST_ITER_INIT(&uiter);
2467 5117900 : while ((unode = ulist_next(roots, &uiter))) {
2468 4717458 : qg = find_qgroup_rb(fs_info, unode->val);
2469 4717458 : if (!qg)
2470 0 : continue;
2471 :
2472 4717458 : ulist_reinit(tmp);
2473 4717458 : ret = ulist_add(qgroups, qg->qgroupid, qgroup_to_aux(qg),
2474 : GFP_ATOMIC);
2475 4717458 : if (ret < 0)
2476 0 : return ret;
2477 4717458 : ret = ulist_add(tmp, qg->qgroupid, qgroup_to_aux(qg), GFP_ATOMIC);
2478 4717458 : if (ret < 0)
2479 0 : return ret;
2480 4717458 : ULIST_ITER_INIT(&tmp_uiter);
2481 9438252 : while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
2482 4720794 : struct btrfs_qgroup_list *glist;
2483 :
2484 4720794 : qg = unode_aux_to_qgroup(tmp_unode);
2485 4720794 : if (update_old)
2486 2441649 : btrfs_qgroup_update_old_refcnt(qg, seq, 1);
2487 : else
2488 2279145 : btrfs_qgroup_update_new_refcnt(qg, seq, 1);
2489 4724130 : list_for_each_entry(glist, &qg->groups, next_group) {
2490 3336 : ret = ulist_add(qgroups, glist->group->qgroupid,
2491 : qgroup_to_aux(glist->group),
2492 : GFP_ATOMIC);
2493 3336 : if (ret < 0)
2494 0 : return ret;
2495 3336 : ret = ulist_add(tmp, glist->group->qgroupid,
2496 : qgroup_to_aux(glist->group),
2497 : GFP_ATOMIC);
2498 3336 : if (ret < 0)
2499 0 : return ret;
2500 : }
2501 : }
2502 : }
2503 : return 0;
2504 : }
2505 :
2506 : /*
2507 : * Update qgroup rfer/excl counters.
2508 : * Rfer update is easy, codes can explain themselves.
2509 : *
2510 : * Excl update is tricky, the update is split into 2 parts.
2511 : * Part 1: Possible exclusive <-> sharing detect:
2512 : * | A | !A |
2513 : * -------------------------------------
2514 : * B | * | - |
2515 : * -------------------------------------
2516 : * !B | + | ** |
2517 : * -------------------------------------
2518 : *
2519 : * Conditions:
2520 : * A: cur_old_roots < nr_old_roots (not exclusive before)
2521 : * !A: cur_old_roots == nr_old_roots (possible exclusive before)
2522 : * B: cur_new_roots < nr_new_roots (not exclusive now)
2523 : * !B: cur_new_roots == nr_new_roots (possible exclusive now)
2524 : *
2525 : * Results:
2526 : * +: Possible sharing -> exclusive -: Possible exclusive -> sharing
2527 : * *: Definitely not changed. **: Possible unchanged.
2528 : *
2529 : * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
2530 : *
2531 : * To make the logic clear, we first use condition A and B to split
2532 : * combination into 4 results.
2533 : *
2534 : * Then, for result "+" and "-", check old/new_roots == 0 case, as in them
2535 : * only on variant maybe 0.
2536 : *
2537 : * Lastly, check result **, since there are 2 variants maybe 0, split them
2538 : * again(2x2).
2539 : * But this time we don't need to consider other things, the codes and logic
2540 : * is easy to understand now.
2541 : */
2542 210524 : static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
2543 : struct ulist *qgroups,
2544 : u64 nr_old_roots,
2545 : u64 nr_new_roots,
2546 : u64 num_bytes, u64 seq)
2547 : {
2548 210524 : struct ulist_node *unode;
2549 210524 : struct ulist_iterator uiter;
2550 210524 : struct btrfs_qgroup *qg;
2551 210524 : u64 cur_new_count, cur_old_count;
2552 :
2553 210524 : ULIST_ITER_INIT(&uiter);
2554 2720970 : while ((unode = ulist_next(qgroups, &uiter))) {
2555 2510446 : bool dirty = false;
2556 :
2557 2510446 : qg = unode_aux_to_qgroup(unode);
2558 2510446 : cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
2559 2510446 : cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
2560 :
2561 2510446 : trace_qgroup_update_counters(fs_info, qg, cur_old_count,
2562 : cur_new_count);
2563 :
2564 : /* Rfer update part */
2565 2510446 : if (cur_old_count == 0 && cur_new_count > 0) {
2566 68797 : qg->rfer += num_bytes;
2567 68797 : qg->rfer_cmpr += num_bytes;
2568 68797 : dirty = true;
2569 : }
2570 2510446 : if (cur_old_count > 0 && cur_new_count == 0) {
2571 231302 : qg->rfer -= num_bytes;
2572 231302 : qg->rfer_cmpr -= num_bytes;
2573 231302 : dirty = true;
2574 : }
2575 :
2576 : /* Excl update part */
2577 : /* Exclusive/none -> shared case */
2578 2510446 : if (cur_old_count == nr_old_roots &&
2579 2510446 : cur_new_count < nr_new_roots) {
2580 : /* Exclusive -> shared */
2581 3087 : if (cur_old_count != 0) {
2582 0 : qg->excl -= num_bytes;
2583 0 : qg->excl_cmpr -= num_bytes;
2584 0 : dirty = true;
2585 : }
2586 : }
2587 :
2588 : /* Shared -> exclusive/none case */
2589 2510446 : if (cur_old_count < nr_old_roots &&
2590 2510446 : cur_new_count == nr_new_roots) {
2591 : /* Shared->exclusive */
2592 5315 : if (cur_new_count != 0) {
2593 4837 : qg->excl += num_bytes;
2594 4837 : qg->excl_cmpr += num_bytes;
2595 4837 : dirty = true;
2596 : }
2597 : }
2598 :
2599 : /* Exclusive/none -> exclusive/none case */
2600 2510446 : if (cur_old_count == nr_old_roots &&
2601 : cur_new_count == nr_new_roots) {
2602 103936 : if (cur_old_count == 0) {
2603 : /* None -> exclusive/none */
2604 :
2605 65710 : if (cur_new_count != 0) {
2606 : /* None -> exclusive */
2607 65710 : qg->excl += num_bytes;
2608 65710 : qg->excl_cmpr += num_bytes;
2609 65710 : dirty = true;
2610 : }
2611 : /* None -> none, nothing changed */
2612 : } else {
2613 : /* Exclusive -> exclusive/none */
2614 :
2615 38226 : if (cur_new_count == 0) {
2616 : /* Exclusive -> none */
2617 26636 : qg->excl -= num_bytes;
2618 26636 : qg->excl_cmpr -= num_bytes;
2619 26636 : dirty = true;
2620 : }
2621 : /* Exclusive -> exclusive, nothing changed */
2622 : }
2623 : }
2624 :
2625 2510446 : if (dirty)
2626 304936 : qgroup_dirty(fs_info, qg);
2627 : }
2628 210524 : return 0;
2629 : }
2630 :
2631 : /*
2632 : * Check if the @roots potentially is a list of fs tree roots
2633 : *
2634 : * Return 0 for definitely not a fs/subvol tree roots ulist
2635 : * Return 1 for possible fs/subvol tree roots in the list (considering an empty
2636 : * one as well)
2637 : */
2638 413913 : static int maybe_fs_roots(struct ulist *roots)
2639 : {
2640 413913 : struct ulist_node *unode;
2641 413913 : struct ulist_iterator uiter;
2642 :
2643 : /* Empty one, still possible for fs roots */
2644 413913 : if (!roots || roots->nnodes == 0)
2645 : return 1;
2646 :
2647 334857 : ULIST_ITER_INIT(&uiter);
2648 334857 : unode = ulist_next(roots, &uiter);
2649 334857 : if (!unode)
2650 : return 1;
2651 :
2652 : /*
2653 : * If it contains fs tree roots, then it must belong to fs/subvol
2654 : * trees.
2655 : * If it contains a non-fs tree, it won't be shared with fs/subvol trees.
2656 : */
2657 334857 : return is_fstree(unode->val);
2658 : }
2659 :
2660 221628 : int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
2661 : u64 num_bytes, struct ulist *old_roots,
2662 : struct ulist *new_roots)
2663 : {
2664 221628 : struct btrfs_fs_info *fs_info = trans->fs_info;
2665 221628 : struct ulist *qgroups = NULL;
2666 221628 : struct ulist *tmp = NULL;
2667 221628 : u64 seq;
2668 221628 : u64 nr_new_roots = 0;
2669 221628 : u64 nr_old_roots = 0;
2670 221628 : int ret = 0;
2671 :
2672 : /*
2673 : * If quotas get disabled meanwhile, the resources need to be freed and
2674 : * we can't just exit here.
2675 : */
2676 221628 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
2677 221052 : fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
2678 576 : goto out_free;
2679 :
2680 221052 : if (new_roots) {
2681 221052 : if (!maybe_fs_roots(new_roots))
2682 4534 : goto out_free;
2683 216518 : nr_new_roots = new_roots->nnodes;
2684 : }
2685 216518 : if (old_roots) {
2686 192861 : if (!maybe_fs_roots(old_roots))
2687 0 : goto out_free;
2688 192861 : nr_old_roots = old_roots->nnodes;
2689 : }
2690 :
2691 : /* Quick exit, either not fs tree roots, or won't affect any qgroup */
2692 216518 : if (nr_old_roots == 0 && nr_new_roots == 0)
2693 5994 : goto out_free;
2694 :
2695 210524 : BUG_ON(!fs_info->quota_root);
2696 :
2697 210524 : trace_btrfs_qgroup_account_extent(fs_info, trans->transid, bytenr,
2698 : num_bytes, nr_old_roots, nr_new_roots);
2699 :
2700 210524 : qgroups = ulist_alloc(GFP_NOFS);
2701 210524 : if (!qgroups) {
2702 0 : ret = -ENOMEM;
2703 0 : goto out_free;
2704 : }
2705 210524 : tmp = ulist_alloc(GFP_NOFS);
2706 210524 : if (!tmp) {
2707 0 : ret = -ENOMEM;
2708 0 : goto out_free;
2709 : }
2710 :
2711 210524 : mutex_lock(&fs_info->qgroup_rescan_lock);
2712 210524 : if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
2713 20607 : if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
2714 0 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2715 0 : ret = 0;
2716 0 : goto out_free;
2717 : }
2718 : }
2719 210524 : mutex_unlock(&fs_info->qgroup_rescan_lock);
2720 :
2721 210524 : spin_lock(&fs_info->qgroup_lock);
2722 210524 : seq = fs_info->qgroup_seq;
2723 :
2724 : /* Update old refcnts using old_roots */
2725 210524 : ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
2726 : UPDATE_OLD);
2727 210524 : if (ret < 0)
2728 0 : goto out;
2729 :
2730 : /* Update new refcnts using new_roots */
2731 210524 : ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
2732 : UPDATE_NEW);
2733 210524 : if (ret < 0)
2734 0 : goto out;
2735 :
2736 210524 : qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
2737 : num_bytes, seq);
2738 :
2739 : /*
2740 : * Bump qgroup_seq to avoid seq overlap
2741 : */
2742 210524 : fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
2743 210524 : out:
2744 210524 : spin_unlock(&fs_info->qgroup_lock);
2745 221628 : out_free:
2746 221628 : ulist_free(tmp);
2747 221628 : ulist_free(qgroups);
2748 221628 : ulist_free(old_roots);
2749 221628 : ulist_free(new_roots);
2750 221628 : return ret;
2751 : }
2752 :
2753 206349 : int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
2754 : {
2755 206349 : struct btrfs_fs_info *fs_info = trans->fs_info;
2756 206349 : struct btrfs_qgroup_extent_record *record;
2757 206349 : struct btrfs_delayed_ref_root *delayed_refs;
2758 206349 : struct ulist *new_roots = NULL;
2759 206349 : struct rb_node *node;
2760 206349 : u64 num_dirty_extents = 0;
2761 206349 : u64 qgroup_to_skip;
2762 206349 : int ret = 0;
2763 :
2764 206349 : delayed_refs = &trans->transaction->delayed_refs;
2765 206349 : qgroup_to_skip = delayed_refs->qgroup_to_skip;
2766 399216 : while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
2767 192867 : record = rb_entry(node, struct btrfs_qgroup_extent_record,
2768 : node);
2769 :
2770 192867 : num_dirty_extents++;
2771 192867 : trace_btrfs_qgroup_account_extents(fs_info, record);
2772 :
2773 192867 : if (!ret && !(fs_info->qgroup_flags &
2774 : BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) {
2775 192861 : struct btrfs_backref_walk_ctx ctx = { 0 };
2776 :
2777 192861 : ctx.bytenr = record->bytenr;
2778 192861 : ctx.fs_info = fs_info;
2779 :
2780 : /*
2781 : * Old roots should be searched when inserting qgroup
2782 : * extent record.
2783 : *
2784 : * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case,
2785 : * we may have some record inserted during
2786 : * NO_ACCOUNTING (thus no old_roots populated), but
2787 : * later we start rescan, which clears NO_ACCOUNTING,
2788 : * leaving some inserted records without old_roots
2789 : * populated.
2790 : *
2791 : * Those cases are rare and should not cause too much
2792 : * time spent during commit_transaction().
2793 : */
2794 192861 : if (!record->old_roots) {
2795 : /* Search commit root to find old_roots */
2796 0 : ret = btrfs_find_all_roots(&ctx, false);
2797 0 : if (ret < 0)
2798 0 : goto cleanup;
2799 0 : record->old_roots = ctx.roots;
2800 0 : ctx.roots = NULL;
2801 : }
2802 :
2803 : /* Free the reserved data space */
2804 192861 : btrfs_qgroup_free_refroot(fs_info,
2805 : record->data_rsv_refroot,
2806 192861 : record->data_rsv,
2807 : BTRFS_QGROUP_RSV_DATA);
2808 : /*
2809 : * Use BTRFS_SEQ_LAST as time_seq to do special search,
2810 : * which doesn't lock tree or delayed_refs and search
2811 : * current root. It's safe inside commit_transaction().
2812 : */
2813 192861 : ctx.trans = trans;
2814 192861 : ctx.time_seq = BTRFS_SEQ_LAST;
2815 192861 : ret = btrfs_find_all_roots(&ctx, false);
2816 192861 : if (ret < 0)
2817 0 : goto cleanup;
2818 192861 : new_roots = ctx.roots;
2819 192861 : if (qgroup_to_skip) {
2820 116847 : ulist_del(new_roots, qgroup_to_skip, 0);
2821 116847 : ulist_del(record->old_roots, qgroup_to_skip,
2822 : 0);
2823 : }
2824 192861 : ret = btrfs_qgroup_account_extent(trans, record->bytenr,
2825 : record->num_bytes,
2826 : record->old_roots,
2827 : new_roots);
2828 192861 : record->old_roots = NULL;
2829 192861 : new_roots = NULL;
2830 : }
2831 6 : cleanup:
2832 192867 : ulist_free(record->old_roots);
2833 192867 : ulist_free(new_roots);
2834 192867 : new_roots = NULL;
2835 192867 : rb_erase(node, &delayed_refs->dirty_extent_root);
2836 192867 : kfree(record);
2837 :
2838 : }
2839 206349 : trace_qgroup_num_dirty_extents(fs_info, trans->transid,
2840 : num_dirty_extents);
2841 206349 : return ret;
2842 : }
2843 :
2844 : /*
2845 : * Writes all changed qgroups to disk.
2846 : * Called by the transaction commit path and the qgroup assign ioctl.
2847 : */
2848 206355 : int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
2849 : {
2850 206355 : struct btrfs_fs_info *fs_info = trans->fs_info;
2851 206355 : int ret = 0;
2852 :
2853 : /*
2854 : * In case we are called from the qgroup assign ioctl, assert that we
2855 : * are holding the qgroup_ioctl_lock, otherwise we can race with a quota
2856 : * disable operation (ioctl) and access a freed quota root.
2857 : */
2858 206355 : if (trans->transaction->state != TRANS_STATE_COMMIT_DOING)
2859 206355 : lockdep_assert_held(&fs_info->qgroup_ioctl_lock);
2860 :
2861 206355 : if (!fs_info->quota_root)
2862 : return ret;
2863 :
2864 11411 : spin_lock(&fs_info->qgroup_lock);
2865 18037 : while (!list_empty(&fs_info->dirty_qgroups)) {
2866 6626 : struct btrfs_qgroup *qgroup;
2867 6626 : qgroup = list_first_entry(&fs_info->dirty_qgroups,
2868 : struct btrfs_qgroup, dirty);
2869 6626 : list_del_init(&qgroup->dirty);
2870 6626 : spin_unlock(&fs_info->qgroup_lock);
2871 6626 : ret = update_qgroup_info_item(trans, qgroup);
2872 6626 : if (ret)
2873 0 : qgroup_mark_inconsistent(fs_info);
2874 6626 : ret = update_qgroup_limit_item(trans, qgroup);
2875 6626 : if (ret)
2876 0 : qgroup_mark_inconsistent(fs_info);
2877 6626 : spin_lock(&fs_info->qgroup_lock);
2878 : }
2879 22822 : if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2880 11391 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
2881 : else
2882 20 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
2883 11411 : spin_unlock(&fs_info->qgroup_lock);
2884 :
2885 11411 : ret = update_qgroup_status_item(trans);
2886 11411 : if (ret)
2887 0 : qgroup_mark_inconsistent(fs_info);
2888 :
2889 : return ret;
2890 : }
2891 :
2892 : /*
2893 : * Copy the accounting information between qgroups. This is necessary
2894 : * when a snapshot or a subvolume is created. Throwing an error will
2895 : * cause a transaction abort so we take extra care here to only error
2896 : * when a readonly fs is a reasonable outcome.
2897 : */
2898 426 : int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
2899 : u64 objectid, struct btrfs_qgroup_inherit *inherit)
2900 : {
2901 426 : int ret = 0;
2902 426 : int i;
2903 426 : u64 *i_qgroups;
2904 426 : bool committing = false;
2905 426 : struct btrfs_fs_info *fs_info = trans->fs_info;
2906 426 : struct btrfs_root *quota_root;
2907 426 : struct btrfs_qgroup *srcgroup;
2908 426 : struct btrfs_qgroup *dstgroup;
2909 426 : bool need_rescan = false;
2910 426 : u32 level_size = 0;
2911 426 : u64 nums;
2912 :
2913 : /*
2914 : * There are only two callers of this function.
2915 : *
2916 : * One in create_subvol() in the ioctl context, which needs to hold
2917 : * the qgroup_ioctl_lock.
2918 : *
2919 : * The other one in create_pending_snapshot() where no other qgroup
2920 : * code can modify the fs as they all need to either start a new trans
2921 : * or hold a trans handler, thus we don't need to hold
2922 : * qgroup_ioctl_lock.
2923 : * This would avoid long and complex lock chain and make lockdep happy.
2924 : */
2925 426 : spin_lock(&fs_info->trans_lock);
2926 426 : if (trans->transaction->state == TRANS_STATE_COMMIT_DOING)
2927 174 : committing = true;
2928 426 : spin_unlock(&fs_info->trans_lock);
2929 :
2930 426 : if (!committing)
2931 252 : mutex_lock(&fs_info->qgroup_ioctl_lock);
2932 426 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
2933 227 : goto out;
2934 :
2935 199 : quota_root = fs_info->quota_root;
2936 199 : if (!quota_root) {
2937 0 : ret = -EINVAL;
2938 0 : goto out;
2939 : }
2940 :
2941 199 : if (inherit) {
2942 13 : i_qgroups = (u64 *)(inherit + 1);
2943 13 : nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
2944 13 : 2 * inherit->num_excl_copies;
2945 26 : for (i = 0; i < nums; ++i) {
2946 13 : srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
2947 :
2948 : /*
2949 : * Zero out invalid groups so we can ignore
2950 : * them later.
2951 : */
2952 13 : if (!srcgroup ||
2953 12 : ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
2954 2 : *i_qgroups = 0ULL;
2955 :
2956 13 : ++i_qgroups;
2957 : }
2958 : }
2959 :
2960 : /*
2961 : * create a tracking group for the subvol itself
2962 : */
2963 199 : ret = add_qgroup_item(trans, quota_root, objectid);
2964 199 : if (ret)
2965 0 : goto out;
2966 :
2967 : /*
2968 : * add qgroup to all inherited groups
2969 : */
2970 199 : if (inherit) {
2971 13 : i_qgroups = (u64 *)(inherit + 1);
2972 26 : for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
2973 13 : if (*i_qgroups == 0)
2974 2 : continue;
2975 11 : ret = add_qgroup_relation_item(trans, objectid,
2976 : *i_qgroups);
2977 11 : if (ret && ret != -EEXIST)
2978 0 : goto out;
2979 11 : ret = add_qgroup_relation_item(trans, *i_qgroups,
2980 : objectid);
2981 11 : if (ret && ret != -EEXIST)
2982 0 : goto out;
2983 : }
2984 : ret = 0;
2985 : }
2986 :
2987 :
2988 199 : spin_lock(&fs_info->qgroup_lock);
2989 :
2990 199 : dstgroup = add_qgroup_rb(fs_info, objectid);
2991 199 : if (IS_ERR(dstgroup)) {
2992 0 : ret = PTR_ERR(dstgroup);
2993 0 : goto unlock;
2994 : }
2995 :
2996 199 : if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
2997 0 : dstgroup->lim_flags = inherit->lim.flags;
2998 0 : dstgroup->max_rfer = inherit->lim.max_rfer;
2999 0 : dstgroup->max_excl = inherit->lim.max_excl;
3000 0 : dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
3001 0 : dstgroup->rsv_excl = inherit->lim.rsv_excl;
3002 :
3003 0 : qgroup_dirty(fs_info, dstgroup);
3004 : }
3005 :
3006 199 : if (srcid) {
3007 174 : srcgroup = find_qgroup_rb(fs_info, srcid);
3008 174 : if (!srcgroup)
3009 0 : goto unlock;
3010 :
3011 : /*
3012 : * We call inherit after we clone the root in order to make sure
3013 : * our counts don't go crazy, so at this point the only
3014 : * difference between the two roots should be the root node.
3015 : */
3016 174 : level_size = fs_info->nodesize;
3017 174 : dstgroup->rfer = srcgroup->rfer;
3018 174 : dstgroup->rfer_cmpr = srcgroup->rfer_cmpr;
3019 174 : dstgroup->excl = level_size;
3020 174 : dstgroup->excl_cmpr = level_size;
3021 174 : srcgroup->excl = level_size;
3022 174 : srcgroup->excl_cmpr = level_size;
3023 :
3024 : /* inherit the limit info */
3025 174 : dstgroup->lim_flags = srcgroup->lim_flags;
3026 174 : dstgroup->max_rfer = srcgroup->max_rfer;
3027 174 : dstgroup->max_excl = srcgroup->max_excl;
3028 174 : dstgroup->rsv_rfer = srcgroup->rsv_rfer;
3029 174 : dstgroup->rsv_excl = srcgroup->rsv_excl;
3030 :
3031 174 : qgroup_dirty(fs_info, dstgroup);
3032 174 : qgroup_dirty(fs_info, srcgroup);
3033 : }
3034 :
3035 199 : if (!inherit)
3036 186 : goto unlock;
3037 :
3038 13 : i_qgroups = (u64 *)(inherit + 1);
3039 26 : for (i = 0; i < inherit->num_qgroups; ++i) {
3040 13 : if (*i_qgroups) {
3041 11 : ret = add_relation_rb(fs_info, objectid, *i_qgroups);
3042 11 : if (ret)
3043 0 : goto unlock;
3044 : }
3045 13 : ++i_qgroups;
3046 :
3047 : /*
3048 : * If we're doing a snapshot, and adding the snapshot to a new
3049 : * qgroup, the numbers are guaranteed to be incorrect.
3050 : */
3051 13 : if (srcid)
3052 3 : need_rescan = true;
3053 : }
3054 :
3055 13 : for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
3056 0 : struct btrfs_qgroup *src;
3057 0 : struct btrfs_qgroup *dst;
3058 :
3059 0 : if (!i_qgroups[0] || !i_qgroups[1])
3060 0 : continue;
3061 :
3062 0 : src = find_qgroup_rb(fs_info, i_qgroups[0]);
3063 0 : dst = find_qgroup_rb(fs_info, i_qgroups[1]);
3064 :
3065 0 : if (!src || !dst) {
3066 0 : ret = -EINVAL;
3067 0 : goto unlock;
3068 : }
3069 :
3070 0 : dst->rfer = src->rfer - level_size;
3071 0 : dst->rfer_cmpr = src->rfer_cmpr - level_size;
3072 :
3073 : /* Manually tweaking numbers certainly needs a rescan */
3074 0 : need_rescan = true;
3075 : }
3076 13 : for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
3077 0 : struct btrfs_qgroup *src;
3078 0 : struct btrfs_qgroup *dst;
3079 :
3080 0 : if (!i_qgroups[0] || !i_qgroups[1])
3081 0 : continue;
3082 :
3083 0 : src = find_qgroup_rb(fs_info, i_qgroups[0]);
3084 0 : dst = find_qgroup_rb(fs_info, i_qgroups[1]);
3085 :
3086 0 : if (!src || !dst) {
3087 0 : ret = -EINVAL;
3088 0 : goto unlock;
3089 : }
3090 :
3091 0 : dst->excl = src->excl + level_size;
3092 0 : dst->excl_cmpr = src->excl_cmpr + level_size;
3093 0 : need_rescan = true;
3094 : }
3095 :
3096 13 : unlock:
3097 199 : spin_unlock(&fs_info->qgroup_lock);
3098 199 : if (!ret)
3099 199 : ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
3100 0 : out:
3101 426 : if (!committing)
3102 252 : mutex_unlock(&fs_info->qgroup_ioctl_lock);
3103 426 : if (need_rescan)
3104 3 : qgroup_mark_inconsistent(fs_info);
3105 426 : return ret;
3106 : }
3107 :
3108 3883557 : static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
3109 : {
3110 3883557 : if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
3111 115893 : qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
3112 : return false;
3113 :
3114 3842769 : if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
3115 3343560 : qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
3116 16 : return false;
3117 :
3118 : return true;
3119 : }
3120 :
3121 3888882 : static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
3122 : enum btrfs_qgroup_rsv_type type)
3123 : {
3124 3888882 : struct btrfs_qgroup *qgroup;
3125 3888882 : struct btrfs_fs_info *fs_info = root->fs_info;
3126 3888882 : u64 ref_root = root->root_key.objectid;
3127 3888882 : int ret = 0;
3128 3888882 : struct ulist_node *unode;
3129 3888882 : struct ulist_iterator uiter;
3130 :
3131 3888882 : if (!is_fstree(ref_root))
3132 : return 0;
3133 :
3134 3888880 : if (num_bytes == 0)
3135 : return 0;
3136 :
3137 7773658 : if (test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags) &&
3138 0 : capable(CAP_SYS_RESOURCE))
3139 0 : enforce = false;
3140 :
3141 3886829 : spin_lock(&fs_info->qgroup_lock);
3142 3887126 : if (!fs_info->quota_root)
3143 0 : goto out;
3144 :
3145 3887126 : qgroup = find_qgroup_rb(fs_info, ref_root);
3146 3887126 : if (!qgroup)
3147 0 : goto out;
3148 :
3149 : /*
3150 : * in a first step, we check all affected qgroups if any limits would
3151 : * be exceeded
3152 : */
3153 3887126 : ulist_reinit(fs_info->qgroup_ulist);
3154 3887126 : ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
3155 : qgroup_to_aux(qgroup), GFP_ATOMIC);
3156 3887126 : if (ret < 0)
3157 0 : goto out;
3158 3887126 : ULIST_ITER_INIT(&uiter);
3159 7738506 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
3160 3892184 : struct btrfs_qgroup *qg;
3161 3892184 : struct btrfs_qgroup_list *glist;
3162 :
3163 3892184 : qg = unode_aux_to_qgroup(unode);
3164 :
3165 3892184 : if (enforce && !qgroup_check_limits(qg, num_bytes)) {
3166 40804 : ret = -EDQUOT;
3167 40804 : goto out;
3168 : }
3169 :
3170 3856438 : list_for_each_entry(glist, &qg->groups, next_group) {
3171 5058 : ret = ulist_add(fs_info->qgroup_ulist,
3172 : glist->group->qgroupid,
3173 : qgroup_to_aux(glist->group), GFP_ATOMIC);
3174 5058 : if (ret < 0)
3175 0 : goto out;
3176 : }
3177 : }
3178 3846322 : ret = 0;
3179 : /*
3180 : * no limits exceeded, now record the reservation into all qgroups
3181 : */
3182 3846322 : ULIST_ITER_INIT(&uiter);
3183 7697529 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
3184 3851207 : struct btrfs_qgroup *qg;
3185 :
3186 3851207 : qg = unode_aux_to_qgroup(unode);
3187 :
3188 3851207 : qgroup_rsv_add(fs_info, qg, num_bytes, type);
3189 : }
3190 :
3191 3846322 : out:
3192 3887126 : spin_unlock(&fs_info->qgroup_lock);
3193 3887126 : return ret;
3194 : }
3195 :
3196 : /*
3197 : * Free @num_bytes of reserved space with @type for qgroup. (Normally level 0
3198 : * qgroup).
3199 : *
3200 : * Will handle all higher level qgroup too.
3201 : *
3202 : * NOTE: If @num_bytes is (u64)-1, this means to free all bytes of this qgroup.
3203 : * This special case is only used for META_PERTRANS type.
3204 : */
3205 5072352 : void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
3206 : u64 ref_root, u64 num_bytes,
3207 : enum btrfs_qgroup_rsv_type type)
3208 : {
3209 5072352 : struct btrfs_qgroup *qgroup;
3210 5072352 : struct ulist_node *unode;
3211 5072352 : struct ulist_iterator uiter;
3212 5072352 : int ret = 0;
3213 :
3214 5072352 : if (!is_fstree(ref_root))
3215 2511251 : return;
3216 :
3217 4893609 : if (num_bytes == 0)
3218 : return;
3219 :
3220 2561101 : if (num_bytes == (u64)-1 && type != BTRFS_QGROUP_RSV_META_PERTRANS) {
3221 0 : WARN(1, "%s: Invalid type to free", __func__);
3222 0 : return;
3223 : }
3224 2561101 : spin_lock(&fs_info->qgroup_lock);
3225 :
3226 2561107 : if (!fs_info->quota_root)
3227 0 : goto out;
3228 :
3229 2561107 : qgroup = find_qgroup_rb(fs_info, ref_root);
3230 2561107 : if (!qgroup)
3231 0 : goto out;
3232 :
3233 2561107 : if (num_bytes == (u64)-1)
3234 : /*
3235 : * We're freeing all pertrans rsv, get reserved value from
3236 : * level 0 qgroup as real num_bytes to free.
3237 : */
3238 11623 : num_bytes = qgroup->rsv.values[type];
3239 :
3240 2561107 : ulist_reinit(fs_info->qgroup_ulist);
3241 2561107 : ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
3242 : qgroup_to_aux(qgroup), GFP_ATOMIC);
3243 2561107 : if (ret < 0)
3244 0 : goto out;
3245 2561107 : ULIST_ITER_INIT(&uiter);
3246 5124923 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
3247 2563816 : struct btrfs_qgroup *qg;
3248 2563816 : struct btrfs_qgroup_list *glist;
3249 :
3250 2563816 : qg = unode_aux_to_qgroup(unode);
3251 :
3252 2563816 : qgroup_rsv_release(fs_info, qg, num_bytes, type);
3253 :
3254 2566525 : list_for_each_entry(glist, &qg->groups, next_group) {
3255 2709 : ret = ulist_add(fs_info->qgroup_ulist,
3256 : glist->group->qgroupid,
3257 : qgroup_to_aux(glist->group), GFP_ATOMIC);
3258 2709 : if (ret < 0)
3259 0 : goto out;
3260 : }
3261 : }
3262 :
3263 2561107 : out:
3264 2561107 : spin_unlock(&fs_info->qgroup_lock);
3265 : }
3266 :
3267 : /*
3268 : * Check if the leaf is the last leaf. Which means all node pointers
3269 : * are at their last position.
3270 : */
3271 251 : static bool is_last_leaf(struct btrfs_path *path)
3272 : {
3273 251 : int i;
3274 :
3275 273 : for (i = 1; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
3276 136 : if (path->slots[i] != btrfs_header_nritems(path->nodes[i]) - 1)
3277 : return false;
3278 : }
3279 : return true;
3280 : }
3281 :
3282 : /*
3283 : * returns < 0 on error, 0 when more leafs are to be scanned.
3284 : * returns 1 when done.
3285 : */
3286 252 : static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
3287 : struct btrfs_path *path)
3288 : {
3289 252 : struct btrfs_fs_info *fs_info = trans->fs_info;
3290 252 : struct btrfs_root *extent_root;
3291 252 : struct btrfs_key found;
3292 252 : struct extent_buffer *scratch_leaf = NULL;
3293 252 : u64 num_bytes;
3294 252 : bool done;
3295 252 : int slot;
3296 252 : int ret;
3297 :
3298 252 : mutex_lock(&fs_info->qgroup_rescan_lock);
3299 252 : extent_root = btrfs_extent_root(fs_info,
3300 : fs_info->qgroup_rescan_progress.objectid);
3301 252 : ret = btrfs_search_slot_for_read(extent_root,
3302 252 : &fs_info->qgroup_rescan_progress,
3303 : path, 1, 0);
3304 :
3305 252 : btrfs_debug(fs_info,
3306 : "current progress key (%llu %u %llu), search_slot ret %d",
3307 : fs_info->qgroup_rescan_progress.objectid,
3308 : fs_info->qgroup_rescan_progress.type,
3309 : fs_info->qgroup_rescan_progress.offset, ret);
3310 :
3311 252 : if (ret) {
3312 : /*
3313 : * The rescan is about to end, we will not be scanning any
3314 : * further blocks. We cannot unset the RESCAN flag here, because
3315 : * we want to commit the transaction if everything went well.
3316 : * To make the live accounting work in this phase, we set our
3317 : * scan progress pointer such that every real extent objectid
3318 : * will be smaller.
3319 : */
3320 1 : fs_info->qgroup_rescan_progress.objectid = (u64)-1;
3321 1 : btrfs_release_path(path);
3322 1 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3323 1 : return ret;
3324 : }
3325 251 : done = is_last_leaf(path);
3326 :
3327 251 : btrfs_item_key_to_cpu(path->nodes[0], &found,
3328 251 : btrfs_header_nritems(path->nodes[0]) - 1);
3329 251 : fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
3330 :
3331 251 : scratch_leaf = btrfs_clone_extent_buffer(path->nodes[0]);
3332 251 : if (!scratch_leaf) {
3333 0 : ret = -ENOMEM;
3334 0 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3335 0 : goto out;
3336 : }
3337 251 : slot = path->slots[0];
3338 251 : btrfs_release_path(path);
3339 251 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3340 :
3341 29997 : for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
3342 29495 : struct btrfs_backref_walk_ctx ctx = { 0 };
3343 :
3344 29495 : btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
3345 29495 : if (found.type != BTRFS_EXTENT_ITEM_KEY &&
3346 : found.type != BTRFS_METADATA_ITEM_KEY)
3347 728 : continue;
3348 28767 : if (found.type == BTRFS_METADATA_ITEM_KEY)
3349 12695 : num_bytes = fs_info->nodesize;
3350 : else
3351 16072 : num_bytes = found.offset;
3352 :
3353 28767 : ctx.bytenr = found.objectid;
3354 28767 : ctx.fs_info = fs_info;
3355 :
3356 28767 : ret = btrfs_find_all_roots(&ctx, false);
3357 28767 : if (ret < 0)
3358 0 : goto out;
3359 : /* For rescan, just pass old_roots as NULL */
3360 28767 : ret = btrfs_qgroup_account_extent(trans, found.objectid,
3361 : num_bytes, NULL, ctx.roots);
3362 28767 : if (ret < 0)
3363 0 : goto out;
3364 : }
3365 251 : out:
3366 251 : if (scratch_leaf)
3367 251 : free_extent_buffer(scratch_leaf);
3368 :
3369 251 : if (done && !ret) {
3370 137 : ret = 1;
3371 137 : fs_info->qgroup_rescan_progress.objectid = (u64)-1;
3372 : }
3373 : return ret;
3374 : }
3375 :
3376 300 : static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
3377 : {
3378 599 : return btrfs_fs_closing(fs_info) ||
3379 299 : test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
3380 599 : !test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
3381 252 : fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
3382 : }
3383 :
3384 186 : static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
3385 : {
3386 186 : struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
3387 : qgroup_rescan_work);
3388 186 : struct btrfs_path *path;
3389 186 : struct btrfs_trans_handle *trans = NULL;
3390 186 : int err = -ENOMEM;
3391 186 : int ret = 0;
3392 186 : bool stopped = false;
3393 186 : bool did_leaf_rescans = false;
3394 :
3395 186 : path = btrfs_alloc_path();
3396 186 : if (!path)
3397 0 : goto out;
3398 : /*
3399 : * Rescan should only search for commit root, and any later difference
3400 : * should be recorded by qgroup
3401 : */
3402 186 : path->search_commit_root = 1;
3403 186 : path->skip_locking = 1;
3404 :
3405 186 : err = 0;
3406 438 : while (!err && !(stopped = rescan_should_stop(fs_info))) {
3407 252 : trans = btrfs_start_transaction(fs_info->fs_root, 0);
3408 252 : if (IS_ERR(trans)) {
3409 0 : err = PTR_ERR(trans);
3410 0 : break;
3411 : }
3412 :
3413 252 : err = qgroup_rescan_leaf(trans, path);
3414 252 : did_leaf_rescans = true;
3415 :
3416 252 : if (err > 0)
3417 138 : btrfs_commit_transaction(trans);
3418 : else
3419 114 : btrfs_end_transaction(trans);
3420 : }
3421 :
3422 186 : out:
3423 186 : btrfs_free_path(path);
3424 :
3425 186 : mutex_lock(&fs_info->qgroup_rescan_lock);
3426 186 : if (err > 0 &&
3427 138 : fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
3428 119 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
3429 67 : } else if (err < 0 || stopped) {
3430 48 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
3431 : }
3432 186 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3433 :
3434 : /*
3435 : * Only update status, since the previous part has already updated the
3436 : * qgroup info, and only if we did any actual work. This also prevents
3437 : * race with a concurrent quota disable, which has already set
3438 : * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at
3439 : * btrfs_quota_disable().
3440 : */
3441 186 : if (did_leaf_rescans) {
3442 140 : trans = btrfs_start_transaction(fs_info->quota_root, 1);
3443 140 : if (IS_ERR(trans)) {
3444 0 : err = PTR_ERR(trans);
3445 0 : trans = NULL;
3446 0 : btrfs_err(fs_info,
3447 : "fail to start transaction for status update: %d",
3448 : err);
3449 : }
3450 : } else {
3451 : trans = NULL;
3452 : }
3453 :
3454 186 : mutex_lock(&fs_info->qgroup_rescan_lock);
3455 186 : if (!stopped ||
3456 48 : fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN)
3457 138 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3458 186 : if (trans) {
3459 140 : ret = update_qgroup_status_item(trans);
3460 140 : if (ret < 0) {
3461 0 : err = ret;
3462 0 : btrfs_err(fs_info, "fail to update qgroup status: %d",
3463 : err);
3464 : }
3465 : }
3466 186 : fs_info->qgroup_rescan_running = false;
3467 186 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
3468 186 : complete_all(&fs_info->qgroup_rescan_completion);
3469 186 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3470 :
3471 186 : if (!trans)
3472 : return;
3473 :
3474 140 : btrfs_end_transaction(trans);
3475 :
3476 140 : if (stopped) {
3477 2 : btrfs_info(fs_info, "qgroup scan paused");
3478 138 : } else if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) {
3479 0 : btrfs_info(fs_info, "qgroup scan cancelled");
3480 138 : } else if (err >= 0) {
3481 138 : btrfs_info(fs_info, "qgroup scan completed%s",
3482 : err > 0 ? " (inconsistency flag cleared)" : "");
3483 : } else {
3484 0 : btrfs_err(fs_info, "qgroup scan failed with %d", err);
3485 : }
3486 : }
3487 :
3488 : /*
3489 : * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
3490 : * memory required for the rescan context.
3491 : */
3492 : static int
3493 190 : qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
3494 : int init_flags)
3495 : {
3496 190 : int ret = 0;
3497 :
3498 190 : if (!init_flags) {
3499 : /* we're resuming qgroup rescan at mount time */
3500 1 : if (!(fs_info->qgroup_flags &
3501 : BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
3502 0 : btrfs_warn(fs_info,
3503 : "qgroup rescan init failed, qgroup rescan is not queued");
3504 0 : ret = -EINVAL;
3505 1 : } else if (!(fs_info->qgroup_flags &
3506 : BTRFS_QGROUP_STATUS_FLAG_ON)) {
3507 0 : btrfs_warn(fs_info,
3508 : "qgroup rescan init failed, qgroup is not enabled");
3509 0 : ret = -EINVAL;
3510 : }
3511 :
3512 0 : if (ret)
3513 0 : return ret;
3514 : }
3515 :
3516 190 : mutex_lock(&fs_info->qgroup_rescan_lock);
3517 :
3518 190 : if (init_flags) {
3519 189 : if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3520 4 : btrfs_warn(fs_info,
3521 : "qgroup rescan is already in progress");
3522 4 : ret = -EINPROGRESS;
3523 185 : } else if (!(fs_info->qgroup_flags &
3524 : BTRFS_QGROUP_STATUS_FLAG_ON)) {
3525 0 : btrfs_warn(fs_info,
3526 : "qgroup rescan init failed, qgroup is not enabled");
3527 0 : ret = -EINVAL;
3528 185 : } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
3529 : /* Quota disable is in progress */
3530 : ret = -EBUSY;
3531 : }
3532 :
3533 4 : if (ret) {
3534 4 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3535 4 : return ret;
3536 : }
3537 185 : fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3538 : }
3539 :
3540 186 : memset(&fs_info->qgroup_rescan_progress, 0,
3541 : sizeof(fs_info->qgroup_rescan_progress));
3542 186 : fs_info->qgroup_flags &= ~(BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
3543 : BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
3544 186 : fs_info->qgroup_rescan_progress.objectid = progress_objectid;
3545 186 : init_completion(&fs_info->qgroup_rescan_completion);
3546 186 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3547 :
3548 186 : btrfs_init_work(&fs_info->qgroup_rescan_work,
3549 : btrfs_qgroup_rescan_worker, NULL, NULL);
3550 186 : return 0;
3551 : }
3552 :
3553 : static void
3554 185 : qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
3555 : {
3556 185 : struct rb_node *n;
3557 185 : struct btrfs_qgroup *qgroup;
3558 :
3559 185 : spin_lock(&fs_info->qgroup_lock);
3560 : /* clear all current qgroup tracking information */
3561 410 : for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
3562 225 : qgroup = rb_entry(n, struct btrfs_qgroup, node);
3563 225 : qgroup->rfer = 0;
3564 225 : qgroup->rfer_cmpr = 0;
3565 225 : qgroup->excl = 0;
3566 225 : qgroup->excl_cmpr = 0;
3567 225 : qgroup_dirty(fs_info, qgroup);
3568 : }
3569 185 : spin_unlock(&fs_info->qgroup_lock);
3570 185 : }
3571 :
3572 : int
3573 25 : btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
3574 : {
3575 25 : int ret = 0;
3576 25 : struct btrfs_trans_handle *trans;
3577 :
3578 25 : ret = qgroup_rescan_init(fs_info, 0, 1);
3579 25 : if (ret)
3580 : return ret;
3581 :
3582 : /*
3583 : * We have set the rescan_progress to 0, which means no more
3584 : * delayed refs will be accounted by btrfs_qgroup_account_ref.
3585 : * However, btrfs_qgroup_account_ref may be right after its call
3586 : * to btrfs_find_all_roots, in which case it would still do the
3587 : * accounting.
3588 : * To solve this, we're committing the transaction, which will
3589 : * ensure we run all delayed refs and only after that, we are
3590 : * going to clear all tracking information for a clean start.
3591 : */
3592 :
3593 21 : trans = btrfs_join_transaction(fs_info->fs_root);
3594 21 : if (IS_ERR(trans)) {
3595 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3596 0 : return PTR_ERR(trans);
3597 : }
3598 21 : ret = btrfs_commit_transaction(trans);
3599 21 : if (ret) {
3600 0 : fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
3601 0 : return ret;
3602 : }
3603 :
3604 21 : qgroup_rescan_zero_tracking(fs_info);
3605 :
3606 21 : mutex_lock(&fs_info->qgroup_rescan_lock);
3607 21 : fs_info->qgroup_rescan_running = true;
3608 21 : btrfs_queue_work(fs_info->qgroup_rescan_workers,
3609 : &fs_info->qgroup_rescan_work);
3610 21 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3611 :
3612 21 : return 0;
3613 : }
3614 :
3615 3376 : int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
3616 : bool interruptible)
3617 : {
3618 3376 : int running;
3619 3376 : int ret = 0;
3620 :
3621 3376 : mutex_lock(&fs_info->qgroup_rescan_lock);
3622 3376 : running = fs_info->qgroup_rescan_running;
3623 3376 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3624 :
3625 3376 : if (!running)
3626 : return 0;
3627 :
3628 145 : if (interruptible)
3629 23 : ret = wait_for_completion_interruptible(
3630 : &fs_info->qgroup_rescan_completion);
3631 : else
3632 122 : wait_for_completion(&fs_info->qgroup_rescan_completion);
3633 :
3634 : return ret;
3635 : }
3636 :
3637 : /*
3638 : * this is only called from open_ctree where we're still single threaded, thus
3639 : * locking is omitted here.
3640 : */
3641 : void
3642 3181 : btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
3643 : {
3644 3181 : if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
3645 1 : mutex_lock(&fs_info->qgroup_rescan_lock);
3646 1 : fs_info->qgroup_rescan_running = true;
3647 1 : btrfs_queue_work(fs_info->qgroup_rescan_workers,
3648 : &fs_info->qgroup_rescan_work);
3649 1 : mutex_unlock(&fs_info->qgroup_rescan_lock);
3650 : }
3651 3181 : }
3652 :
3653 : #define rbtree_iterate_from_safe(node, next, start) \
3654 : for (node = start; node && ({ next = rb_next(node); 1;}); node = next)
3655 :
3656 6469 : static int qgroup_unreserve_range(struct btrfs_inode *inode,
3657 : struct extent_changeset *reserved, u64 start,
3658 : u64 len)
3659 : {
3660 6469 : struct rb_node *node;
3661 6469 : struct rb_node *next;
3662 6469 : struct ulist_node *entry;
3663 6469 : int ret = 0;
3664 :
3665 6469 : node = reserved->range_changed.root.rb_node;
3666 6469 : if (!node)
3667 : return 0;
3668 12973 : while (node) {
3669 6504 : entry = rb_entry(node, struct ulist_node, rb_node);
3670 6504 : if (entry->val < start)
3671 35 : node = node->rb_right;
3672 : else
3673 6469 : node = node->rb_left;
3674 : }
3675 :
3676 6469 : if (entry->val > start && rb_prev(&entry->rb_node))
3677 0 : entry = rb_entry(rb_prev(&entry->rb_node), struct ulist_node,
3678 : rb_node);
3679 :
3680 12931 : rbtree_iterate_from_safe(node, next, &entry->rb_node) {
3681 6472 : u64 entry_start;
3682 6472 : u64 entry_end;
3683 6472 : u64 entry_len;
3684 6472 : int clear_ret;
3685 :
3686 6472 : entry = rb_entry(node, struct ulist_node, rb_node);
3687 6472 : entry_start = entry->val;
3688 6472 : entry_end = entry->aux;
3689 6472 : entry_len = entry_end - entry_start + 1;
3690 :
3691 6472 : if (entry_start >= start + len)
3692 : break;
3693 6472 : if (entry_start + entry_len <= start)
3694 0 : continue;
3695 : /*
3696 : * Now the entry is in [start, start + len), revert the
3697 : * EXTENT_QGROUP_RESERVED bit.
3698 : */
3699 6472 : clear_ret = clear_extent_bits(&inode->io_tree, entry_start,
3700 : entry_end, EXTENT_QGROUP_RESERVED);
3701 6469 : if (!ret && clear_ret < 0)
3702 0 : ret = clear_ret;
3703 :
3704 6469 : ulist_del(&reserved->range_changed, entry->val, entry->aux);
3705 6462 : if (likely(reserved->bytes_changed >= entry_len)) {
3706 6462 : reserved->bytes_changed -= entry_len;
3707 : } else {
3708 0 : WARN_ON(1);
3709 0 : reserved->bytes_changed = 0;
3710 : }
3711 : }
3712 :
3713 : return ret;
3714 : }
3715 :
3716 : /*
3717 : * Try to free some space for qgroup.
3718 : *
3719 : * For qgroup, there are only 3 ways to free qgroup space:
3720 : * - Flush nodatacow write
3721 : * Any nodatacow write will free its reserved data space at run_delalloc_range().
3722 : * In theory, we should only flush nodatacow inodes, but it's not yet
3723 : * possible, so we need to flush the whole root.
3724 : *
3725 : * - Wait for ordered extents
3726 : * When ordered extents are finished, their reserved metadata is finally
3727 : * converted to per_trans status, which can be freed by later commit
3728 : * transaction.
3729 : *
3730 : * - Commit transaction
3731 : * This would free the meta_per_trans space.
3732 : * In theory this shouldn't provide much space, but any more qgroup space
3733 : * is needed.
3734 : */
3735 19595 : static int try_flush_qgroup(struct btrfs_root *root)
3736 : {
3737 19595 : struct btrfs_trans_handle *trans;
3738 19595 : int ret;
3739 :
3740 : /* Can't hold an open transaction or we run the risk of deadlocking. */
3741 19595 : ASSERT(current->journal_info == NULL);
3742 19595 : if (WARN_ON(current->journal_info))
3743 : return 0;
3744 :
3745 : /*
3746 : * We don't want to run flush again and again, so if there is a running
3747 : * one, we won't try to start a new flush, but exit directly.
3748 : */
3749 19595 : if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
3750 130585 : wait_event(root->qgroup_flush_wait,
3751 : !test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
3752 10598 : return 0;
3753 : }
3754 :
3755 9002 : ret = btrfs_start_delalloc_snapshot(root, true);
3756 9002 : if (ret < 0)
3757 0 : goto out;
3758 9002 : btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
3759 :
3760 9002 : trans = btrfs_join_transaction(root);
3761 9002 : if (IS_ERR(trans)) {
3762 0 : ret = PTR_ERR(trans);
3763 0 : goto out;
3764 : }
3765 :
3766 9002 : ret = btrfs_commit_transaction(trans);
3767 9001 : out:
3768 9001 : clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
3769 9000 : wake_up(&root->qgroup_flush_wait);
3770 9000 : return ret;
3771 : }
3772 :
3773 40357586 : static int qgroup_reserve_data(struct btrfs_inode *inode,
3774 : struct extent_changeset **reserved_ret, u64 start,
3775 : u64 len)
3776 : {
3777 40357586 : struct btrfs_root *root = inode->root;
3778 40357586 : struct extent_changeset *reserved;
3779 40357586 : bool new_reserved = false;
3780 40357586 : u64 orig_reserved;
3781 40357586 : u64 to_reserve;
3782 40357586 : int ret;
3783 :
3784 40357586 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &root->fs_info->flags) ||
3785 3426538 : !is_fstree(root->root_key.objectid) || len == 0)
3786 : return 0;
3787 :
3788 : /* @reserved parameter is mandatory for qgroup */
3789 1909823 : if (WARN_ON(!reserved_ret))
3790 : return -EINVAL;
3791 1909823 : if (!*reserved_ret) {
3792 1904463 : new_reserved = true;
3793 1904463 : *reserved_ret = extent_changeset_alloc();
3794 1904445 : if (!*reserved_ret)
3795 : return -ENOMEM;
3796 : }
3797 1909805 : reserved = *reserved_ret;
3798 : /* Record already reserved space */
3799 1909805 : orig_reserved = reserved->bytes_changed;
3800 1909805 : ret = set_record_extent_bits(&inode->io_tree, start,
3801 1909805 : start + len -1, EXTENT_QGROUP_RESERVED, reserved);
3802 :
3803 : /* Newly reserved space */
3804 1909808 : to_reserve = reserved->bytes_changed - orig_reserved;
3805 1909808 : trace_btrfs_qgroup_reserve_data(&inode->vfs_inode, start, len,
3806 : to_reserve, QGROUP_RESERVE);
3807 1909774 : if (ret < 0)
3808 0 : goto out;
3809 1909774 : ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
3810 1909842 : if (ret < 0)
3811 6469 : goto cleanup;
3812 :
3813 : return ret;
3814 :
3815 : cleanup:
3816 6469 : qgroup_unreserve_range(inode, reserved, start, len);
3817 6447 : out:
3818 6447 : if (new_reserved) {
3819 6321 : extent_changeset_free(reserved);
3820 6311 : *reserved_ret = NULL;
3821 : }
3822 : return ret;
3823 : }
3824 :
3825 : /*
3826 : * Reserve qgroup space for range [start, start + len).
3827 : *
3828 : * This function will either reserve space from related qgroups or do nothing
3829 : * if the range is already reserved.
3830 : *
3831 : * Return 0 for successful reservation
3832 : * Return <0 for error (including -EQUOT)
3833 : *
3834 : * NOTE: This function may sleep for memory allocation, dirty page flushing and
3835 : * commit transaction. So caller should not hold any dirty page locked.
3836 : */
3837 40355420 : int btrfs_qgroup_reserve_data(struct btrfs_inode *inode,
3838 : struct extent_changeset **reserved_ret, u64 start,
3839 : u64 len)
3840 : {
3841 40355420 : int ret;
3842 :
3843 40355420 : ret = qgroup_reserve_data(inode, reserved_ret, start, len);
3844 40350334 : if (ret <= 0 && ret != -EDQUOT)
3845 : return ret;
3846 :
3847 3700 : ret = try_flush_qgroup(inode->root);
3848 3693 : if (ret < 0)
3849 : return ret;
3850 3693 : return qgroup_reserve_data(inode, reserved_ret, start, len);
3851 : }
3852 :
3853 : /* Free ranges specified by @reserved, normally in error path */
3854 1321 : static int qgroup_free_reserved_data(struct btrfs_inode *inode,
3855 : struct extent_changeset *reserved, u64 start, u64 len)
3856 : {
3857 1321 : struct btrfs_root *root = inode->root;
3858 1321 : struct ulist_node *unode;
3859 1321 : struct ulist_iterator uiter;
3860 1321 : struct extent_changeset changeset;
3861 1321 : int freed = 0;
3862 1321 : int ret;
3863 :
3864 1321 : extent_changeset_init(&changeset);
3865 1321 : len = round_up(start + len, root->fs_info->sectorsize);
3866 1321 : start = round_down(start, root->fs_info->sectorsize);
3867 :
3868 1321 : ULIST_ITER_INIT(&uiter);
3869 2653 : while ((unode = ulist_next(&reserved->range_changed, &uiter))) {
3870 1332 : u64 range_start = unode->val;
3871 : /* unode->aux is the inclusive end */
3872 1332 : u64 range_len = unode->aux - range_start + 1;
3873 1332 : u64 free_start;
3874 1332 : u64 free_len;
3875 :
3876 1332 : extent_changeset_release(&changeset);
3877 :
3878 : /* Only free range in range [start, start + len) */
3879 1332 : if (range_start >= start + len ||
3880 1332 : range_start + range_len <= start)
3881 0 : continue;
3882 1332 : free_start = max(range_start, start);
3883 1332 : free_len = min(start + len, range_start + range_len) -
3884 : free_start;
3885 : /*
3886 : * TODO: To also modify reserved->ranges_reserved to reflect
3887 : * the modification.
3888 : *
3889 : * However as long as we free qgroup reserved according to
3890 : * EXTENT_QGROUP_RESERVED, we won't double free.
3891 : * So not need to rush.
3892 : */
3893 1332 : ret = clear_record_extent_bits(&inode->io_tree, free_start,
3894 : free_start + free_len - 1,
3895 : EXTENT_QGROUP_RESERVED, &changeset);
3896 1332 : if (ret < 0)
3897 0 : goto out;
3898 1332 : freed += changeset.bytes_changed;
3899 : }
3900 1319 : btrfs_qgroup_free_refroot(root->fs_info, root->root_key.objectid, freed,
3901 : BTRFS_QGROUP_RSV_DATA);
3902 1319 : ret = freed;
3903 1321 : out:
3904 1321 : extent_changeset_release(&changeset);
3905 1321 : return ret;
3906 : }
3907 :
3908 92332682 : static int __btrfs_qgroup_release_data(struct btrfs_inode *inode,
3909 : struct extent_changeset *reserved, u64 start, u64 len,
3910 : int free)
3911 : {
3912 92332682 : struct extent_changeset changeset;
3913 92332682 : int trace_op = QGROUP_RELEASE;
3914 92332682 : int ret;
3915 :
3916 92332682 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &inode->root->fs_info->flags))
3917 : return 0;
3918 :
3919 : /* In release case, we shouldn't have @reserved */
3920 2987575 : WARN_ON(!free && reserved);
3921 2987575 : if (free && reserved)
3922 1321 : return qgroup_free_reserved_data(inode, reserved, start, len);
3923 2986254 : extent_changeset_init(&changeset);
3924 2986224 : ret = clear_record_extent_bits(&inode->io_tree, start, start + len -1,
3925 : EXTENT_QGROUP_RESERVED, &changeset);
3926 2986282 : if (ret < 0)
3927 0 : goto out;
3928 :
3929 2986282 : if (free)
3930 2964903 : trace_op = QGROUP_FREE;
3931 2986282 : trace_btrfs_qgroup_release_data(&inode->vfs_inode, start, len,
3932 : changeset.bytes_changed, trace_op);
3933 2986273 : if (free)
3934 2964901 : btrfs_qgroup_free_refroot(inode->root->fs_info,
3935 2964901 : inode->root->root_key.objectid,
3936 : changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
3937 2986283 : ret = changeset.bytes_changed;
3938 2986283 : out:
3939 2986283 : extent_changeset_release(&changeset);
3940 2986283 : return ret;
3941 : }
3942 :
3943 : /*
3944 : * Free a reserved space range from io_tree and related qgroups
3945 : *
3946 : * Should be called when a range of pages get invalidated before reaching disk.
3947 : * Or for error cleanup case.
3948 : * if @reserved is given, only reserved range in [@start, @start + @len) will
3949 : * be freed.
3950 : *
3951 : * For data written to disk, use btrfs_qgroup_release_data().
3952 : *
3953 : * NOTE: This function may sleep for memory allocation.
3954 : */
3955 88521819 : int btrfs_qgroup_free_data(struct btrfs_inode *inode,
3956 : struct extent_changeset *reserved, u64 start, u64 len)
3957 : {
3958 88521819 : return __btrfs_qgroup_release_data(inode, reserved, start, len, 1);
3959 : }
3960 :
3961 : /*
3962 : * Release a reserved space range from io_tree only.
3963 : *
3964 : * Should be called when a range of pages get written to disk and corresponding
3965 : * FILE_EXTENT is inserted into corresponding root.
3966 : *
3967 : * Since new qgroup accounting framework will only update qgroup numbers at
3968 : * commit_transaction() time, its reserved space shouldn't be freed from
3969 : * related qgroups.
3970 : *
3971 : * But we should release the range from io_tree, to allow further write to be
3972 : * COWed.
3973 : *
3974 : * NOTE: This function may sleep for memory allocation.
3975 : */
3976 3812828 : int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len)
3977 : {
3978 3812828 : return __btrfs_qgroup_release_data(inode, NULL, start, len, 0);
3979 : }
3980 :
3981 1945000 : static void add_root_meta_rsv(struct btrfs_root *root, int num_bytes,
3982 : enum btrfs_qgroup_rsv_type type)
3983 : {
3984 1945000 : if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
3985 : type != BTRFS_QGROUP_RSV_META_PERTRANS)
3986 : return;
3987 1945000 : if (num_bytes == 0)
3988 : return;
3989 :
3990 1945000 : spin_lock(&root->qgroup_meta_rsv_lock);
3991 1945000 : if (type == BTRFS_QGROUP_RSV_META_PREALLOC)
3992 1903233 : root->qgroup_meta_rsv_prealloc += num_bytes;
3993 : else
3994 41767 : root->qgroup_meta_rsv_pertrans += num_bytes;
3995 1945000 : spin_unlock(&root->qgroup_meta_rsv_lock);
3996 : }
3997 :
3998 1973547 : static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
3999 : enum btrfs_qgroup_rsv_type type)
4000 : {
4001 1973547 : if (type != BTRFS_QGROUP_RSV_META_PREALLOC &&
4002 : type != BTRFS_QGROUP_RSV_META_PERTRANS)
4003 : return 0;
4004 1973547 : if (num_bytes == 0)
4005 : return 0;
4006 :
4007 1942134 : spin_lock(&root->qgroup_meta_rsv_lock);
4008 1942150 : if (type == BTRFS_QGROUP_RSV_META_PREALLOC) {
4009 1942150 : num_bytes = min_t(u64, root->qgroup_meta_rsv_prealloc,
4010 : num_bytes);
4011 1942150 : root->qgroup_meta_rsv_prealloc -= num_bytes;
4012 : } else {
4013 0 : num_bytes = min_t(u64, root->qgroup_meta_rsv_pertrans,
4014 : num_bytes);
4015 0 : root->qgroup_meta_rsv_pertrans -= num_bytes;
4016 : }
4017 1942150 : spin_unlock(&root->qgroup_meta_rsv_lock);
4018 1942150 : return num_bytes;
4019 : }
4020 :
4021 68967635 : int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
4022 : enum btrfs_qgroup_rsv_type type, bool enforce)
4023 : {
4024 68967635 : struct btrfs_fs_info *fs_info = root->fs_info;
4025 68967635 : int ret;
4026 :
4027 68967635 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
4028 3949467 : !is_fstree(root->root_key.objectid) || num_bytes == 0)
4029 : return 0;
4030 :
4031 1979118 : BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
4032 1979118 : trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
4033 1979125 : ret = qgroup_reserve(root, num_bytes, enforce, type);
4034 1979335 : if (ret < 0)
4035 : return ret;
4036 : /*
4037 : * Record what we have reserved into root.
4038 : *
4039 : * To avoid quota disabled->enabled underflow.
4040 : * In that case, we may try to free space we haven't reserved
4041 : * (since quota was disabled), so record what we reserved into root.
4042 : * And ensure later release won't underflow this number.
4043 : */
4044 1945000 : add_root_meta_rsv(root, num_bytes, type);
4045 1945000 : return ret;
4046 : }
4047 :
4048 66675810 : int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
4049 : enum btrfs_qgroup_rsv_type type, bool enforce,
4050 : bool noflush)
4051 : {
4052 66675810 : int ret;
4053 :
4054 66675810 : ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
4055 66624150 : if ((ret <= 0 && ret != -EDQUOT) || noflush)
4056 : return ret;
4057 :
4058 15899 : ret = try_flush_qgroup(root);
4059 15874 : if (ret < 0)
4060 : return ret;
4061 15874 : return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
4062 : }
4063 :
4064 156792 : void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
4065 : {
4066 156792 : struct btrfs_fs_info *fs_info = root->fs_info;
4067 :
4068 156792 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
4069 12043 : !is_fstree(root->root_key.objectid))
4070 : return;
4071 :
4072 : /* TODO: Update trace point to handle such free */
4073 11623 : trace_qgroup_meta_free_all_pertrans(root);
4074 : /* Special value -1 means to free all reserved space */
4075 11623 : btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid, (u64)-1,
4076 : BTRFS_QGROUP_RSV_META_PERTRANS);
4077 : }
4078 :
4079 41232455 : void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
4080 : enum btrfs_qgroup_rsv_type type)
4081 : {
4082 41232455 : struct btrfs_fs_info *fs_info = root->fs_info;
4083 :
4084 41232455 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
4085 2329501 : !is_fstree(root->root_key.objectid))
4086 : return;
4087 :
4088 : /*
4089 : * reservation for META_PREALLOC can happen before quota is enabled,
4090 : * which can lead to underflow.
4091 : * Here ensure we will only free what we really have reserved.
4092 : */
4093 1901648 : num_bytes = sub_root_meta_rsv(root, num_bytes, type);
4094 1901655 : BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
4095 1901655 : trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
4096 1901649 : btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
4097 : num_bytes, type);
4098 : }
4099 :
4100 71922 : static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
4101 : int num_bytes)
4102 : {
4103 71922 : struct btrfs_qgroup *qgroup;
4104 71922 : struct ulist_node *unode;
4105 71922 : struct ulist_iterator uiter;
4106 71922 : int ret = 0;
4107 :
4108 71922 : if (num_bytes == 0)
4109 54448 : return;
4110 17474 : if (!fs_info->quota_root)
4111 : return;
4112 :
4113 17474 : spin_lock(&fs_info->qgroup_lock);
4114 17481 : qgroup = find_qgroup_rb(fs_info, ref_root);
4115 17481 : if (!qgroup)
4116 0 : goto out;
4117 17481 : ulist_reinit(fs_info->qgroup_ulist);
4118 17481 : ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
4119 : qgroup_to_aux(qgroup), GFP_ATOMIC);
4120 17481 : if (ret < 0)
4121 0 : goto out;
4122 17481 : ULIST_ITER_INIT(&uiter);
4123 37383 : while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
4124 19902 : struct btrfs_qgroup *qg;
4125 19902 : struct btrfs_qgroup_list *glist;
4126 :
4127 19902 : qg = unode_aux_to_qgroup(unode);
4128 :
4129 19902 : qgroup_rsv_release(fs_info, qg, num_bytes,
4130 : BTRFS_QGROUP_RSV_META_PREALLOC);
4131 19902 : qgroup_rsv_add(fs_info, qg, num_bytes,
4132 : BTRFS_QGROUP_RSV_META_PERTRANS);
4133 22323 : list_for_each_entry(glist, &qg->groups, next_group) {
4134 2421 : ret = ulist_add(fs_info->qgroup_ulist,
4135 : glist->group->qgroupid,
4136 : qgroup_to_aux(glist->group), GFP_ATOMIC);
4137 2421 : if (ret < 0)
4138 0 : goto out;
4139 : }
4140 : }
4141 17481 : out:
4142 17481 : spin_unlock(&fs_info->qgroup_lock);
4143 : }
4144 :
4145 29394659 : void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
4146 : {
4147 29394659 : struct btrfs_fs_info *fs_info = root->fs_info;
4148 :
4149 29394659 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
4150 75884 : !is_fstree(root->root_key.objectid))
4151 : return;
4152 : /* Same as btrfs_qgroup_free_meta_prealloc() */
4153 71911 : num_bytes = sub_root_meta_rsv(root, num_bytes,
4154 : BTRFS_QGROUP_RSV_META_PREALLOC);
4155 71924 : trace_qgroup_meta_convert(root, num_bytes);
4156 71920 : qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
4157 : }
4158 :
4159 : /*
4160 : * Check qgroup reserved space leaking, normally at destroy inode
4161 : * time
4162 : */
4163 3882674 : void btrfs_qgroup_check_reserved_leak(struct btrfs_inode *inode)
4164 : {
4165 3882674 : struct extent_changeset changeset;
4166 3882674 : struct ulist_node *unode;
4167 3882674 : struct ulist_iterator iter;
4168 3882674 : int ret;
4169 :
4170 3882674 : extent_changeset_init(&changeset);
4171 3882659 : ret = clear_record_extent_bits(&inode->io_tree, 0, (u64)-1,
4172 : EXTENT_QGROUP_RESERVED, &changeset);
4173 :
4174 3882654 : WARN_ON(ret < 0);
4175 3882654 : if (WARN_ON(changeset.bytes_changed)) {
4176 0 : ULIST_ITER_INIT(&iter);
4177 0 : while ((unode = ulist_next(&changeset.range_changed, &iter))) {
4178 0 : btrfs_warn(inode->root->fs_info,
4179 : "leaking qgroup reserved space, ino: %llu, start: %llu, end: %llu",
4180 : btrfs_ino(inode), unode->val, unode->aux);
4181 : }
4182 0 : btrfs_qgroup_free_refroot(inode->root->fs_info,
4183 0 : inode->root->root_key.objectid,
4184 : changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
4185 :
4186 : }
4187 3882654 : extent_changeset_release(&changeset);
4188 3882584 : }
4189 :
4190 66801 : void btrfs_qgroup_init_swapped_blocks(
4191 : struct btrfs_qgroup_swapped_blocks *swapped_blocks)
4192 : {
4193 66801 : int i;
4194 :
4195 66801 : spin_lock_init(&swapped_blocks->lock);
4196 668010 : for (i = 0; i < BTRFS_MAX_LEVEL; i++)
4197 534408 : swapped_blocks->blocks[i] = RB_ROOT;
4198 66801 : swapped_blocks->swapped = false;
4199 66801 : }
4200 :
4201 : /*
4202 : * Delete all swapped blocks record of @root.
4203 : * Every record here means we skipped a full subtree scan for qgroup.
4204 : *
4205 : * Gets called when committing one transaction.
4206 : */
4207 1018991 : void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
4208 : {
4209 1018991 : struct btrfs_qgroup_swapped_blocks *swapped_blocks;
4210 1018991 : int i;
4211 :
4212 1018991 : swapped_blocks = &root->swapped_blocks;
4213 :
4214 1018991 : spin_lock(&swapped_blocks->lock);
4215 1018991 : if (!swapped_blocks->swapped)
4216 1018963 : goto out;
4217 252 : for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
4218 224 : struct rb_root *cur_root = &swapped_blocks->blocks[i];
4219 224 : struct btrfs_qgroup_swapped_block *entry;
4220 224 : struct btrfs_qgroup_swapped_block *next;
4221 :
4222 490 : rbtree_postorder_for_each_entry_safe(entry, next, cur_root,
4223 : node)
4224 42 : kfree(entry);
4225 224 : swapped_blocks->blocks[i] = RB_ROOT;
4226 : }
4227 28 : swapped_blocks->swapped = false;
4228 1018991 : out:
4229 1018991 : spin_unlock(&swapped_blocks->lock);
4230 1018991 : }
4231 :
4232 : /*
4233 : * Add subtree roots record into @subvol_root.
4234 : *
4235 : * @subvol_root: tree root of the subvolume tree get swapped
4236 : * @bg: block group under balance
4237 : * @subvol_parent/slot: pointer to the subtree root in subvolume tree
4238 : * @reloc_parent/slot: pointer to the subtree root in reloc tree
4239 : * BOTH POINTERS ARE BEFORE TREE SWAP
4240 : * @last_snapshot: last snapshot generation of the subvolume tree
4241 : */
4242 44975 : int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
4243 : struct btrfs_root *subvol_root,
4244 : struct btrfs_block_group *bg,
4245 : struct extent_buffer *subvol_parent, int subvol_slot,
4246 : struct extent_buffer *reloc_parent, int reloc_slot,
4247 : u64 last_snapshot)
4248 : {
4249 44975 : struct btrfs_fs_info *fs_info = subvol_root->fs_info;
4250 44975 : struct btrfs_qgroup_swapped_blocks *blocks = &subvol_root->swapped_blocks;
4251 44975 : struct btrfs_qgroup_swapped_block *block;
4252 44975 : struct rb_node **cur;
4253 44975 : struct rb_node *parent = NULL;
4254 44975 : int level = btrfs_header_level(subvol_parent) - 1;
4255 44975 : int ret = 0;
4256 :
4257 44975 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
4258 : return 0;
4259 :
4260 84 : if (btrfs_node_ptr_generation(subvol_parent, subvol_slot) >
4261 : btrfs_node_ptr_generation(reloc_parent, reloc_slot)) {
4262 0 : btrfs_err_rl(fs_info,
4263 : "%s: bad parameter order, subvol_gen=%llu reloc_gen=%llu",
4264 : __func__,
4265 : btrfs_node_ptr_generation(subvol_parent, subvol_slot),
4266 : btrfs_node_ptr_generation(reloc_parent, reloc_slot));
4267 0 : return -EUCLEAN;
4268 : }
4269 :
4270 42 : block = kmalloc(sizeof(*block), GFP_NOFS);
4271 42 : if (!block) {
4272 0 : ret = -ENOMEM;
4273 0 : goto out;
4274 : }
4275 :
4276 : /*
4277 : * @reloc_parent/slot is still before swap, while @block is going to
4278 : * record the bytenr after swap, so we do the swap here.
4279 : */
4280 42 : block->subvol_bytenr = btrfs_node_blockptr(reloc_parent, reloc_slot);
4281 42 : block->subvol_generation = btrfs_node_ptr_generation(reloc_parent,
4282 : reloc_slot);
4283 42 : block->reloc_bytenr = btrfs_node_blockptr(subvol_parent, subvol_slot);
4284 42 : block->reloc_generation = btrfs_node_ptr_generation(subvol_parent,
4285 : subvol_slot);
4286 42 : block->last_snapshot = last_snapshot;
4287 42 : block->level = level;
4288 :
4289 : /*
4290 : * If we have bg == NULL, we're called from btrfs_recover_relocation(),
4291 : * no one else can modify tree blocks thus we qgroup will not change
4292 : * no matter the value of trace_leaf.
4293 : */
4294 42 : if (bg && bg->flags & BTRFS_BLOCK_GROUP_DATA)
4295 42 : block->trace_leaf = true;
4296 : else
4297 0 : block->trace_leaf = false;
4298 42 : btrfs_node_key_to_cpu(reloc_parent, &block->first_key, reloc_slot);
4299 :
4300 : /* Insert @block into @blocks */
4301 42 : spin_lock(&blocks->lock);
4302 42 : cur = &blocks->blocks[level].rb_node;
4303 60 : while (*cur) {
4304 18 : struct btrfs_qgroup_swapped_block *entry;
4305 :
4306 18 : parent = *cur;
4307 18 : entry = rb_entry(parent, struct btrfs_qgroup_swapped_block,
4308 : node);
4309 :
4310 18 : if (entry->subvol_bytenr < block->subvol_bytenr) {
4311 14 : cur = &(*cur)->rb_left;
4312 4 : } else if (entry->subvol_bytenr > block->subvol_bytenr) {
4313 4 : cur = &(*cur)->rb_right;
4314 : } else {
4315 0 : if (entry->subvol_generation !=
4316 0 : block->subvol_generation ||
4317 0 : entry->reloc_bytenr != block->reloc_bytenr ||
4318 0 : entry->reloc_generation !=
4319 0 : block->reloc_generation) {
4320 : /*
4321 : * Duplicated but mismatch entry found.
4322 : * Shouldn't happen.
4323 : *
4324 : * Marking qgroup inconsistent should be enough
4325 : * for end users.
4326 : */
4327 0 : WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
4328 0 : ret = -EEXIST;
4329 : }
4330 0 : kfree(block);
4331 0 : goto out_unlock;
4332 : }
4333 : }
4334 42 : rb_link_node(&block->node, parent, cur);
4335 42 : rb_insert_color(&block->node, &blocks->blocks[level]);
4336 42 : blocks->swapped = true;
4337 42 : out_unlock:
4338 42 : spin_unlock(&blocks->lock);
4339 : out:
4340 42 : if (ret < 0)
4341 0 : qgroup_mark_inconsistent(fs_info);
4342 : return ret;
4343 : }
4344 :
4345 : /*
4346 : * Check if the tree block is a subtree root, and if so do the needed
4347 : * delayed subtree trace for qgroup.
4348 : *
4349 : * This is called during btrfs_cow_block().
4350 : */
4351 9064379 : int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
4352 : struct btrfs_root *root,
4353 : struct extent_buffer *subvol_eb)
4354 : {
4355 9064379 : struct btrfs_fs_info *fs_info = root->fs_info;
4356 9064379 : struct btrfs_tree_parent_check check = { 0 };
4357 9064379 : struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks;
4358 9064379 : struct btrfs_qgroup_swapped_block *block;
4359 9064379 : struct extent_buffer *reloc_eb = NULL;
4360 9064379 : struct rb_node *node;
4361 9064379 : bool found = false;
4362 9064379 : bool swapped = false;
4363 9064379 : int level = btrfs_header_level(subvol_eb);
4364 9064379 : int ret = 0;
4365 9064379 : int i;
4366 :
4367 9064379 : if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
4368 : return 0;
4369 114706 : if (!is_fstree(root->root_key.objectid) || !root->reloc_root)
4370 : return 0;
4371 :
4372 3625 : spin_lock(&blocks->lock);
4373 3625 : if (!blocks->swapped) {
4374 3625 : spin_unlock(&blocks->lock);
4375 3625 : return 0;
4376 : }
4377 0 : node = blocks->blocks[level].rb_node;
4378 :
4379 0 : while (node) {
4380 0 : block = rb_entry(node, struct btrfs_qgroup_swapped_block, node);
4381 0 : if (block->subvol_bytenr < subvol_eb->start) {
4382 0 : node = node->rb_left;
4383 0 : } else if (block->subvol_bytenr > subvol_eb->start) {
4384 0 : node = node->rb_right;
4385 : } else {
4386 : found = true;
4387 : break;
4388 : }
4389 : }
4390 0 : if (!found) {
4391 0 : spin_unlock(&blocks->lock);
4392 0 : goto out;
4393 : }
4394 : /* Found one, remove it from @blocks first and update blocks->swapped */
4395 0 : rb_erase(&block->node, &blocks->blocks[level]);
4396 0 : for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
4397 0 : if (RB_EMPTY_ROOT(&blocks->blocks[i])) {
4398 : swapped = true;
4399 : break;
4400 : }
4401 : }
4402 0 : blocks->swapped = swapped;
4403 0 : spin_unlock(&blocks->lock);
4404 :
4405 0 : check.level = block->level;
4406 0 : check.transid = block->reloc_generation;
4407 0 : check.has_first_key = true;
4408 0 : memcpy(&check.first_key, &block->first_key, sizeof(check.first_key));
4409 :
4410 : /* Read out reloc subtree root */
4411 0 : reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check);
4412 0 : if (IS_ERR(reloc_eb)) {
4413 0 : ret = PTR_ERR(reloc_eb);
4414 0 : reloc_eb = NULL;
4415 0 : goto free_out;
4416 : }
4417 0 : if (!extent_buffer_uptodate(reloc_eb)) {
4418 0 : ret = -EIO;
4419 0 : goto free_out;
4420 : }
4421 :
4422 0 : ret = qgroup_trace_subtree_swap(trans, reloc_eb, subvol_eb,
4423 0 : block->last_snapshot, block->trace_leaf);
4424 0 : free_out:
4425 0 : kfree(block);
4426 0 : free_extent_buffer(reloc_eb);
4427 : out:
4428 0 : if (ret < 0) {
4429 0 : btrfs_err_rl(fs_info,
4430 : "failed to account subtree at bytenr %llu: %d",
4431 : subvol_eb->start, ret);
4432 0 : qgroup_mark_inconsistent(fs_info);
4433 : }
4434 : return ret;
4435 : }
4436 :
4437 2 : void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
4438 : {
4439 2 : struct btrfs_qgroup_extent_record *entry;
4440 2 : struct btrfs_qgroup_extent_record *next;
4441 2 : struct rb_root *root;
4442 :
4443 2 : root = &trans->delayed_refs.dirty_extent_root;
4444 4 : rbtree_postorder_for_each_entry_safe(entry, next, root, node) {
4445 0 : ulist_free(entry->old_roots);
4446 0 : kfree(entry);
4447 : }
4448 2 : *root = RB_ROOT;
4449 2 : }
|