Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2007 Oracle. All rights reserved.
4 : */
5 :
6 : #include <linux/blkdev.h>
7 : #include <linux/module.h>
8 : #include <linux/fs.h>
9 : #include <linux/pagemap.h>
10 : #include <linux/highmem.h>
11 : #include <linux/time.h>
12 : #include <linux/init.h>
13 : #include <linux/seq_file.h>
14 : #include <linux/string.h>
15 : #include <linux/backing-dev.h>
16 : #include <linux/mount.h>
17 : #include <linux/writeback.h>
18 : #include <linux/statfs.h>
19 : #include <linux/compat.h>
20 : #include <linux/parser.h>
21 : #include <linux/ctype.h>
22 : #include <linux/namei.h>
23 : #include <linux/miscdevice.h>
24 : #include <linux/magic.h>
25 : #include <linux/slab.h>
26 : #include <linux/ratelimit.h>
27 : #include <linux/crc32c.h>
28 : #include <linux/btrfs.h>
29 : #include "messages.h"
30 : #include "delayed-inode.h"
31 : #include "ctree.h"
32 : #include "disk-io.h"
33 : #include "transaction.h"
34 : #include "btrfs_inode.h"
35 : #include "print-tree.h"
36 : #include "props.h"
37 : #include "xattr.h"
38 : #include "bio.h"
39 : #include "export.h"
40 : #include "compression.h"
41 : #include "rcu-string.h"
42 : #include "dev-replace.h"
43 : #include "free-space-cache.h"
44 : #include "backref.h"
45 : #include "space-info.h"
46 : #include "sysfs.h"
47 : #include "zoned.h"
48 : #include "tests/btrfs-tests.h"
49 : #include "block-group.h"
50 : #include "discard.h"
51 : #include "qgroup.h"
52 : #include "raid56.h"
53 : #include "fs.h"
54 : #include "accessors.h"
55 : #include "defrag.h"
56 : #include "dir-item.h"
57 : #include "ioctl.h"
58 : #include "scrub.h"
59 : #include "verity.h"
60 : #include "super.h"
61 : #include "extent-tree.h"
62 : #define CREATE_TRACE_POINTS
63 : #include <trace/events/btrfs.h>
64 :
/*
 * Forward declaration of the file-local super_operations table; being static,
 * its definition has to appear elsewhere in this translation unit.
 */
65 : static const struct super_operations btrfs_super_ops;
66 :
67 : /*
68 : * Types for mounting the default subvolume and a subvolume explicitly
69 : * requested by subvol=/path. That way the callchain is straightforward and we
70 : * don't have to play tricks with the mount options and recursive calls to
71 : * btrfs_mount.
72 : *
73 : * The new btrfs_root_fs_type also serves as a tag for the bdev_holder.
74 : */
75 : static struct file_system_type btrfs_fs_type;
76 : static struct file_system_type btrfs_root_fs_type;
77 :
/* Forward declaration; the static definition lives elsewhere in this file. */
78 : static int btrfs_remount(struct super_block *sb, int *flags, char *data);
79 :
/*
 * Look up the btrfs_fs_info attached to @sb with btrfs_sb() and hand it to
 * close_ctree().
 */
static void btrfs_put_super(struct super_block *sb)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(sb);

	close_ctree(fs_info);
}
84 :
/*
 * Token identifiers produced by match_token() for the mount option tables
 * in this file. The enumerators wrapped in #ifdef only exist when the
 * corresponding config symbol is defined. Opt_err is the last enumerator; the
 * match tables use it for their terminating NULL-pattern entry and the
 * parsers treat it as "option not recognized".
 */
85 : enum {
86 : Opt_acl, Opt_noacl,
87 : Opt_clear_cache,
88 : Opt_commit_interval,
89 : Opt_compress,
90 : Opt_compress_force,
91 : Opt_compress_force_type,
92 : Opt_compress_type,
93 : Opt_degraded,
94 : Opt_device,
95 : Opt_fatal_errors,
96 : Opt_flushoncommit, Opt_noflushoncommit,
97 : Opt_max_inline,
98 : Opt_barrier, Opt_nobarrier,
99 : Opt_datacow, Opt_nodatacow,
100 : Opt_datasum, Opt_nodatasum,
101 : Opt_defrag, Opt_nodefrag,
102 : Opt_discard, Opt_nodiscard,
103 : Opt_discard_mode,
104 : Opt_norecovery,
105 : Opt_ratio,
106 : Opt_rescan_uuid_tree,
107 : Opt_skip_balance,
108 : Opt_space_cache, Opt_no_space_cache,
109 : Opt_space_cache_version,
110 : Opt_ssd, Opt_nossd,
111 : Opt_ssd_spread, Opt_nossd_spread,
112 : Opt_subvol,
113 : Opt_subvol_empty,
114 : Opt_subvolid,
115 : Opt_thread_pool,
116 : Opt_treelog, Opt_notreelog,
117 : Opt_user_subvol_rm_allowed,
118 :
119 : /* Rescue options */
120 : Opt_rescue,
121 : Opt_usebackuproot,
122 : Opt_nologreplay,
123 : Opt_ignorebadroots,
124 : Opt_ignoredatacsums,
125 : Opt_rescue_all,
126 :
127 : /* Deprecated options */
128 : Opt_recovery,
129 : Opt_inode_cache, Opt_noinode_cache,
130 :
131 : /* Debugging options */
132 : Opt_check_integrity,
133 : Opt_check_integrity_including_extent_data,
134 : Opt_check_integrity_print_mask,
135 : Opt_enospc_debug, Opt_noenospc_debug,
136 : #ifdef CONFIG_BTRFS_DEBUG
137 : Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
138 : #endif
139 : #ifdef CONFIG_BTRFS_FS_REF_VERIFY
140 : Opt_ref_verify,
141 : #endif
142 : Opt_err,
143 : };
144 :
/*
 * Pattern table for the top-level, comma-separated mount options. It is
 * passed to match_token() by btrfs_parse_options(),
 * btrfs_parse_device_options() and btrfs_parse_subvol_options(). Patterns
 * containing a %s or %u placeholder carry an argument that the parsers read
 * back through args[0]. The {Opt_err, NULL} entry terminates the table.
 *
 * Several options exist in two forms (for example "compress" and
 * "compress=%s", "discard" and "discard=%s", "subvol=%s" and "subvol="), each
 * mapped to its own token so the parser can tell whether a value was given.
 */
145 : static const match_table_t tokens = {
146 : {Opt_acl, "acl"},
147 : {Opt_noacl, "noacl"},
148 : {Opt_clear_cache, "clear_cache"},
149 : {Opt_commit_interval, "commit=%u"},
150 : {Opt_compress, "compress"},
151 : {Opt_compress_type, "compress=%s"},
152 : {Opt_compress_force, "compress-force"},
153 : {Opt_compress_force_type, "compress-force=%s"},
154 : {Opt_degraded, "degraded"},
155 : {Opt_device, "device=%s"},
156 : {Opt_fatal_errors, "fatal_errors=%s"},
157 : {Opt_flushoncommit, "flushoncommit"},
158 : {Opt_noflushoncommit, "noflushoncommit"},
159 : {Opt_inode_cache, "inode_cache"},
160 : {Opt_noinode_cache, "noinode_cache"},
161 : {Opt_max_inline, "max_inline=%s"},
162 : {Opt_barrier, "barrier"},
163 : {Opt_nobarrier, "nobarrier"},
164 : {Opt_datacow, "datacow"},
165 : {Opt_nodatacow, "nodatacow"},
166 : {Opt_datasum, "datasum"},
167 : {Opt_nodatasum, "nodatasum"},
168 : {Opt_defrag, "autodefrag"},
169 : {Opt_nodefrag, "noautodefrag"},
170 : {Opt_discard, "discard"},
171 : {Opt_discard_mode, "discard=%s"},
172 : {Opt_nodiscard, "nodiscard"},
173 : {Opt_norecovery, "norecovery"},
174 : {Opt_ratio, "metadata_ratio=%u"},
175 : {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
176 : {Opt_skip_balance, "skip_balance"},
177 : {Opt_space_cache, "space_cache"},
178 : {Opt_no_space_cache, "nospace_cache"},
179 : {Opt_space_cache_version, "space_cache=%s"},
180 : {Opt_ssd, "ssd"},
181 : {Opt_nossd, "nossd"},
182 : {Opt_ssd_spread, "ssd_spread"},
183 : {Opt_nossd_spread, "nossd_spread"},
184 : {Opt_subvol, "subvol=%s"},
185 : {Opt_subvol_empty, "subvol="},
186 : {Opt_subvolid, "subvolid=%s"},
187 : {Opt_thread_pool, "thread_pool=%u"},
188 : {Opt_treelog, "treelog"},
189 : {Opt_notreelog, "notreelog"},
190 : {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
191 :
192 : /* Rescue options */
193 : {Opt_rescue, "rescue=%s"},
194 : /* Deprecated, with alias rescue=nologreplay */
195 : {Opt_nologreplay, "nologreplay"},
196 : /* Deprecated, with alias rescue=usebackuproot */
197 : {Opt_usebackuproot, "usebackuproot"},
198 :
199 : /* Deprecated options */
200 : {Opt_recovery, "recovery"},
201 :
202 : /* Debugging options */
203 : {Opt_check_integrity, "check_int"},
204 : {Opt_check_integrity_including_extent_data, "check_int_data"},
205 : {Opt_check_integrity_print_mask, "check_int_print_mask=%u"},
206 : {Opt_enospc_debug, "enospc_debug"},
207 : {Opt_noenospc_debug, "noenospc_debug"},
208 : #ifdef CONFIG_BTRFS_DEBUG
209 : {Opt_fragment_data, "fragment=data"},
210 : {Opt_fragment_metadata, "fragment=metadata"},
211 : {Opt_fragment_all, "fragment=all"},
212 : #endif
213 : #ifdef CONFIG_BTRFS_FS_REF_VERIFY
214 : {Opt_ref_verify, "ref_verify"},
215 : #endif
216 : {Opt_err, NULL},
217 : };
218 :
/*
 * Pattern table for the colon-separated keywords accepted inside the rescue=
 * option; parse_rescue_options() passes it to match_token(). "ibadroots" and
 * "idatacsums" map to the same tokens as "ignorebadroots" and
 * "ignoredatacsums". The {Opt_err, NULL} entry terminates the table.
 */
219 : static const match_table_t rescue_tokens = {
220 : {Opt_usebackuproot, "usebackuproot"},
221 : {Opt_nologreplay, "nologreplay"},
222 : {Opt_ignorebadroots, "ignorebadroots"},
223 : {Opt_ignorebadroots, "ibadroots"},
224 : {Opt_ignoredatacsums, "ignoredatacsums"},
225 : {Opt_ignoredatacsums, "idatacsums"},
226 : {Opt_rescue_all, "all"},
227 : {Opt_err, NULL},
228 : };
229 :
230 : static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt,
231 : const char *opt_name)
232 : {
233 3293 : if (fs_info->mount_opt & opt) {
234 3 : btrfs_err(fs_info, "%s must be used with ro mount option",
235 : opt_name);
236 3 : return true;
237 : }
238 : return false;
239 : }
240 :
241 4 : static int parse_rescue_options(struct btrfs_fs_info *info, const char *options)
242 : {
243 4 : char *opts;
244 4 : char *orig;
245 4 : char *p;
246 4 : substring_t args[MAX_OPT_ARGS];
247 4 : int ret = 0;
248 :
249 4 : opts = kstrdup(options, GFP_KERNEL);
250 4 : if (!opts)
251 : return -ENOMEM;
252 : orig = opts;
253 :
254 7 : while ((p = strsep(&opts, ":")) != NULL) {
255 4 : int token;
256 :
257 4 : if (!*p)
258 0 : continue;
259 4 : token = match_token(p, rescue_tokens, args);
260 4 : switch (token){
261 0 : case Opt_usebackuproot:
262 0 : btrfs_info(info,
263 : "trying to use backup root at mount time");
264 0 : btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
265 0 : break;
266 3 : case Opt_nologreplay:
267 3 : btrfs_set_and_info(info, NOLOGREPLAY,
268 : "disabling log replay at mount time");
269 3 : break;
270 0 : case Opt_ignorebadroots:
271 0 : btrfs_set_and_info(info, IGNOREBADROOTS,
272 : "ignoring bad roots");
273 0 : break;
274 0 : case Opt_ignoredatacsums:
275 0 : btrfs_set_and_info(info, IGNOREDATACSUMS,
276 : "ignoring data csums");
277 0 : break;
278 0 : case Opt_rescue_all:
279 0 : btrfs_info(info, "enabling all of the rescue options");
280 0 : btrfs_set_and_info(info, IGNOREDATACSUMS,
281 : "ignoring data csums");
282 0 : btrfs_set_and_info(info, IGNOREBADROOTS,
283 : "ignoring bad roots");
284 0 : btrfs_set_and_info(info, NOLOGREPLAY,
285 : "disabling log replay at mount time");
286 0 : break;
287 1 : case Opt_err:
288 1 : btrfs_info(info, "unrecognized rescue option '%s'", p);
289 1 : ret = -EINVAL;
290 1 : goto out;
291 : default:
292 : break;
293 : }
294 :
295 : }
296 3 : out:
297 4 : kfree(orig);
298 4 : return ret;
299 : }
300 :
301 : /*
302 : * Regular mount options parser. Everything that is needed only when
303 : * reading in a new superblock is parsed here.
304 : * XXX JDM: This needs to be cleaned up for remount.
 *
 * @info:      filesystem whose mount_opt bits and tunables (compression
 *             type and level, thread_pool_size, max_inline, metadata_ratio,
 *             commit_interval, ...) are updated while options are parsed
 * @options:   comma-separated option string, or NULL; it is split in place
 *             with strsep(), so the caller's buffer is modified
 * @new_flags: superblock flags; unless SB_RDONLY is set, the nologreplay,
 *             ignorebadroots and ignoredatacsums options are rejected
 *
 * subvol=, subvol=<empty>, subvolid= and device= are recognized but skipped
 * here. Options applied before an error is hit are not rolled back by this
 * function.
 *
 * Return: 0 on success; -EINVAL for an unrecognized option or value or for
 * a rejected combination; -ENOMEM if an allocation fails; otherwise the
 * error returned by match_int(), parse_rescue_options() or
 * btrfs_check_mountopts_zoned().
305 : */
306 3360 : int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
307 : unsigned long new_flags)
308 : {
309 3360 : substring_t args[MAX_OPT_ARGS];
310 3360 : char *p, *num;
311 3360 : int intarg;
312 3360 : int ret = 0;
313 3360 : char *compress_type;
314 3360 : bool compress_force = false;
315 3360 : enum btrfs_compression_type saved_compress_type;
316 3360 : int saved_compress_level;
317 3360 : bool saved_compress_force;
318 3360 : int no_compress = 0;
319 3360 : const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state);
320 :
/*
 * Choose the starting free-space option before looking at user options:
 * FREE_SPACE_TREE when the compat_ro feature bit is set, otherwise
 * SPACE_CACHE when the v1 cache is active (on zoned filesystems the cache
 * generation is reset to 0 instead).
 */
321 3360 : if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
322 3354 : btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
323 6 : else if (btrfs_free_space_cache_v1_active(info)) {
324 6 : if (btrfs_is_zoned(info)) {
325 0 : btrfs_info(info,
326 : "zoned: clearing existing space cache");
327 0 : btrfs_set_super_cache_generation(info->super_copy, 0);
328 : } else {
329 6 : btrfs_set_opt(info->mount_opt, SPACE_CACHE);
330 : }
331 : }
332 :
333 : /*
334 : * Even if the options are empty, we still need to do the extra checks
335 : * against the new flags.
336 : */
337 3360 : if (!options)
338 3065 : goto check;
339 :
340 591 : while ((p = strsep(&options, ",")) != NULL) {
341 317 : int token;
342 317 : if (!*p)
343 0 : continue;
344 :
345 317 : token = match_token(p, tokens, args);
346 317 : switch (token) {
347 1 : case Opt_degraded:
348 1 : btrfs_info(info, "allowing degraded mounts");
349 1 : btrfs_set_opt(info->mount_opt, DEGRADED);
350 1 : break;
351 : case Opt_subvol:
352 : case Opt_subvol_empty:
353 : case Opt_subvolid:
354 : case Opt_device:
355 : /*
356 : * These are parsed by btrfs_parse_subvol_options or
357 : * btrfs_parse_device_options and can be ignored here.
358 : */
359 : break;
360 4 : case Opt_nodatasum:
361 4 : btrfs_set_and_info(info, NODATASUM,
362 : "setting nodatasum");
363 4 : break;
364 9 : case Opt_datasum:
365 9 : if (btrfs_test_opt(info, NODATASUM)) {
366 6 : if (btrfs_test_opt(info, NODATACOW))
367 2 : btrfs_info(info,
368 : "setting datasum, datacow enabled");
369 : else
370 4 : btrfs_info(info, "setting datasum");
371 : }
372 9 : btrfs_clear_opt(info->mount_opt, NODATACOW);
373 9 : btrfs_clear_opt(info->mount_opt, NODATASUM);
374 9 : break;
375 18 : case Opt_nodatacow:
376 18 : if (!btrfs_test_opt(info, NODATACOW)) {
377 18 : if (!btrfs_test_opt(info, COMPRESS) ||
378 : !btrfs_test_opt(info, FORCE_COMPRESS)) {
379 16 : btrfs_info(info,
380 : "setting nodatacow, compression disabled");
381 : } else {
382 2 : btrfs_info(info, "setting nodatacow");
383 : }
384 : }
385 18 : btrfs_clear_opt(info->mount_opt, COMPRESS);
386 18 : btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
387 18 : btrfs_set_opt(info->mount_opt, NODATACOW);
388 18 : btrfs_set_opt(info->mount_opt, NODATASUM);
389 18 : break;
390 12 : case Opt_datacow:
391 12 : btrfs_clear_and_info(info, NODATACOW,
392 : "setting datacow");
393 12 : break;
394 29 : case Opt_compress_force:
395 : case Opt_compress_force_type:
396 29 : compress_force = true;
397 73 : fallthrough;
398 73 : case Opt_compress:
399 : case Opt_compress_type:
/*
 * Remember the previous compression settings so the info message further
 * down is only printed when something actually changed.
 */
400 73 : saved_compress_type = btrfs_test_opt(info,
401 : COMPRESS) ?
402 73 : info->compress_type : BTRFS_COMPRESS_NONE;
403 73 : saved_compress_force =
404 73 : btrfs_test_opt(info, FORCE_COMPRESS);
405 73 : saved_compress_level = info->compress_level;
406 73 : if (token == Opt_compress ||
407 52 : token == Opt_compress_force ||
408 52 : strncmp(args[0].from, "zlib", 4) == 0) {
409 31 : compress_type = "zlib";
410 :
411 31 : info->compress_type = BTRFS_COMPRESS_ZLIB;
412 31 : info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
413 : /*
414 : * args[0] contains uninitialized data since
415 : * for these tokens we don't expect any
416 : * parameter.
417 : */
418 31 : if (token != Opt_compress &&
419 : token != Opt_compress_force)
420 10 : info->compress_level =
421 10 : btrfs_compress_str2level(
422 : BTRFS_COMPRESS_ZLIB,
423 10 : args[0].from + 4);
424 31 : btrfs_set_opt(info->mount_opt, COMPRESS);
425 31 : btrfs_clear_opt(info->mount_opt, NODATACOW);
426 31 : btrfs_clear_opt(info->mount_opt, NODATASUM);
427 31 : no_compress = 0;
428 42 : } else if (strncmp(args[0].from, "lzo", 3) == 0) {
429 19 : compress_type = "lzo";
430 19 : info->compress_type = BTRFS_COMPRESS_LZO;
431 19 : info->compress_level = 0;
432 19 : btrfs_set_opt(info->mount_opt, COMPRESS);
433 19 : btrfs_clear_opt(info->mount_opt, NODATACOW);
434 19 : btrfs_clear_opt(info->mount_opt, NODATASUM);
435 19 : btrfs_set_fs_incompat(info, COMPRESS_LZO);
436 19 : no_compress = 0;
437 23 : } else if (strncmp(args[0].from, "zstd", 4) == 0) {
438 10 : compress_type = "zstd";
439 10 : info->compress_type = BTRFS_COMPRESS_ZSTD;
440 20 : info->compress_level =
441 10 : btrfs_compress_str2level(
442 : BTRFS_COMPRESS_ZSTD,
443 10 : args[0].from + 4);
444 10 : btrfs_set_opt(info->mount_opt, COMPRESS);
445 10 : btrfs_clear_opt(info->mount_opt, NODATACOW);
446 10 : btrfs_clear_opt(info->mount_opt, NODATASUM);
447 10 : btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
448 10 : no_compress = 0;
449 13 : } else if (strncmp(args[0].from, "no", 2) == 0) {
450 11 : compress_type = "no";
451 11 : info->compress_level = 0;
452 11 : info->compress_type = 0;
453 11 : btrfs_clear_opt(info->mount_opt, COMPRESS);
454 11 : btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
455 11 : compress_force = false;
456 11 : no_compress++;
457 : } else {
458 2 : btrfs_err(info, "unrecognized compression value %s",
459 : args[0].from);
460 2 : ret = -EINVAL;
461 2 : goto out;
462 : }
463 :
464 71 : if (compress_force) {
465 23 : btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
466 : } else {
467 : /*
468 : * If we remount from compress-force=xxx to
469 : * compress=xxx, we need clear FORCE_COMPRESS
470 : * flag, otherwise, there is no way for users
471 : * to disable forcible compression separately.
472 : */
473 48 : btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
474 : }
475 71 : if (no_compress == 1) {
476 10 : btrfs_info(info, "use no compression");
477 61 : } else if ((info->compress_type != saved_compress_type) ||
478 1 : (compress_force != saved_compress_force) ||
479 1 : (info->compress_level != saved_compress_level)) {
480 97 : btrfs_info(info, "%s %s compression, level %d",
481 : (compress_force) ? "force" : "use",
482 : compress_type, info->compress_level);
483 : }
/* Reset so a later compress option in the same string starts unforced. */
484 : compress_force = false;
485 : break;
486 5 : case Opt_ssd:
487 5 : btrfs_set_and_info(info, SSD,
488 : "enabling ssd optimizations");
489 5 : btrfs_clear_opt(info->mount_opt, NOSSD);
490 5 : break;
491 3 : case Opt_ssd_spread:
492 3 : btrfs_set_and_info(info, SSD,
493 : "enabling ssd optimizations");
494 3 : btrfs_set_and_info(info, SSD_SPREAD,
495 : "using spread ssd allocation scheme");
496 3 : btrfs_clear_opt(info->mount_opt, NOSSD);
497 3 : break;
498 6 : case Opt_nossd:
499 6 : btrfs_set_opt(info->mount_opt, NOSSD);
500 6 : btrfs_clear_and_info(info, SSD,
501 : "not using ssd optimizations");
502 6 : fallthrough;
503 6 : case Opt_nossd_spread:
504 6 : btrfs_clear_and_info(info, SSD_SPREAD,
505 : "not using spread ssd allocation scheme");
506 6 : break;
507 3 : case Opt_barrier:
508 3 : btrfs_clear_and_info(info, NOBARRIER,
509 : "turning on barriers");
510 3 : break;
511 3 : case Opt_nobarrier:
512 3 : btrfs_set_and_info(info, NOBARRIER,
513 : "turning off barriers");
514 3 : break;
515 9 : case Opt_thread_pool:
516 9 : ret = match_int(&args[0], &intarg);
517 9 : if (ret) {
518 0 : btrfs_err(info, "unrecognized thread_pool value %s",
519 : args[0].from);
520 0 : goto out;
521 9 : } else if (intarg == 0) {
522 1 : btrfs_err(info, "invalid value 0 for thread_pool");
523 1 : ret = -EINVAL;
524 1 : goto out;
525 : }
526 8 : info->thread_pool_size = intarg;
527 8 : break;
528 11 : case Opt_max_inline:
529 11 : num = match_strdup(&args[0]);
530 11 : if (num) {
531 11 : info->max_inline = memparse(num, NULL);
532 11 : kfree(num);
533 :
/* A non-zero max_inline is capped at the filesystem sector size. */
534 11 : if (info->max_inline) {
535 9 : info->max_inline = min_t(u64,
536 : info->max_inline,
537 : info->sectorsize);
538 : }
539 11 : btrfs_info(info, "max_inline at %llu",
540 : info->max_inline);
541 : } else {
542 0 : ret = -ENOMEM;
543 0 : goto out;
544 : }
545 11 : break;
546 3 : case Opt_acl:
547 : #ifdef CONFIG_BTRFS_FS_POSIX_ACL
548 3 : info->sb->s_flags |= SB_POSIXACL;
549 3 : break;
550 : #else
551 : btrfs_err(info, "support for ACL not compiled in!");
552 : ret = -EINVAL;
553 : goto out;
554 : #endif
555 4 : case Opt_noacl:
556 4 : info->sb->s_flags &= ~SB_POSIXACL;
557 4 : break;
558 3 : case Opt_notreelog:
559 3 : btrfs_set_and_info(info, NOTREELOG,
560 : "disabling tree log");
561 3 : break;
562 3 : case Opt_treelog:
563 3 : btrfs_clear_and_info(info, NOTREELOG,
564 : "enabling tree log");
565 3 : break;
566 5 : case Opt_norecovery:
567 : case Opt_nologreplay:
568 5 : btrfs_warn(info,
569 : "'nologreplay' is deprecated, use 'rescue=nologreplay' instead");
570 5 : btrfs_set_and_info(info, NOLOGREPLAY,
571 : "disabling log replay at mount time");
572 5 : break;
573 4 : case Opt_flushoncommit:
574 4 : btrfs_set_and_info(info, FLUSHONCOMMIT,
575 : "turning on flush-on-commit");
576 4 : break;
577 3 : case Opt_noflushoncommit:
578 3 : btrfs_clear_and_info(info, FLUSHONCOMMIT,
579 : "turning off flush-on-commit");
580 3 : break;
581 6 : case Opt_ratio:
582 6 : ret = match_int(&args[0], &intarg);
583 6 : if (ret) {
584 0 : btrfs_err(info, "unrecognized metadata_ratio value %s",
585 : args[0].from);
586 0 : goto out;
587 : }
588 6 : info->metadata_ratio = intarg;
589 6 : btrfs_info(info, "metadata ratio %u",
590 : info->metadata_ratio);
591 6 : break;
592 20 : case Opt_discard:
593 : case Opt_discard_mode:
594 20 : if (token == Opt_discard ||
595 15 : strcmp(args[0].from, "sync") == 0) {
596 15 : btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
597 15 : btrfs_set_and_info(info, DISCARD_SYNC,
598 : "turning on sync discard");
599 5 : } else if (strcmp(args[0].from, "async") == 0) {
600 4 : btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
601 4 : btrfs_set_and_info(info, DISCARD_ASYNC,
602 : "turning on async discard");
603 : } else {
604 1 : btrfs_err(info, "unrecognized discard mode value %s",
605 : args[0].from);
606 1 : ret = -EINVAL;
607 1 : goto out;
608 : }
609 19 : btrfs_clear_opt(info->mount_opt, NODISCARD);
610 19 : break;
611 3 : case Opt_nodiscard:
612 3 : btrfs_clear_and_info(info, DISCARD_SYNC,
613 : "turning off discard");
614 3 : btrfs_clear_and_info(info, DISCARD_ASYNC,
615 : "turning off async discard");
616 3 : btrfs_set_opt(info->mount_opt, NODISCARD);
617 3 : break;
618 17 : case Opt_space_cache:
619 : case Opt_space_cache_version:
620 : /*
621 : * We already set FREE_SPACE_TREE above because we have
622 : * compat_ro(FREE_SPACE_TREE) set, and we aren't going
623 : * to allow v1 to be set for extent tree v2, simply
624 : * ignore this setting if we're extent tree v2.
625 : */
626 17 : if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
627 : break;
628 17 : if (token == Opt_space_cache ||
629 17 : strcmp(args[0].from, "v1") == 0) {
630 6 : btrfs_clear_opt(info->mount_opt,
631 : FREE_SPACE_TREE);
632 6 : btrfs_set_and_info(info, SPACE_CACHE,
633 : "enabling disk space caching");
634 11 : } else if (strcmp(args[0].from, "v2") == 0) {
635 11 : btrfs_clear_opt(info->mount_opt,
636 : SPACE_CACHE);
637 11 : btrfs_set_and_info(info, FREE_SPACE_TREE,
638 : "enabling free space tree");
639 : } else {
640 0 : btrfs_err(info, "unrecognized space_cache value %s",
641 : args[0].from);
642 0 : ret = -EINVAL;
643 0 : goto out;
644 : }
645 : break;
646 1 : case Opt_rescan_uuid_tree:
647 1 : btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
648 1 : break;
649 2 : case Opt_no_space_cache:
650 : /*
651 : * We cannot operate without the free space tree with
652 : * extent tree v2, ignore this option.
653 : */
654 2 : if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
655 : break;
656 2 : if (btrfs_test_opt(info, SPACE_CACHE)) {
657 0 : btrfs_clear_and_info(info, SPACE_CACHE,
658 : "disabling disk space caching");
659 : }
660 2 : if (btrfs_test_opt(info, FREE_SPACE_TREE)) {
661 2 : btrfs_clear_and_info(info, FREE_SPACE_TREE,
662 : "disabling free space tree");
663 : }
664 : break;
665 0 : case Opt_inode_cache:
666 : case Opt_noinode_cache:
667 0 : btrfs_warn(info,
668 : "the 'inode_cache' option is deprecated and has no effect since 5.11");
669 0 : break;
670 10 : case Opt_clear_cache:
671 : /*
672 : * We cannot clear the free space tree with extent tree
673 : * v2, ignore this option.
674 : */
675 10 : if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
676 : break;
677 10 : btrfs_set_and_info(info, CLEAR_CACHE,
678 : "force clearing of disk cache");
679 10 : break;
680 2 : case Opt_user_subvol_rm_allowed:
681 2 : btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
682 2 : break;
683 4 : case Opt_enospc_debug:
684 4 : btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
685 4 : break;
686 3 : case Opt_noenospc_debug:
687 3 : btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG);
688 3 : break;
689 5 : case Opt_defrag:
690 5 : btrfs_set_and_info(info, AUTO_DEFRAG,
691 : "enabling auto defrag");
692 5 : break;
693 4 : case Opt_nodefrag:
694 4 : btrfs_clear_and_info(info, AUTO_DEFRAG,
695 : "disabling auto defrag");
696 4 : break;
697 0 : case Opt_recovery:
698 : case Opt_usebackuproot:
699 0 : btrfs_warn(info,
700 : "'%s' is deprecated, use 'rescue=usebackuproot' instead",
701 : token == Opt_recovery ? "recovery" :
702 : "usebackuproot");
703 0 : btrfs_info(info,
704 : "trying to use backup root at mount time");
705 0 : btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
706 0 : break;
707 1 : case Opt_skip_balance:
708 1 : btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
709 1 : break;
710 : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
711 : case Opt_check_integrity_including_extent_data:
712 : btrfs_info(info,
713 : "enabling check integrity including extent data");
714 : btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
715 : btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
716 : break;
717 : case Opt_check_integrity:
718 : btrfs_info(info, "enabling check integrity");
719 : btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
720 : break;
721 : case Opt_check_integrity_print_mask:
722 : ret = match_int(&args[0], &intarg);
723 : if (ret) {
724 : btrfs_err(info,
725 : "unrecognized check_integrity_print_mask value %s",
726 : args[0].from);
727 : goto out;
728 : }
729 : info->check_integrity_print_mask = intarg;
730 : btrfs_info(info, "check_integrity_print_mask 0x%x",
731 : info->check_integrity_print_mask);
732 : break;
733 : #else
734 3 : case Opt_check_integrity_including_extent_data:
735 : case Opt_check_integrity:
736 : case Opt_check_integrity_print_mask:
737 3 : btrfs_err(info,
738 : "support for check_integrity* not compiled in!");
739 3 : ret = -EINVAL;
740 3 : goto out;
741 : #endif
742 7 : case Opt_fatal_errors:
743 7 : if (strcmp(args[0].from, "panic") == 0) {
744 3 : btrfs_set_opt(info->mount_opt,
745 : PANIC_ON_FATAL_ERROR);
746 4 : } else if (strcmp(args[0].from, "bug") == 0) {
747 3 : btrfs_clear_opt(info->mount_opt,
748 : PANIC_ON_FATAL_ERROR);
749 : } else {
750 1 : btrfs_err(info, "unrecognized fatal_errors value %s",
751 : args[0].from);
752 1 : ret = -EINVAL;
753 1 : goto out;
754 : }
755 : break;
756 15 : case Opt_commit_interval:
757 15 : intarg = 0;
758 15 : ret = match_int(&args[0], &intarg);
759 15 : if (ret) {
760 0 : btrfs_err(info, "unrecognized commit_interval value %s",
761 : args[0].from);
762 0 : ret = -EINVAL;
763 0 : goto out;
764 : }
765 15 : if (intarg == 0) {
766 3 : btrfs_info(info,
767 : "using default commit interval %us",
768 : BTRFS_DEFAULT_COMMIT_INTERVAL);
769 3 : intarg = BTRFS_DEFAULT_COMMIT_INTERVAL;
770 12 : } else if (intarg > 300) {
771 0 : btrfs_warn(info, "excessive commit interval %d",
772 : intarg);
773 : }
774 15 : info->commit_interval = intarg;
775 15 : break;
776 4 : case Opt_rescue:
777 4 : ret = parse_rescue_options(info, args[0].from);
778 4 : if (ret < 0) {
779 1 : btrfs_err(info, "unrecognized rescue value %s",
780 : args[0].from);
781 1 : goto out;
782 : }
783 : break;
784 : #ifdef CONFIG_BTRFS_DEBUG
785 : case Opt_fragment_all:
786 : btrfs_info(info, "fragmenting all space");
787 : btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
788 : btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
789 : break;
790 : case Opt_fragment_metadata:
791 : btrfs_info(info, "fragmenting metadata");
792 : btrfs_set_opt(info->mount_opt,
793 : FRAGMENT_METADATA);
794 : break;
795 : case Opt_fragment_data:
796 : btrfs_info(info, "fragmenting data");
797 : btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
798 : break;
799 : #endif
800 : #ifdef CONFIG_BTRFS_FS_REF_VERIFY
801 : case Opt_ref_verify:
802 : btrfs_info(info, "doing ref verification");
803 : btrfs_set_opt(info->mount_opt, REF_VERIFY);
804 : break;
805 : #endif
806 12 : case Opt_err:
807 12 : btrfs_err(info, "unrecognized mount option '%s'", p);
808 12 : ret = -EINVAL;
809 12 : goto out;
810 : default:
811 : break;
812 : }
813 : }
814 274 : check:
815 : /* We're read-only, don't have to check. */
816 3339 : if (new_flags & SB_RDONLY)
817 43 : goto out;
818 :
819 3296 : if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
820 : check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
821 : check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums"))
822 : ret = -EINVAL;
/*
 * Shared exit path, reached on success and on every parse error: the
 * free-space-tree checks below run regardless of ret and may overwrite it
 * with -EINVAL; the zoned check and the summary messages run only while ret
 * is still 0.
 */
823 3360 : out:
824 3360 : if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
825 3354 : !btrfs_test_opt(info, FREE_SPACE_TREE) &&
826 : !btrfs_test_opt(info, CLEAR_CACHE)) {
827 2 : btrfs_err(info, "cannot disable free space tree");
828 2 : ret = -EINVAL;
829 : }
830 3360 : if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) &&
831 0 : !btrfs_test_opt(info, FREE_SPACE_TREE)) {
832 0 : btrfs_err(info, "cannot disable free space tree with block-group-tree feature");
833 0 : ret = -EINVAL;
834 : }
835 3360 : if (!ret)
836 3334 : ret = btrfs_check_mountopts_zoned(info);
837 3360 : if (!ret && !remounting) {
838 3216 : if (btrfs_test_opt(info, SPACE_CACHE))
839 8 : btrfs_info(info, "disk space caching is enabled");
840 3216 : if (btrfs_test_opt(info, FREE_SPACE_TREE))
841 3207 : btrfs_info(info, "using free space tree");
842 : }
843 3360 : return ret;
844 : }
845 :
846 : /*
847 : * Parse mount options that are required early in the mount process.
848 : *
849 : * All other options will be parsed on much later in the mount process and
850 : * only when we need to allocate a new super block.
851 : */
852 3472 : static int btrfs_parse_device_options(const char *options, blk_mode_t flags)
853 : {
854 3472 : substring_t args[MAX_OPT_ARGS];
855 3472 : char *device_name, *opts, *orig, *p;
856 3472 : struct btrfs_device *device = NULL;
857 3472 : int error = 0;
858 :
859 3472 : lockdep_assert_held(&uuid_mutex);
860 :
861 3472 : if (!options)
862 : return 0;
863 :
864 : /*
865 : * strsep changes the string, duplicate it because btrfs_parse_options
866 : * gets called later
867 : */
868 186 : opts = kstrdup(options, GFP_KERNEL);
869 186 : if (!opts)
870 : return -ENOMEM;
871 : orig = opts;
872 :
873 388 : while ((p = strsep(&opts, ",")) != NULL) {
874 202 : int token;
875 :
876 202 : if (!*p)
877 0 : continue;
878 :
879 202 : token = match_token(p, tokens, args);
880 202 : if (token == Opt_device) {
881 0 : device_name = match_strdup(&args[0]);
882 0 : if (!device_name) {
883 0 : error = -ENOMEM;
884 0 : goto out;
885 : }
886 0 : device = btrfs_scan_one_device(device_name, flags);
887 0 : kfree(device_name);
888 0 : if (IS_ERR(device)) {
889 0 : error = PTR_ERR(device);
890 0 : goto out;
891 : }
892 : }
893 : }
894 :
895 186 : out:
896 186 : kfree(orig);
897 186 : return error;
898 : }
899 :
900 : /*
901 : * Parse mount options that are related to subvolume id
902 : *
903 : * The value is later passed to mount_subvol()
904 : */
905 3472 : static int btrfs_parse_subvol_options(const char *options, char **subvol_name,
906 : u64 *subvol_objectid)
907 : {
908 3472 : substring_t args[MAX_OPT_ARGS];
909 3472 : char *opts, *orig, *p;
910 3472 : int error = 0;
911 3472 : u64 subvolid;
912 :
913 3472 : if (!options)
914 : return 0;
915 :
916 : /*
917 : * strsep changes the string, duplicate it because
918 : * btrfs_parse_device_options gets called later
919 : */
920 186 : opts = kstrdup(options, GFP_KERNEL);
921 186 : if (!opts)
922 : return -ENOMEM;
923 : orig = opts;
924 :
925 388 : while ((p = strsep(&opts, ",")) != NULL) {
926 202 : int token;
927 202 : if (!*p)
928 0 : continue;
929 :
930 202 : token = match_token(p, tokens, args);
931 202 : switch (token) {
932 8 : case Opt_subvol:
933 8 : kfree(*subvol_name);
934 8 : *subvol_name = match_strdup(&args[0]);
935 8 : if (!*subvol_name) {
936 0 : error = -ENOMEM;
937 0 : goto out;
938 : }
939 : break;
940 5 : case Opt_subvolid:
941 5 : error = match_u64(&args[0], &subvolid);
942 5 : if (error)
943 0 : goto out;
944 :
945 : /* we want the original fs_tree */
946 5 : if (subvolid == 0)
947 1 : subvolid = BTRFS_FS_TREE_OBJECTID;
948 :
949 5 : *subvol_objectid = subvolid;
950 5 : break;
951 : default:
952 : break;
953 : }
954 : }
955 :
956 186 : out:
957 186 : kfree(orig);
958 186 : return error;
959 : }
960 :
961 1238349 : char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
962 : u64 subvol_objectid)
963 : {
964 1238349 : struct btrfs_root *root = fs_info->tree_root;
965 1238349 : struct btrfs_root *fs_root = NULL;
966 1238349 : struct btrfs_root_ref *root_ref;
967 1238349 : struct btrfs_inode_ref *inode_ref;
968 1238349 : struct btrfs_key key;
969 1238349 : struct btrfs_path *path = NULL;
970 1238349 : char *name = NULL, *ptr;
971 1238349 : u64 dirid;
972 1238349 : int len;
973 1238349 : int ret;
974 :
975 1238349 : path = btrfs_alloc_path();
976 1240581 : if (!path) {
977 0 : ret = -ENOMEM;
978 0 : goto err;
979 : }
980 :
981 1240581 : name = kmalloc(PATH_MAX, GFP_KERNEL);
982 1242302 : if (!name) {
983 0 : ret = -ENOMEM;
984 0 : goto err;
985 : }
986 1242302 : ptr = name + PATH_MAX - 1;
987 1242302 : ptr[0] = '\0';
988 :
989 : /*
990 : * Walk up the subvolume trees in the tree of tree roots by root
991 : * backrefs until we hit the top-level subvolume.
992 : */
993 1242365 : while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
994 1039 : key.objectid = subvol_objectid;
995 1039 : key.type = BTRFS_ROOT_BACKREF_KEY;
996 1039 : key.offset = (u64)-1;
997 :
998 1039 : ret = btrfs_search_backwards(root, &key, path);
999 64 : if (ret < 0) {
1000 0 : goto err;
1001 64 : } else if (ret > 0) {
1002 1 : ret = -ENOENT;
1003 1 : goto err;
1004 : }
1005 :
1006 63 : subvol_objectid = key.offset;
1007 :
1008 63 : root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1009 : struct btrfs_root_ref);
1010 63 : len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
1011 62 : ptr -= len + 1;
1012 62 : if (ptr < name) {
1013 0 : ret = -ENAMETOOLONG;
1014 0 : goto err;
1015 : }
1016 62 : read_extent_buffer(path->nodes[0], ptr + 1,
1017 62 : (unsigned long)(root_ref + 1), len);
1018 62 : ptr[0] = '/';
1019 62 : dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
1020 62 : btrfs_release_path(path);
1021 :
1022 63 : fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true);
1023 63 : if (IS_ERR(fs_root)) {
1024 0 : ret = PTR_ERR(fs_root);
1025 0 : fs_root = NULL;
1026 0 : goto err;
1027 : }
1028 :
1029 : /*
1030 : * Walk up the filesystem tree by inode refs until we hit the
1031 : * root directory.
1032 : */
1033 63 : while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
1034 0 : key.objectid = dirid;
1035 0 : key.type = BTRFS_INODE_REF_KEY;
1036 0 : key.offset = (u64)-1;
1037 :
1038 0 : ret = btrfs_search_backwards(fs_root, &key, path);
1039 0 : if (ret < 0) {
1040 0 : goto err;
1041 0 : } else if (ret > 0) {
1042 0 : ret = -ENOENT;
1043 0 : goto err;
1044 : }
1045 :
1046 0 : dirid = key.offset;
1047 :
1048 0 : inode_ref = btrfs_item_ptr(path->nodes[0],
1049 : path->slots[0],
1050 : struct btrfs_inode_ref);
1051 0 : len = btrfs_inode_ref_name_len(path->nodes[0],
1052 : inode_ref);
1053 0 : ptr -= len + 1;
1054 0 : if (ptr < name) {
1055 0 : ret = -ENAMETOOLONG;
1056 0 : goto err;
1057 : }
1058 0 : read_extent_buffer(path->nodes[0], ptr + 1,
1059 0 : (unsigned long)(inode_ref + 1), len);
1060 0 : ptr[0] = '/';
1061 0 : btrfs_release_path(path);
1062 : }
1063 63 : btrfs_put_root(fs_root);
1064 63 : fs_root = NULL;
1065 : }
1066 :
1067 1241326 : btrfs_free_path(path);
1068 1242381 : if (ptr == name + PATH_MAX - 1) {
1069 1242319 : name[0] = '/';
1070 1242319 : name[1] = '\0';
1071 : } else {
1072 124 : memmove(name, ptr, name + PATH_MAX - ptr);
1073 : }
1074 : return name;
1075 :
1076 1 : err:
1077 1 : btrfs_put_root(fs_root);
1078 1 : btrfs_free_path(path);
1079 1 : kfree(name);
1080 1 : return ERR_PTR(ret);
1081 : }
1082 :
1083 3430 : static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
1084 : {
1085 3430 : struct btrfs_root *root = fs_info->tree_root;
1086 3430 : struct btrfs_dir_item *di;
1087 3430 : struct btrfs_path *path;
1088 3430 : struct btrfs_key location;
1089 3430 : struct fscrypt_str name = FSTR_INIT("default", 7);
1090 3430 : u64 dir_id;
1091 :
1092 3430 : path = btrfs_alloc_path();
1093 3430 : if (!path)
1094 : return -ENOMEM;
1095 :
1096 : /*
1097 : * Find the "default" dir item which points to the root item that we
1098 : * will mount by default if we haven't been given a specific subvolume
1099 : * to mount.
1100 : */
1101 3430 : dir_id = btrfs_super_root_dir(fs_info->super_copy);
1102 3430 : di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
1103 3430 : if (IS_ERR(di)) {
1104 0 : btrfs_free_path(path);
1105 0 : return PTR_ERR(di);
1106 : }
1107 3430 : if (!di) {
1108 : /*
1109 : * Ok the default dir item isn't there. This is weird since
1110 : * it's always been there, but don't freak out, just try and
1111 : * mount the top-level subvolume.
1112 : */
1113 0 : btrfs_free_path(path);
1114 0 : *objectid = BTRFS_FS_TREE_OBJECTID;
1115 0 : return 0;
1116 : }
1117 :
1118 3430 : btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
1119 3430 : btrfs_free_path(path);
1120 3430 : *objectid = location.objectid;
1121 3430 : return 0;
1122 : }
1123 :
1124 3242 : static int btrfs_fill_super(struct super_block *sb,
1125 : struct btrfs_fs_devices *fs_devices,
1126 : void *data)
1127 : {
1128 3242 : struct inode *inode;
1129 3242 : struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1130 3242 : int err;
1131 :
1132 3242 : sb->s_maxbytes = MAX_LFS_FILESIZE;
1133 3242 : sb->s_magic = BTRFS_SUPER_MAGIC;
1134 3242 : sb->s_op = &btrfs_super_ops;
1135 3242 : sb->s_d_op = &btrfs_dentry_operations;
1136 3242 : sb->s_export_op = &btrfs_export_ops;
1137 : #ifdef CONFIG_FS_VERITY
1138 : sb->s_vop = &btrfs_verityops;
1139 : #endif
1140 3242 : sb->s_xattr = btrfs_xattr_handlers;
1141 3242 : sb->s_time_gran = 1;
1142 : #ifdef CONFIG_BTRFS_FS_POSIX_ACL
1143 3242 : sb->s_flags |= SB_POSIXACL;
1144 : #endif
1145 3242 : sb->s_flags |= SB_I_VERSION;
1146 3242 : sb->s_iflags |= SB_I_CGROUPWB;
1147 :
1148 3242 : err = super_setup_bdi(sb);
1149 3242 : if (err) {
1150 0 : btrfs_err(fs_info, "super_setup_bdi failed");
1151 0 : return err;
1152 : }
1153 :
1154 3242 : err = open_ctree(sb, fs_devices, (char *)data);
1155 3242 : if (err) {
1156 27 : btrfs_err(fs_info, "open_ctree failed");
1157 27 : return err;
1158 : }
1159 :
1160 3215 : inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
1161 3215 : if (IS_ERR(inode)) {
1162 0 : err = PTR_ERR(inode);
1163 0 : btrfs_handle_fs_error(fs_info, err, NULL);
1164 0 : goto fail_close;
1165 : }
1166 :
1167 3215 : sb->s_root = d_make_root(inode);
1168 3215 : if (!sb->s_root) {
1169 0 : err = -ENOMEM;
1170 0 : goto fail_close;
1171 : }
1172 :
1173 3215 : sb->s_flags |= SB_ACTIVE;
1174 3215 : return 0;
1175 :
1176 0 : fail_close:
1177 0 : close_ctree(fs_info);
1178 0 : return err;
1179 : }
1180 :
1181 60974 : int btrfs_sync_fs(struct super_block *sb, int wait)
1182 : {
1183 60974 : struct btrfs_trans_handle *trans;
1184 60974 : struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1185 60974 : struct btrfs_root *root = fs_info->tree_root;
1186 :
1187 60974 : trace_btrfs_sync_fs(fs_info, wait);
1188 :
1189 60972 : if (!wait) {
1190 30321 : filemap_flush(fs_info->btree_inode->i_mapping);
1191 30321 : return 0;
1192 : }
1193 :
1194 30651 : btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
1195 :
1196 30652 : trans = btrfs_attach_transaction_barrier(root);
1197 30638 : if (IS_ERR(trans)) {
1198 : /* no transaction, don't bother */
1199 14545 : if (PTR_ERR(trans) == -ENOENT) {
1200 : /*
1201 : * Exit unless we have some pending changes
1202 : * that need to go through commit
1203 : */
1204 14543 : if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT,
1205 : &fs_info->flags))
1206 : return 0;
1207 : /*
1208 : * A non-blocking test if the fs is frozen. We must not
1209 : * start a new transaction here otherwise a deadlock
1210 : * happens. The pending operations are delayed to the
1211 : * next commit after thawing.
1212 : */
1213 0 : if (sb_start_write_trylock(sb))
1214 0 : sb_end_write(sb);
1215 : else
1216 : return 0;
1217 0 : trans = btrfs_start_transaction(root, 0);
1218 : }
1219 2 : if (IS_ERR(trans))
1220 0 : return PTR_ERR(trans);
1221 : }
1222 16095 : return btrfs_commit_transaction(trans);
1223 : }
1224 :
/*
 * Append one rescue= sub-option to @seq.
 *
 * The first sub-option is introduced with ",rescue=", every following one is
 * separated with ':'.  @printed tracks whether one was already emitted and is
 * set to true on return.
 */
static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed)
{
	const char *prefix = *printed ? ":" : ",rescue=";

	seq_printf(seq, "%s%s", prefix, s);
	*printed = true;
}
1230 :
1231 1234259 : static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
1232 : {
1233 1234259 : struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb);
1234 1234259 : const char *compress_type;
1235 1234259 : const char *subvol_name;
1236 1234259 : bool printed = false;
1237 :
1238 1234259 : if (btrfs_test_opt(info, DEGRADED))
1239 3 : seq_puts(seq, ",degraded");
1240 1234259 : if (btrfs_test_opt(info, NODATASUM))
1241 89 : seq_puts(seq, ",nodatasum");
1242 1234258 : if (btrfs_test_opt(info, NODATACOW))
1243 77 : seq_puts(seq, ",nodatacow");
1244 1234259 : if (btrfs_test_opt(info, NOBARRIER))
1245 7 : seq_puts(seq, ",nobarrier");
1246 1234259 : if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE)
1247 26 : seq_printf(seq, ",max_inline=%llu", info->max_inline);
1248 1234259 : if (info->thread_pool_size != min_t(unsigned long,
1249 : num_online_cpus() + 2, 8))
1250 16 : seq_printf(seq, ",thread_pool=%u", info->thread_pool_size);
1251 1234259 : if (btrfs_test_opt(info, COMPRESS)) {
1252 326 : compress_type = btrfs_compress_type2str(info->compress_type);
1253 324 : if (btrfs_test_opt(info, FORCE_COMPRESS))
1254 81 : seq_printf(seq, ",compress-force=%s", compress_type);
1255 : else
1256 243 : seq_printf(seq, ",compress=%s", compress_type);
1257 326 : if (info->compress_level)
1258 222 : seq_printf(seq, ":%d", info->compress_level);
1259 : }
1260 1234258 : if (btrfs_test_opt(info, NOSSD))
1261 10 : seq_puts(seq, ",nossd");
1262 1234258 : if (btrfs_test_opt(info, SSD_SPREAD))
1263 5 : seq_puts(seq, ",ssd_spread");
1264 1234253 : else if (btrfs_test_opt(info, SSD))
1265 64 : seq_puts(seq, ",ssd");
1266 1234258 : if (btrfs_test_opt(info, NOTREELOG))
1267 3 : seq_puts(seq, ",notreelog");
1268 1234258 : if (btrfs_test_opt(info, NOLOGREPLAY))
1269 9 : print_rescue_option(seq, "nologreplay", &printed);
1270 1234258 : if (btrfs_test_opt(info, USEBACKUPROOT))
1271 0 : print_rescue_option(seq, "usebackuproot", &printed);
1272 1234258 : if (btrfs_test_opt(info, IGNOREBADROOTS))
1273 0 : print_rescue_option(seq, "ignorebadroots", &printed);
1274 1234258 : if (btrfs_test_opt(info, IGNOREDATACSUMS))
1275 0 : print_rescue_option(seq, "ignoredatacsums", &printed);
1276 1234258 : if (btrfs_test_opt(info, FLUSHONCOMMIT))
1277 16 : seq_puts(seq, ",flushoncommit");
1278 1234258 : if (btrfs_test_opt(info, DISCARD_SYNC))
1279 31 : seq_puts(seq, ",discard");
1280 1234258 : if (btrfs_test_opt(info, DISCARD_ASYNC))
1281 1233776 : seq_puts(seq, ",discard=async");
1282 1232703 : if (!(info->sb->s_flags & SB_POSIXACL))
1283 18 : seq_puts(seq, ",noacl");
1284 1232703 : if (btrfs_free_space_cache_v1_active(info))
1285 26 : seq_puts(seq, ",space_cache");
1286 1228133 : else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
1287 1228131 : seq_puts(seq, ",space_cache=v2");
1288 : else
1289 2 : seq_puts(seq, ",nospace_cache");
1290 1232319 : if (btrfs_test_opt(info, RESCAN_UUID_TREE))
1291 1 : seq_puts(seq, ",rescan_uuid_tree");
1292 1232319 : if (btrfs_test_opt(info, CLEAR_CACHE))
1293 0 : seq_puts(seq, ",clear_cache");
1294 1232319 : if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED))
1295 16 : seq_puts(seq, ",user_subvol_rm_allowed");
1296 1232318 : if (btrfs_test_opt(info, ENOSPC_DEBUG))
1297 14 : seq_puts(seq, ",enospc_debug");
1298 1232318 : if (btrfs_test_opt(info, AUTO_DEFRAG))
1299 47 : seq_puts(seq, ",autodefrag");
1300 1232318 : if (btrfs_test_opt(info, SKIP_BALANCE))
1301 1 : seq_puts(seq, ",skip_balance");
1302 : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1303 : if (btrfs_test_opt(info, CHECK_INTEGRITY_DATA))
1304 : seq_puts(seq, ",check_int_data");
1305 : else if (btrfs_test_opt(info, CHECK_INTEGRITY))
1306 : seq_puts(seq, ",check_int");
1307 : if (info->check_integrity_print_mask)
1308 : seq_printf(seq, ",check_int_print_mask=%d",
1309 : info->check_integrity_print_mask);
1310 : #endif
1311 1232318 : if (info->metadata_ratio)
1312 3 : seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio);
1313 1232318 : if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR))
1314 3 : seq_puts(seq, ",fatal_errors=panic");
1315 1232318 : if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
1316 61 : seq_printf(seq, ",commit=%u", info->commit_interval);
1317 : #ifdef CONFIG_BTRFS_DEBUG
1318 : if (btrfs_test_opt(info, FRAGMENT_DATA))
1319 : seq_puts(seq, ",fragment=data");
1320 : if (btrfs_test_opt(info, FRAGMENT_METADATA))
1321 : seq_puts(seq, ",fragment=metadata");
1322 : #endif
1323 1232318 : if (btrfs_test_opt(info, REF_VERIFY))
1324 0 : seq_puts(seq, ",ref_verify");
1325 1232318 : seq_printf(seq, ",subvolid=%llu",
1326 1232318 : BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1327 1233355 : subvol_name = btrfs_get_subvol_name_from_objectid(info,
1328 1233355 : BTRFS_I(d_inode(dentry))->root->root_key.objectid);
1329 1237167 : if (!IS_ERR(subvol_name)) {
1330 1237167 : seq_puts(seq, ",subvol=");
1331 1235156 : seq_escape(seq, subvol_name, " \t\n\\");
1332 1237882 : kfree(subvol_name);
1333 : }
1334 1238900 : return 0;
1335 : }
1336 :
1337 4528 : static int btrfs_test_super(struct super_block *s, void *data)
1338 : {
1339 4528 : struct btrfs_fs_info *p = data;
1340 4528 : struct btrfs_fs_info *fs_info = btrfs_sb(s);
1341 :
1342 4528 : return fs_info->fs_devices == p->fs_devices;
1343 : }
1344 :
1345 3242 : static int btrfs_set_super(struct super_block *s, void *data)
1346 : {
1347 3242 : int err = set_anon_super(s, data);
1348 3242 : if (!err)
1349 3242 : s->s_fs_info = data;
1350 3242 : return err;
1351 : }
1352 :
1353 : /*
1354 : * subvolumes are identified by ino 256
1355 : */
1356 : static inline int is_subvolume_inode(struct inode *inode)
1357 : {
1358 3440 : if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
1359 : return 1;
1360 : return 0;
1361 : }
1362 :
1363 3442 : static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
1364 : struct vfsmount *mnt)
1365 : {
1366 3442 : struct dentry *root;
1367 3442 : int ret;
1368 :
1369 3442 : if (!subvol_name) {
1370 3434 : if (!subvol_objectid) {
1371 3430 : ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
1372 : &subvol_objectid);
1373 3430 : if (ret) {
1374 0 : root = ERR_PTR(ret);
1375 0 : goto out;
1376 : }
1377 : }
1378 3434 : subvol_name = btrfs_get_subvol_name_from_objectid(
1379 : btrfs_sb(mnt->mnt_sb), subvol_objectid);
1380 3434 : if (IS_ERR(subvol_name)) {
1381 1 : root = ERR_CAST(subvol_name);
1382 1 : subvol_name = NULL;
1383 1 : goto out;
1384 : }
1385 :
1386 : }
1387 :
1388 3441 : root = mount_subtree(mnt, subvol_name);
1389 : /* mount_subtree() drops our reference on the vfsmount. */
1390 3441 : mnt = NULL;
1391 :
1392 3441 : if (!IS_ERR(root)) {
1393 3440 : struct super_block *s = root->d_sb;
1394 3440 : struct btrfs_fs_info *fs_info = btrfs_sb(s);
1395 3440 : struct inode *root_inode = d_inode(root);
1396 3440 : u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
1397 :
1398 3440 : ret = 0;
1399 3440 : if (!is_subvolume_inode(root_inode)) {
1400 0 : btrfs_err(fs_info, "'%s' is not a valid subvolume",
1401 : subvol_name);
1402 0 : ret = -EINVAL;
1403 : }
1404 3440 : if (subvol_objectid && root_objectid != subvol_objectid) {
1405 : /*
1406 : * This will also catch a race condition where a
1407 : * subvolume which was passed by ID is renamed and
1408 : * another subvolume is renamed over the old location.
1409 : */
1410 0 : btrfs_err(fs_info,
1411 : "subvol '%s' does not match subvolid %llu",
1412 : subvol_name, subvol_objectid);
1413 0 : ret = -EINVAL;
1414 : }
1415 3440 : if (ret) {
1416 0 : dput(root);
1417 0 : root = ERR_PTR(ret);
1418 0 : deactivate_locked_super(s);
1419 : }
1420 : }
1421 :
1422 3441 : out:
1423 3442 : mntput(mnt);
1424 3442 : kfree(subvol_name);
1425 3442 : return root;
1426 : }
1427 :
1428 : /*
1429 : * Find a superblock for the given device / mount point.
1430 : *
1431 : * Note: This is based on mount_bdev from fs/super.c with a few additions
1432 : * for multiple device setup. Make sure to keep it in sync.
1433 : */
1434 3472 : static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
1435 : int flags, const char *device_name, void *data)
1436 : {
1437 3472 : struct block_device *bdev = NULL;
1438 3472 : struct super_block *s;
1439 3472 : struct btrfs_device *device = NULL;
1440 3472 : struct btrfs_fs_devices *fs_devices = NULL;
1441 3472 : struct btrfs_fs_info *fs_info = NULL;
1442 3472 : void *new_sec_opts = NULL;
1443 3472 : blk_mode_t mode = sb_open_mode(flags);
1444 3472 : int error = 0;
1445 :
1446 3472 : if (data) {
1447 : error = security_sb_eat_lsm_opts(data, &new_sec_opts);
1448 : if (error)
1449 : return ERR_PTR(error);
1450 : }
1451 :
1452 : /*
1453 : * Setup a dummy root and fs_info for test/set super. This is because
1454 : * we don't actually fill this stuff out until open_ctree, but we need
1455 : * then open_ctree will properly initialize the file system specific
1456 : * settings later. btrfs_init_fs_info initializes the static elements
1457 : * of the fs_info (locks and such) to make cleanup easier if we find a
1458 : * superblock with our given fs_devices later on at sget() time.
1459 : */
1460 3472 : fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
1461 3472 : if (!fs_info) {
1462 0 : error = -ENOMEM;
1463 0 : goto error_sec_opts;
1464 : }
1465 3472 : btrfs_init_fs_info(fs_info);
1466 :
1467 3472 : fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1468 3472 : fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
1469 3472 : if (!fs_info->super_copy || !fs_info->super_for_commit) {
1470 0 : error = -ENOMEM;
1471 0 : goto error_fs_info;
1472 : }
1473 :
1474 3472 : mutex_lock(&uuid_mutex);
1475 3472 : error = btrfs_parse_device_options(data, mode);
1476 3472 : if (error) {
1477 0 : mutex_unlock(&uuid_mutex);
1478 0 : goto error_fs_info;
1479 : }
1480 :
1481 3472 : device = btrfs_scan_one_device(device_name, mode);
1482 3472 : if (IS_ERR(device)) {
1483 3 : mutex_unlock(&uuid_mutex);
1484 3 : error = PTR_ERR(device);
1485 3 : goto error_fs_info;
1486 : }
1487 :
1488 3469 : fs_devices = device->fs_devices;
1489 3469 : fs_info->fs_devices = fs_devices;
1490 :
1491 3469 : error = btrfs_open_devices(fs_devices, mode, fs_type);
1492 3469 : mutex_unlock(&uuid_mutex);
1493 3469 : if (error)
1494 0 : goto error_fs_info;
1495 :
1496 3469 : if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) {
1497 0 : error = -EACCES;
1498 0 : goto error_close_devices;
1499 : }
1500 :
1501 3469 : bdev = fs_devices->latest_dev->bdev;
1502 3469 : s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC,
1503 : fs_info);
1504 3469 : if (IS_ERR(s)) {
1505 0 : error = PTR_ERR(s);
1506 0 : goto error_close_devices;
1507 : }
1508 :
1509 3469 : if (s->s_root) {
1510 227 : btrfs_close_devices(fs_devices);
1511 227 : btrfs_free_fs_info(fs_info);
1512 227 : if ((flags ^ s->s_flags) & SB_RDONLY)
1513 : error = -EBUSY;
1514 : } else {
1515 3242 : snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
1516 3242 : shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name,
1517 : s->s_id);
1518 3242 : btrfs_sb(s)->bdev_holder = fs_type;
1519 3242 : error = btrfs_fill_super(s, fs_devices, data);
1520 : }
1521 3469 : if (!error)
1522 3442 : error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL);
1523 3469 : security_free_mnt_opts(&new_sec_opts);
1524 3469 : if (error) {
1525 27 : deactivate_locked_super(s);
1526 27 : return ERR_PTR(error);
1527 : }
1528 :
1529 3442 : return dget(s->s_root);
1530 :
1531 0 : error_close_devices:
1532 0 : btrfs_close_devices(fs_devices);
1533 3 : error_fs_info:
1534 3 : btrfs_free_fs_info(fs_info);
1535 3 : error_sec_opts:
1536 3 : security_free_mnt_opts(&new_sec_opts);
1537 3 : return ERR_PTR(error);
1538 : }
1539 :
1540 : /*
1541 : * Mount function which is called by VFS layer.
1542 : *
1543 : * In order to allow mounting a subvolume directly, btrfs uses mount_subtree()
1544 : * which needs vfsmount* of device's root (/). This means device's root has to
1545 : * be mounted internally in any case.
1546 : *
1547 : * Operation flow:
1548 : * 1. Parse subvol id related options for later use in mount_subvol().
1549 : *
1550 : * 2. Mount device's root (/) by calling vfs_kern_mount().
1551 : *
1552 : * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
1553 : * first place. In order to avoid calling btrfs_mount() again, we use
1554 : * different file_system_type which is not registered to VFS by
1555 : * register_filesystem() (btrfs_root_fs_type). As a result,
1556 : * btrfs_mount_root() is called. The return value will be used by
1557 : * mount_subtree() in mount_subvol().
1558 : *
1559 : * 3. Call mount_subvol() to get the dentry of subvolume. Since there is
1560 : * "btrfs subvolume set-default", mount_subvol() is called always.
1561 : */
1562 3472 : static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1563 : const char *device_name, void *data)
1564 : {
1565 3472 : struct vfsmount *mnt_root;
1566 3472 : struct dentry *root;
1567 3472 : char *subvol_name = NULL;
1568 3472 : u64 subvol_objectid = 0;
1569 3472 : int error = 0;
1570 :
1571 3472 : error = btrfs_parse_subvol_options(data, &subvol_name,
1572 : &subvol_objectid);
1573 3472 : if (error) {
1574 0 : kfree(subvol_name);
1575 0 : return ERR_PTR(error);
1576 : }
1577 :
1578 : /* mount device's root (/) */
1579 3472 : mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
1580 3472 : if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
1581 0 : if (flags & SB_RDONLY) {
1582 0 : mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
1583 : flags & ~SB_RDONLY, device_name, data);
1584 : } else {
1585 0 : mnt_root = vfs_kern_mount(&btrfs_root_fs_type,
1586 : flags | SB_RDONLY, device_name, data);
1587 0 : if (IS_ERR(mnt_root)) {
1588 0 : root = ERR_CAST(mnt_root);
1589 0 : kfree(subvol_name);
1590 0 : goto out;
1591 : }
1592 :
1593 0 : down_write(&mnt_root->mnt_sb->s_umount);
1594 0 : error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
1595 0 : up_write(&mnt_root->mnt_sb->s_umount);
1596 0 : if (error < 0) {
1597 0 : root = ERR_PTR(error);
1598 0 : mntput(mnt_root);
1599 0 : kfree(subvol_name);
1600 0 : goto out;
1601 : }
1602 : }
1603 : }
1604 3472 : if (IS_ERR(mnt_root)) {
1605 30 : root = ERR_CAST(mnt_root);
1606 30 : kfree(subvol_name);
1607 30 : goto out;
1608 : }
1609 :
1610 : /* mount_subvol() will free subvol_name and mnt_root */
1611 3442 : root = mount_subvol(subvol_name, subvol_objectid, mnt_root);
1612 :
1613 : out:
1614 : return root;
1615 : }
1616 :
1617 118 : static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
1618 : u32 new_pool_size, u32 old_pool_size)
1619 : {
1620 118 : if (new_pool_size == old_pool_size)
1621 : return;
1622 :
1623 5 : fs_info->thread_pool_size = new_pool_size;
1624 :
1625 5 : btrfs_info(fs_info, "resize thread pool %d -> %d",
1626 : old_pool_size, new_pool_size);
1627 :
1628 5 : btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
1629 5 : btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
1630 5 : btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
1631 5 : workqueue_set_max_active(fs_info->endio_workers, new_pool_size);
1632 5 : workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size);
1633 5 : btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size);
1634 5 : btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size);
1635 5 : btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size);
1636 : }
1637 :
1638 118 : static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
1639 : unsigned long old_opts, int flags)
1640 : {
1641 118 : if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1642 4 : (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
1643 : (flags & SB_RDONLY))) {
1644 : /* wait for any defraggers to finish */
1645 2 : wait_event(fs_info->transaction_wait,
1646 : (atomic_read(&fs_info->defrag_running) == 0));
1647 2 : if (flags & SB_RDONLY)
1648 0 : sync_filesystem(fs_info->sb);
1649 : }
1650 118 : }
1651 :
1652 118 : static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
1653 : unsigned long old_opts)
1654 : {
1655 118 : const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE);
1656 :
1657 : /*
1658 : * We need to cleanup all defragable inodes if the autodefragment is
1659 : * close or the filesystem is read only.
1660 : */
1661 118 : if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
1662 4 : (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) {
1663 2 : btrfs_cleanup_defrag_inodes(fs_info);
1664 : }
1665 :
1666 : /* If we toggled discard async */
1667 118 : if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
1668 10 : btrfs_test_opt(fs_info, DISCARD_ASYNC))
1669 2 : btrfs_discard_resume(fs_info);
1670 116 : else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
1671 108 : !btrfs_test_opt(fs_info, DISCARD_ASYNC))
1672 2 : btrfs_discard_cleanup(fs_info);
1673 :
1674 : /* If we toggled space cache */
1675 118 : if (cache_opt != btrfs_free_space_cache_v1_active(fs_info))
1676 0 : btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
1677 118 : }
1678 :
1679 118 : static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1680 : {
1681 118 : struct btrfs_fs_info *fs_info = btrfs_sb(sb);
1682 118 : unsigned old_flags = sb->s_flags;
1683 118 : unsigned long old_opts = fs_info->mount_opt;
1684 118 : unsigned long old_compress_type = fs_info->compress_type;
1685 118 : u64 old_max_inline = fs_info->max_inline;
1686 118 : u32 old_thread_pool_size = fs_info->thread_pool_size;
1687 118 : u32 old_metadata_ratio = fs_info->metadata_ratio;
1688 118 : int ret;
1689 :
1690 118 : sync_filesystem(sb);
1691 118 : set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1692 :
1693 118 : if (data) {
1694 : void *new_sec_opts = NULL;
1695 :
1696 : ret = security_sb_eat_lsm_opts(data, &new_sec_opts);
1697 : if (!ret)
1698 : ret = security_sb_remount(sb, new_sec_opts);
1699 : security_free_mnt_opts(&new_sec_opts);
1700 : if (ret)
1701 : goto restore;
1702 : }
1703 :
1704 118 : ret = btrfs_parse_options(fs_info, data, *flags);
1705 118 : if (ret)
1706 0 : goto restore;
1707 :
1708 118 : ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY));
1709 118 : if (ret < 0)
1710 0 : goto restore;
1711 :
1712 118 : btrfs_remount_begin(fs_info, old_opts, *flags);
1713 118 : btrfs_resize_thread_pool(fs_info,
1714 : fs_info->thread_pool_size, old_thread_pool_size);
1715 :
1716 118 : if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
1717 118 : (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
1718 0 : (!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
1719 0 : btrfs_warn(fs_info,
1720 : "remount supports changing free space tree only from ro to rw");
1721 : /* Make sure free space cache options match the state on disk */
1722 0 : if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
1723 0 : btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE);
1724 0 : btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
1725 : }
1726 0 : if (btrfs_free_space_cache_v1_active(fs_info)) {
1727 0 : btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE);
1728 0 : btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE);
1729 : }
1730 : }
1731 :
1732 118 : if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
1733 112 : goto out;
1734 :
1735 6 : if (*flags & SB_RDONLY) {
1736 : /*
1737 : * this also happens on 'umount -rf' or on shutdown, when
1738 : * the filesystem is busy.
1739 : */
1740 4 : cancel_work_sync(&fs_info->async_reclaim_work);
1741 4 : cancel_work_sync(&fs_info->async_data_reclaim_work);
1742 :
1743 4 : btrfs_discard_cleanup(fs_info);
1744 :
1745 : /* wait for the uuid_scan task to finish */
1746 4 : down(&fs_info->uuid_tree_rescan_sem);
1747 : /* avoid complains from lockdep et al. */
1748 4 : up(&fs_info->uuid_tree_rescan_sem);
1749 :
1750 4 : btrfs_set_sb_rdonly(sb);
1751 :
1752 : /*
1753 : * Setting SB_RDONLY will put the cleaner thread to
1754 : * sleep at the next loop if it's already active.
1755 : * If it's already asleep, we'll leave unused block
1756 : * groups on disk until we're mounted read-write again
1757 : * unless we clean them up here.
1758 : */
1759 4 : btrfs_delete_unused_bgs(fs_info);
1760 :
1761 : /*
1762 : * The cleaner task could be already running before we set the
1763 : * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock).
1764 : * We must make sure that after we finish the remount, i.e. after
1765 : * we call btrfs_commit_super(), the cleaner can no longer start
1766 : * a transaction - either because it was dropping a dead root,
1767 : * running delayed iputs or deleting an unused block group (the
1768 : * cleaner picked a block group from the list of unused block
1769 : * groups before we were able to in the previous call to
1770 : * btrfs_delete_unused_bgs()).
1771 : */
1772 4 : wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING,
1773 : TASK_UNINTERRUPTIBLE);
1774 :
1775 : /*
1776 : * We've set the superblock to RO mode, so we might have made
1777 : * the cleaner task sleep without running all pending delayed
1778 : * iputs. Go through all the delayed iputs here, so that if an
1779 : * unmount happens without remounting RW we don't end up at
1780 : * finishing close_ctree() with a non-empty list of delayed
1781 : * iputs.
1782 : */
1783 4 : btrfs_run_delayed_iputs(fs_info);
1784 :
1785 4 : btrfs_dev_replace_suspend_for_unmount(fs_info);
1786 4 : btrfs_scrub_cancel(fs_info);
1787 4 : btrfs_pause_balance(fs_info);
1788 :
1789 : /*
1790 : * Pause the qgroup rescan worker if it is running. We don't want
1791 : * it to be still running after we are in RO mode, as after that,
1792 : * by the time we unmount, it might have left a transaction open,
1793 : * so we would leak the transaction and/or crash.
1794 : */
1795 4 : btrfs_qgroup_wait_for_completion(fs_info, false);
1796 :
1797 4 : ret = btrfs_commit_super(fs_info);
1798 4 : if (ret)
1799 0 : goto restore;
1800 : } else {
1801 2 : if (BTRFS_FS_ERROR(fs_info)) {
1802 0 : btrfs_err(fs_info,
1803 : "Remounting read-write after error is not allowed");
1804 0 : ret = -EINVAL;
1805 0 : goto restore;
1806 : }
1807 2 : if (fs_info->fs_devices->rw_devices == 0) {
1808 0 : ret = -EACCES;
1809 0 : goto restore;
1810 : }
1811 :
1812 2 : if (!btrfs_check_rw_degradable(fs_info, NULL)) {
1813 0 : btrfs_warn(fs_info,
1814 : "too many missing devices, writable remount is not allowed");
1815 0 : ret = -EACCES;
1816 0 : goto restore;
1817 : }
1818 :
1819 2 : if (btrfs_super_log_root(fs_info->super_copy) != 0) {
1820 0 : btrfs_warn(fs_info,
1821 : "mount required to replay tree-log, cannot remount read-write");
1822 0 : ret = -EINVAL;
1823 0 : goto restore;
1824 : }
1825 :
1826 : /*
1827 : * NOTE: when remounting with a change that does writes, don't
1828 : * put it anywhere above this point, as we are not sure to be
1829 : * safe to write until we pass the above checks.
1830 : */
1831 2 : ret = btrfs_start_pre_rw_mount(fs_info);
1832 2 : if (ret)
1833 0 : goto restore;
1834 :
1835 2 : btrfs_clear_sb_rdonly(sb);
1836 :
1837 2 : set_bit(BTRFS_FS_OPEN, &fs_info->flags);
1838 :
1839 : /*
1840 : * If we've gone from readonly -> read/write, we need to get
1841 : * our sync/async discard lists in the right state.
1842 : */
1843 2 : btrfs_discard_resume(fs_info);
1844 : }
1845 118 : out:
1846 : /*
1847 : * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS,
1848 : * since the absence of the flag means it can be toggled off by remount.
1849 : */
1850 118 : *flags |= SB_I_VERSION;
1851 :
1852 118 : wake_up_process(fs_info->transaction_kthread);
1853 118 : btrfs_remount_cleanup(fs_info, old_opts);
1854 118 : btrfs_clear_oneshot_options(fs_info);
1855 118 : clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1856 :
1857 118 : return 0;
1858 :
1859 0 : restore:
1860 : /* We've hit an error - don't reset SB_RDONLY */
1861 0 : if (sb_rdonly(sb))
1862 0 : old_flags |= SB_RDONLY;
1863 0 : if (!(old_flags & SB_RDONLY))
1864 0 : clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
1865 0 : sb->s_flags = old_flags;
1866 0 : fs_info->mount_opt = old_opts;
1867 0 : fs_info->compress_type = old_compress_type;
1868 0 : fs_info->max_inline = old_max_inline;
1869 0 : btrfs_resize_thread_pool(fs_info,
1870 : old_thread_pool_size, fs_info->thread_pool_size);
1871 0 : fs_info->metadata_ratio = old_metadata_ratio;
1872 0 : btrfs_remount_cleanup(fs_info, old_opts);
1873 0 : clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
1874 :
1875 0 : return ret;
1876 : }
1877 :
1878 : /* Used to sort the devices by max_avail(descending sort) */
1879 2 : static int btrfs_cmp_device_free_bytes(const void *a, const void *b)
1880 : {
1881 2 : const struct btrfs_device_info *dev_info1 = a;
1882 2 : const struct btrfs_device_info *dev_info2 = b;
1883 :
1884 2 : if (dev_info1->max_avail > dev_info2->max_avail)
1885 : return -1;
1886 2 : else if (dev_info1->max_avail < dev_info2->max_avail)
1887 0 : return 1;
1888 : return 0;
1889 : }
1890 :
1891 : /*
1892 : * sort the devices by max_avail, in which max free extent size of each device
1893 : * is stored.(Descending Sort)
1894 : */
1895 : static inline void btrfs_descending_sort_devices(
1896 : struct btrfs_device_info *devices,
1897 : size_t nr_devices)
1898 : {
1899 2582358 : sort(devices, nr_devices, sizeof(struct btrfs_device_info),
1900 : btrfs_cmp_device_free_bytes, NULL);
1901 : }
1902 :
1903 : /*
1904 : * The helper to calc the free space on the devices that can be used to store
1905 : * file data.
1906 : */
1907 2583830 : static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
1908 : u64 *free_bytes)
1909 : {
1910 2583830 : struct btrfs_device_info *devices_info;
1911 2583830 : struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
1912 2583830 : struct btrfs_device *device;
1913 2583830 : u64 type;
1914 2583830 : u64 avail_space;
1915 2583830 : u64 min_stripe_size;
1916 2583830 : int num_stripes = 1;
1917 2583830 : int i = 0, nr_devices;
1918 2583830 : const struct btrfs_raid_attr *rattr;
1919 :
1920 : /*
1921 : * We aren't under the device list lock, so this is racy-ish, but good
1922 : * enough for our purposes.
1923 : */
1924 2583830 : nr_devices = fs_info->fs_devices->open_devices;
1925 2583830 : if (!nr_devices) {
1926 0 : smp_mb();
1927 0 : nr_devices = fs_info->fs_devices->open_devices;
1928 0 : ASSERT(nr_devices);
1929 0 : if (!nr_devices) {
1930 0 : *free_bytes = 0;
1931 0 : return 0;
1932 : }
1933 : }
1934 :
1935 2583830 : devices_info = kmalloc_array(nr_devices, sizeof(*devices_info),
1936 : GFP_KERNEL);
1937 2584129 : if (!devices_info)
1938 : return -ENOMEM;
1939 :
1940 : /* calc min stripe number for data space allocation */
1941 2584129 : type = btrfs_data_alloc_profile(fs_info);
1942 2585231 : rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)];
1943 :
1944 2585231 : if (type & BTRFS_BLOCK_GROUP_RAID0)
1945 : num_stripes = nr_devices;
1946 2583544 : else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK)
1947 2 : num_stripes = rattr->ncopies;
1948 2583542 : else if (type & BTRFS_BLOCK_GROUP_RAID10)
1949 0 : num_stripes = 4;
1950 :
1951 : /* Adjust for more than 1 stripe per device */
1952 2585231 : min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN;
1953 :
1954 2585231 : rcu_read_lock();
1955 5166067 : list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
1956 5164738 : if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
1957 2582369 : &device->dev_state) ||
1958 5164738 : !device->bdev ||
1959 0 : test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
1960 0 : continue;
1961 :
1962 2582369 : if (i >= nr_devices)
1963 : break;
1964 :
1965 2582369 : avail_space = device->total_bytes - device->bytes_used;
1966 :
1967 : /* align with stripe_len */
1968 2582369 : avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN);
1969 :
1970 : /*
1971 : * Ensure we have at least min_stripe_size on top of the
1972 : * reserved space on the device.
1973 : */
1974 2582369 : if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size)
1975 874722 : continue;
1976 :
1977 1707647 : avail_space -= BTRFS_DEVICE_RANGE_RESERVED;
1978 :
1979 1707647 : devices_info[i].dev = device;
1980 1707647 : devices_info[i].max_avail = avail_space;
1981 :
1982 1707647 : i++;
1983 : }
1984 2583698 : rcu_read_unlock();
1985 :
1986 2582358 : nr_devices = i;
1987 :
1988 2582358 : btrfs_descending_sort_devices(devices_info, nr_devices);
1989 :
1990 2580542 : i = nr_devices - 1;
1991 2580542 : avail_space = 0;
1992 4283291 : while (nr_devices >= rattr->devs_min) {
1993 1702749 : num_stripes = min(num_stripes, nr_devices);
1994 :
1995 1702749 : if (devices_info[i].max_avail >= min_stripe_size) {
1996 1707076 : int j;
1997 1707076 : u64 alloc_size;
1998 :
1999 1707076 : avail_space += devices_info[i].max_avail * num_stripes;
2000 1707076 : alloc_size = devices_info[i].max_avail;
2001 3413994 : for (j = i + 1 - num_stripes; j <= i; j++)
2002 1706918 : devices_info[j].max_avail -= alloc_size;
2003 : }
2004 1702749 : i--;
2005 1702749 : nr_devices--;
2006 : }
2007 :
2008 2580542 : kfree(devices_info);
2009 2582540 : *free_bytes = avail_space;
2010 2582540 : return 0;
2011 : }
2012 :
2013 : /*
2014 : * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
2015 : *
2016 : * If there's a redundant raid level at DATA block groups, use the respective
2017 : * multiplier to scale the sizes.
2018 : *
2019 : * Unused device space usage is based on simulating the chunk allocator
2020 : * algorithm that respects the device sizes and order of allocations. This is
2021 : * a close approximation of the actual use but there are other factors that may
2022 : * change the result (like a new metadata chunk).
2023 : *
2024 : * If metadata is exhausted, f_bavail will be 0.
2025 : */
2026 2584103 : static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2027 : {
2028 2584103 : struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
2029 2584103 : struct btrfs_super_block *disk_super = fs_info->super_copy;
2030 2584103 : struct btrfs_space_info *found;
2031 2584103 : u64 total_used = 0;
2032 2584103 : u64 total_free_data = 0;
2033 2584103 : u64 total_free_meta = 0;
2034 2584103 : u32 bits = fs_info->sectorsize_bits;
2035 2584103 : __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid;
2036 2584103 : unsigned factor = 1;
2037 2584103 : struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
2038 2584103 : int ret;
2039 2584103 : u64 thresh = 0;
2040 2584103 : int mixed = 0;
2041 :
2042 10324552 : list_for_each_entry(found, &fs_info->space_info, list) {
2043 7743789 : if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
2044 2581992 : int i;
2045 :
2046 2581992 : total_free_data += found->disk_total - found->disk_used;
2047 5163802 : total_free_data -=
2048 2581992 : btrfs_account_ro_block_groups_free_space(found);
2049 :
2050 25805158 : for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
2051 23226506 : if (!list_empty(&found->block_groups[i]))
2052 2579010 : factor = btrfs_bg_type_to_factor(
2053 2582168 : btrfs_raid_array[i].bg_flag);
2054 : }
2055 : }
2056 :
2057 : /*
2058 : * Metadata in mixed block group profiles are accounted in data
2059 : */
2060 7740449 : if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) {
2061 2582050 : if (found->flags & BTRFS_BLOCK_GROUP_DATA)
2062 : mixed = 1;
2063 : else
2064 2581918 : total_free_meta += found->disk_total -
2065 2581918 : found->disk_used;
2066 : }
2067 :
2068 7740449 : total_used += found->disk_used;
2069 : }
2070 :
2071 2580763 : buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
2072 2580763 : buf->f_blocks >>= bits;
2073 2580763 : buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
2074 :
2075 : /* Account global block reserve as used, it's in logical size already */
2076 2580763 : spin_lock(&block_rsv->lock);
2077 : /* Mixed block groups accounting is not byte-accurate, avoid overflow */
2078 2586298 : if (buf->f_bfree >= block_rsv->size >> bits)
2079 2586298 : buf->f_bfree -= block_rsv->size >> bits;
2080 : else
2081 0 : buf->f_bfree = 0;
2082 2586298 : spin_unlock(&block_rsv->lock);
2083 :
2084 2586139 : buf->f_bavail = div_u64(total_free_data, factor);
2085 2586139 : ret = btrfs_calc_avail_data_space(fs_info, &total_free_data);
2086 2582923 : if (ret)
2087 : return ret;
2088 2582923 : buf->f_bavail += div_u64(total_free_data, factor);
2089 2582923 : buf->f_bavail = buf->f_bavail >> bits;
2090 :
2091 : /*
2092 : * We calculate the remaining metadata space minus global reserve. If
2093 : * this is (supposedly) smaller than zero, there's no space. But this
2094 : * does not hold in practice, the exhausted state happens where's still
2095 : * some positive delta. So we apply some guesswork and compare the
2096 : * delta to a 4M threshold. (Practically observed delta was ~2M.)
2097 : *
2098 : * We probably cannot calculate the exact threshold value because this
2099 : * depends on the internal reservations requested by various
2100 : * operations, so some operations that consume a few metadata will
2101 : * succeed even if the Avail is zero. But this is better than the other
2102 : * way around.
2103 : */
2104 2582923 : thresh = SZ_4M;
2105 :
2106 : /*
2107 : * We only want to claim there's no available space if we can no longer
2108 : * allocate chunks for our metadata profile and our global reserve will
2109 : * not fit in the free metadata space. If we aren't ->full then we
2110 : * still can allocate chunks and thus are fine using the currently
2111 : * calculated f_bavail.
2112 : */
2113 2582923 : if (!mixed && block_rsv->space_info->full &&
2114 400197 : total_free_meta - thresh < block_rsv->size)
2115 0 : buf->f_bavail = 0;
2116 :
2117 2582923 : buf->f_type = BTRFS_SUPER_MAGIC;
2118 2582923 : buf->f_bsize = dentry->d_sb->s_blocksize;
2119 2582923 : buf->f_namelen = BTRFS_NAME_LEN;
2120 :
2121 : /* We treat it as constant endianness (it doesn't matter _which_)
2122 : because we want the fsid to come out the same whether mounted
2123 : on a big-endian or little-endian host */
2124 2582923 : buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
2125 2582923 : buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
2126 : /* Mask in the root object ID too, to disambiguate subvols */
2127 2582923 : buf->f_fsid.val[0] ^=
2128 2582923 : BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32;
2129 2582923 : buf->f_fsid.val[1] ^=
2130 2582923 : BTRFS_I(d_inode(dentry))->root->root_key.objectid;
2131 :
2132 2582923 : return 0;
2133 : }
2134 :
/*
 * ->kill_sb callback: shut down the anonymous super block and then free the
 * btrfs_fs_info that belongs to it.
 */
static void btrfs_kill_super(struct super_block *sb)
{
	/* Fetch fs_info up front; it is released only after the sb is killed. */
	struct btrfs_fs_info *info = btrfs_sb(sb);

	kill_anon_super(sb);
	btrfs_free_fs_info(info);
}
2141 :
2142 : static struct file_system_type btrfs_fs_type = {
2143 : .owner = THIS_MODULE,
2144 : .name = "btrfs",
2145 : .mount = btrfs_mount,
2146 : .kill_sb = btrfs_kill_super,
2147 : .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
2148 : };
2149 :
2150 : static struct file_system_type btrfs_root_fs_type = {
2151 : .owner = THIS_MODULE,
2152 : .name = "btrfs",
2153 : .mount = btrfs_mount_root,
2154 : .kill_sb = btrfs_kill_super,
2155 : .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
2156 : };
2157 :
2158 : MODULE_ALIAS_FS("btrfs");
2159 :
2160 3276 : static int btrfs_control_open(struct inode *inode, struct file *file)
2161 : {
2162 : /*
2163 : * The control file's private_data is used to hold the
2164 : * transaction when it is started and is used to keep
2165 : * track of whether a transaction is already in progress.
2166 : */
2167 3276 : file->private_data = NULL;
2168 3276 : return 0;
2169 : }
2170 :
2171 : /*
2172 : * Used by /dev/btrfs-control for devices ioctls.
2173 : */
2174 3276 : static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
2175 : unsigned long arg)
2176 : {
2177 3276 : struct btrfs_ioctl_vol_args *vol;
2178 3276 : struct btrfs_device *device = NULL;
2179 3276 : dev_t devt = 0;
2180 3276 : int ret = -ENOTTY;
2181 :
2182 3276 : if (!capable(CAP_SYS_ADMIN))
2183 : return -EPERM;
2184 :
2185 3276 : vol = memdup_user((void __user *)arg, sizeof(*vol));
2186 3276 : if (IS_ERR(vol))
2187 0 : return PTR_ERR(vol);
2188 3276 : vol->name[BTRFS_PATH_NAME_MAX] = '\0';
2189 :
2190 3276 : switch (cmd) {
2191 989 : case BTRFS_IOC_SCAN_DEV:
2192 989 : mutex_lock(&uuid_mutex);
2193 989 : device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ);
2194 989 : ret = PTR_ERR_OR_ZERO(device);
2195 989 : mutex_unlock(&uuid_mutex);
2196 989 : break;
2197 5 : case BTRFS_IOC_FORGET_DEV:
2198 5 : if (vol->name[0] != 0) {
2199 0 : ret = lookup_bdev(vol->name, &devt);
2200 0 : if (ret)
2201 : break;
2202 : }
2203 5 : ret = btrfs_forget_devices(devt);
2204 5 : break;
2205 2282 : case BTRFS_IOC_DEVICES_READY:
2206 2282 : mutex_lock(&uuid_mutex);
2207 2282 : device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ);
2208 2282 : if (IS_ERR(device)) {
2209 3 : mutex_unlock(&uuid_mutex);
2210 3 : ret = PTR_ERR(device);
2211 3 : break;
2212 : }
2213 2279 : ret = !(device->fs_devices->num_devices ==
2214 2279 : device->fs_devices->total_devices);
2215 2279 : mutex_unlock(&uuid_mutex);
2216 2279 : break;
2217 0 : case BTRFS_IOC_GET_SUPPORTED_FEATURES:
2218 0 : ret = btrfs_ioctl_get_supported_features((void __user*)arg);
2219 0 : break;
2220 : }
2221 :
2222 3276 : kfree(vol);
2223 3276 : return ret;
2224 : }
2225 :
2226 46 : static int btrfs_freeze(struct super_block *sb)
2227 : {
2228 46 : struct btrfs_trans_handle *trans;
2229 46 : struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2230 46 : struct btrfs_root *root = fs_info->tree_root;
2231 :
2232 46 : set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2233 : /*
2234 : * We don't need a barrier here, we'll wait for any transaction that
2235 : * could be in progress on other threads (and do delayed iputs that
2236 : * we want to avoid on a frozen filesystem), or do the commit
2237 : * ourselves.
2238 : */
2239 46 : trans = btrfs_attach_transaction_barrier(root);
2240 46 : if (IS_ERR(trans)) {
2241 : /* no transaction, don't bother */
2242 46 : if (PTR_ERR(trans) == -ENOENT)
2243 : return 0;
2244 0 : return PTR_ERR(trans);
2245 : }
2246 0 : return btrfs_commit_transaction(trans);
2247 : }
2248 :
2249 46 : static int check_dev_super(struct btrfs_device *dev)
2250 : {
2251 46 : struct btrfs_fs_info *fs_info = dev->fs_info;
2252 46 : struct btrfs_super_block *sb;
2253 46 : u16 csum_type;
2254 46 : int ret = 0;
2255 :
2256 : /* This should be called with fs still frozen. */
2257 46 : ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
2258 :
2259 : /* Missing dev, no need to check. */
2260 46 : if (!dev->bdev)
2261 : return 0;
2262 :
2263 : /* Only need to check the primary super block. */
2264 46 : sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
2265 46 : if (IS_ERR(sb))
2266 0 : return PTR_ERR(sb);
2267 :
2268 : /* Verify the checksum. */
2269 46 : csum_type = btrfs_super_csum_type(sb);
2270 46 : if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) {
2271 0 : btrfs_err(fs_info, "csum type changed, has %u expect %u",
2272 : csum_type, btrfs_super_csum_type(fs_info->super_copy));
2273 0 : ret = -EUCLEAN;
2274 0 : goto out;
2275 : }
2276 :
2277 46 : if (btrfs_check_super_csum(fs_info, sb)) {
2278 0 : btrfs_err(fs_info, "csum for on-disk super block no longer matches");
2279 0 : ret = -EUCLEAN;
2280 0 : goto out;
2281 : }
2282 :
2283 : /* Btrfs_validate_super() includes fsid check against super->fsid. */
2284 46 : ret = btrfs_validate_super(fs_info, sb, 0);
2285 46 : if (ret < 0)
2286 0 : goto out;
2287 :
2288 46 : if (btrfs_super_generation(sb) != fs_info->last_trans_committed) {
2289 0 : btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
2290 : btrfs_super_generation(sb),
2291 : fs_info->last_trans_committed);
2292 0 : ret = -EUCLEAN;
2293 0 : goto out;
2294 : }
2295 46 : out:
2296 46 : btrfs_release_disk_super(sb);
2297 46 : return ret;
2298 : }
2299 :
2300 46 : static int btrfs_unfreeze(struct super_block *sb)
2301 : {
2302 46 : struct btrfs_fs_info *fs_info = btrfs_sb(sb);
2303 46 : struct btrfs_device *device;
2304 46 : int ret = 0;
2305 :
2306 : /*
2307 : * Make sure the fs is not changed by accident (like hibernation then
2308 : * modified by other OS).
2309 : * If we found anything wrong, we mark the fs error immediately.
2310 : *
2311 : * And since the fs is frozen, no one can modify the fs yet, thus
2312 : * we don't need to hold device_list_mutex.
2313 : */
2314 92 : list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
2315 46 : ret = check_dev_super(device);
2316 46 : if (ret < 0) {
2317 0 : btrfs_handle_fs_error(fs_info, ret,
2318 : "super block on devid %llu got modified unexpectedly",
2319 : device->devid);
2320 0 : break;
2321 : }
2322 : }
2323 46 : clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
2324 :
2325 : /*
2326 : * We still return 0, to allow VFS layer to unfreeze the fs even the
2327 : * above checks failed. Since the fs is either fine or read-only, we're
2328 : * safe to continue, without causing further damage.
2329 : */
2330 46 : return 0;
2331 : }
2332 :
2333 1238794 : static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
2334 : {
2335 1238794 : struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
2336 :
2337 : /*
2338 : * There should be always a valid pointer in latest_dev, it may be stale
2339 : * for a short moment in case it's being deleted but still valid until
2340 : * the end of RCU grace period.
2341 : */
2342 1238794 : rcu_read_lock();
2343 1233770 : seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\");
2344 1232251 : rcu_read_unlock();
2345 :
2346 1232810 : return 0;
2347 : }
2348 :
2349 : static const struct super_operations btrfs_super_ops = {
2350 : .drop_inode = btrfs_drop_inode,
2351 : .evict_inode = btrfs_evict_inode,
2352 : .put_super = btrfs_put_super,
2353 : .sync_fs = btrfs_sync_fs,
2354 : .show_options = btrfs_show_options,
2355 : .show_devname = btrfs_show_devname,
2356 : .alloc_inode = btrfs_alloc_inode,
2357 : .destroy_inode = btrfs_destroy_inode,
2358 : .free_inode = btrfs_free_inode,
2359 : .statfs = btrfs_statfs,
2360 : .remount_fs = btrfs_remount,
2361 : .freeze_fs = btrfs_freeze,
2362 : .unfreeze_fs = btrfs_unfreeze,
2363 : };
2364 :
2365 : static const struct file_operations btrfs_ctl_fops = {
2366 : .open = btrfs_control_open,
2367 : .unlocked_ioctl = btrfs_control_ioctl,
2368 : .compat_ioctl = compat_ptr_ioctl,
2369 : .owner = THIS_MODULE,
2370 : .llseek = noop_llseek,
2371 : };
2372 :
2373 : static struct miscdevice btrfs_misc = {
2374 : .minor = BTRFS_MINOR,
2375 : .name = "btrfs-control",
2376 : .fops = &btrfs_ctl_fops
2377 : };
2378 :
2379 : MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
2380 : MODULE_ALIAS("devname:btrfs-control");
2381 :
2382 11 : static int __init btrfs_interface_init(void)
2383 : {
2384 11 : return misc_register(&btrfs_misc);
2385 : }
2386 :
2387 0 : static __cold void btrfs_interface_exit(void)
2388 : {
2389 0 : misc_deregister(&btrfs_misc);
2390 0 : }
2391 :
2392 11 : static int __init btrfs_print_mod_info(void)
2393 : {
2394 11 : static const char options[] = ""
2395 : #ifdef CONFIG_BTRFS_DEBUG
2396 : ", debug=on"
2397 : #endif
2398 : #ifdef CONFIG_BTRFS_ASSERT
2399 : ", assert=on"
2400 : #endif
2401 : #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
2402 : ", integrity-checker=on"
2403 : #endif
2404 : #ifdef CONFIG_BTRFS_FS_REF_VERIFY
2405 : ", ref-verify=on"
2406 : #endif
2407 : #ifdef CONFIG_BLK_DEV_ZONED
2408 : ", zoned=yes"
2409 : #else
2410 : ", zoned=no"
2411 : #endif
2412 : #ifdef CONFIG_FS_VERITY
2413 : ", fsverity=yes"
2414 : #else
2415 : ", fsverity=no"
2416 : #endif
2417 : ;
2418 11 : pr_info("Btrfs loaded%s\n", options);
2419 11 : return 0;
2420 : }
2421 :
2422 11 : static int register_btrfs(void)
2423 : {
2424 11 : return register_filesystem(&btrfs_fs_type);
2425 : }
2426 :
2427 0 : static void unregister_btrfs(void)
2428 : {
2429 0 : unregister_filesystem(&btrfs_fs_type);
2430 0 : }
2431 :
/*
 * One step of the module init/exit sequence.
 *
 * @init_func: runs the step; a negative return value means failure
 * @exit_func: undoes the step; NULL if the step needs no cleanup
 */
struct init_sequence {
	int (*init_func)(void);
	void (*exit_func)(void);
};
2438 :
2439 : static const struct init_sequence mod_init_seq[] = {
2440 : {
2441 : .init_func = btrfs_props_init,
2442 : .exit_func = NULL,
2443 : }, {
2444 : .init_func = btrfs_init_sysfs,
2445 : .exit_func = btrfs_exit_sysfs,
2446 : }, {
2447 : .init_func = btrfs_init_compress,
2448 : .exit_func = btrfs_exit_compress,
2449 : }, {
2450 : .init_func = btrfs_init_cachep,
2451 : .exit_func = btrfs_destroy_cachep,
2452 : }, {
2453 : .init_func = btrfs_transaction_init,
2454 : .exit_func = btrfs_transaction_exit,
2455 : }, {
2456 : .init_func = btrfs_ctree_init,
2457 : .exit_func = btrfs_ctree_exit,
2458 : }, {
2459 : .init_func = btrfs_free_space_init,
2460 : .exit_func = btrfs_free_space_exit,
2461 : }, {
2462 : .init_func = extent_state_init_cachep,
2463 : .exit_func = extent_state_free_cachep,
2464 : }, {
2465 : .init_func = extent_buffer_init_cachep,
2466 : .exit_func = extent_buffer_free_cachep,
2467 : }, {
2468 : .init_func = btrfs_bioset_init,
2469 : .exit_func = btrfs_bioset_exit,
2470 : }, {
2471 : .init_func = extent_map_init,
2472 : .exit_func = extent_map_exit,
2473 : }, {
2474 : .init_func = ordered_data_init,
2475 : .exit_func = ordered_data_exit,
2476 : }, {
2477 : .init_func = btrfs_delayed_inode_init,
2478 : .exit_func = btrfs_delayed_inode_exit,
2479 : }, {
2480 : .init_func = btrfs_auto_defrag_init,
2481 : .exit_func = btrfs_auto_defrag_exit,
2482 : }, {
2483 : .init_func = btrfs_delayed_ref_init,
2484 : .exit_func = btrfs_delayed_ref_exit,
2485 : }, {
2486 : .init_func = btrfs_prelim_ref_init,
2487 : .exit_func = btrfs_prelim_ref_exit,
2488 : }, {
2489 : .init_func = btrfs_interface_init,
2490 : .exit_func = btrfs_interface_exit,
2491 : }, {
2492 : .init_func = btrfs_print_mod_info,
2493 : .exit_func = NULL,
2494 : }, {
2495 : .init_func = btrfs_run_sanity_tests,
2496 : .exit_func = NULL,
2497 : }, {
2498 : .init_func = register_btrfs,
2499 : .exit_func = unregister_btrfs,
2500 : }
2501 : };
2502 :
2503 : static bool mod_init_result[ARRAY_SIZE(mod_init_seq)];
2504 :
2505 : static __always_inline void btrfs_exit_btrfs_fs(void)
2506 : {
2507 0 : int i;
2508 :
2509 0 : for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) {
2510 0 : if (!mod_init_result[i])
2511 0 : continue;
2512 0 : if (mod_init_seq[i].exit_func)
2513 0 : mod_init_seq[i].exit_func();
2514 0 : mod_init_result[i] = false;
2515 : }
2516 : }
2517 :
2518 0 : static void __exit exit_btrfs_fs(void)
2519 : {
2520 0 : btrfs_exit_btrfs_fs();
2521 0 : btrfs_cleanup_fs_uuids();
2522 0 : }
2523 :
2524 11 : static int __init init_btrfs_fs(void)
2525 : {
2526 11 : int ret;
2527 11 : int i;
2528 :
2529 231 : for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) {
2530 220 : ASSERT(!mod_init_result[i]);
2531 220 : ret = mod_init_seq[i].init_func();
2532 220 : if (ret < 0) {
2533 : btrfs_exit_btrfs_fs();
2534 : return ret;
2535 : }
2536 220 : mod_init_result[i] = true;
2537 : }
2538 : return 0;
2539 : }
2540 :
2541 : late_initcall(init_btrfs_fs);
2542 : module_exit(exit_btrfs_fs)
2543 :
2544 : MODULE_LICENSE("GPL");
2545 : MODULE_SOFTDEP("pre: crc32c");
2546 : MODULE_SOFTDEP("pre: xxhash64");
2547 : MODULE_SOFTDEP("pre: sha256");
2548 : MODULE_SOFTDEP("pre: blake2b-256");
|