Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * linux/fs/ext4/super.c
4 : *
5 : * Copyright (C) 1992, 1993, 1994, 1995
6 : * Remy Card (card@masi.ibp.fr)
7 : * Laboratoire MASI - Institut Blaise Pascal
8 : * Universite Pierre et Marie Curie (Paris VI)
9 : *
10 : * from
11 : *
12 : * linux/fs/minix/inode.c
13 : *
14 : * Copyright (C) 1991, 1992 Linus Torvalds
15 : *
16 : * Big-endian to little-endian byte-swapping/bitmaps by
17 : * David S. Miller (davem@caip.rutgers.edu), 1995
18 : */
19 :
20 : #include <linux/module.h>
21 : #include <linux/string.h>
22 : #include <linux/fs.h>
23 : #include <linux/time.h>
24 : #include <linux/vmalloc.h>
25 : #include <linux/slab.h>
26 : #include <linux/init.h>
27 : #include <linux/blkdev.h>
28 : #include <linux/backing-dev.h>
29 : #include <linux/parser.h>
30 : #include <linux/buffer_head.h>
31 : #include <linux/exportfs.h>
32 : #include <linux/vfs.h>
33 : #include <linux/random.h>
34 : #include <linux/mount.h>
35 : #include <linux/namei.h>
36 : #include <linux/quotaops.h>
37 : #include <linux/seq_file.h>
38 : #include <linux/ctype.h>
39 : #include <linux/log2.h>
40 : #include <linux/crc16.h>
41 : #include <linux/dax.h>
42 : #include <linux/uaccess.h>
43 : #include <linux/iversion.h>
44 : #include <linux/unicode.h>
45 : #include <linux/part_stat.h>
46 : #include <linux/kthread.h>
47 : #include <linux/freezer.h>
48 : #include <linux/fsnotify.h>
49 : #include <linux/fs_context.h>
50 : #include <linux/fs_parser.h>
51 :
52 : #include "ext4.h"
53 : #include "ext4_extents.h" /* Needed for trace points definition */
54 : #include "ext4_jbd2.h"
55 : #include "xattr.h"
56 : #include "acl.h"
57 : #include "mballoc.h"
58 : #include "fsmap.h"
59 :
60 : #define CREATE_TRACE_POINTS
61 : #include <trace/events/ext4.h>
62 :
63 : static struct ext4_lazy_init *ext4_li_info;
64 : static DEFINE_MUTEX(ext4_li_mtx);
65 : static struct ratelimit_state ext4_mount_msg_ratelimit;
66 :
67 : static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
68 : unsigned long journal_devnum);
69 : static int ext4_show_options(struct seq_file *seq, struct dentry *root);
70 : static void ext4_update_super(struct super_block *sb);
71 : static int ext4_commit_super(struct super_block *sb);
72 : static int ext4_mark_recovery_complete(struct super_block *sb,
73 : struct ext4_super_block *es);
74 : static int ext4_clear_journal_err(struct super_block *sb,
75 : struct ext4_super_block *es);
76 : static int ext4_sync_fs(struct super_block *sb, int wait);
77 : static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
78 : static int ext4_unfreeze(struct super_block *sb);
79 : static int ext4_freeze(struct super_block *sb);
80 : static inline int ext2_feature_set_ok(struct super_block *sb);
81 : static inline int ext3_feature_set_ok(struct super_block *sb);
82 : static void ext4_destroy_lazyinit_thread(void);
83 : static void ext4_unregister_li_request(struct super_block *sb);
84 : static void ext4_clear_request_list(void);
85 : static struct inode *ext4_get_journal_inode(struct super_block *sb,
86 : unsigned int journal_inum);
87 : static int ext4_validate_options(struct fs_context *fc);
88 : static int ext4_check_opt_consistency(struct fs_context *fc,
89 : struct super_block *sb);
90 : static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
91 : static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
92 : static int ext4_get_tree(struct fs_context *fc);
93 : static int ext4_reconfigure(struct fs_context *fc);
94 : static void ext4_fc_free(struct fs_context *fc);
95 : static int ext4_init_fs_context(struct fs_context *fc);
96 : static const struct fs_parameter_spec ext4_param_specs[];
97 :
98 : /*
99 : * Lock ordering
100 : *
101 : * page fault path:
102 : * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
103 : * -> page lock -> i_data_sem (rw)
104 : *
105 : * buffered write path:
106 : * sb_start_write -> i_mutex -> mmap_lock
107 : * sb_start_write -> i_mutex -> transaction start -> page lock ->
108 : * i_data_sem (rw)
109 : *
110 : * truncate:
111 : * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
112 : * page lock
113 : * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
114 : * i_data_sem (rw)
115 : *
116 : * direct IO:
117 : * sb_start_write -> i_mutex -> mmap_lock
118 : * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
119 : *
120 : * writepages:
121 : * transaction start -> page lock(s) -> i_data_sem (rw)
122 : */
123 :
124 : static const struct fs_context_operations ext4_context_ops = {
125 : .parse_param = ext4_parse_param,
126 : .get_tree = ext4_get_tree,
127 : .reconfigure = ext4_reconfigure,
128 : .free = ext4_fc_free,
129 : };
130 :
131 :
/*
 * "ext2" filesystem type served by the ext4 mount-API entry points.  It is
 * only defined when CONFIG_EXT4_USE_FOR_EXT2 is set and no ext2 driver
 * (built-in or modular) is configured; otherwise IS_EXT2_SB() is constant 0.
 */
#if defined(CONFIG_EXT4_USE_FOR_EXT2) && !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE)
static struct file_system_type ext2_fs_type = {
	.name			= "ext2",
	.owner			= THIS_MODULE,
	.fs_flags		= FS_REQUIRES_DEV,
	.parameters		= ext4_param_specs,
	.init_fs_context	= ext4_init_fs_context,
	.kill_sb		= kill_block_super,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
/* True when the block device behind @sb is held by ext2_fs_type. */
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
147 :
148 :
149 : static struct file_system_type ext3_fs_type = {
150 : .owner = THIS_MODULE,
151 : .name = "ext3",
152 : .init_fs_context = ext4_init_fs_context,
153 : .parameters = ext4_param_specs,
154 : .kill_sb = kill_block_super,
155 : .fs_flags = FS_REQUIRES_DEV,
156 : };
157 : MODULE_ALIAS_FS("ext3");
158 : MODULE_ALIAS("ext3");
159 : #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
160 :
161 :
162 290431 : static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
163 : bh_end_io_t *end_io)
164 : {
165 : /*
166 : * buffer's verified bit is no longer valid after reading from
167 : * disk again due to write out error, clear it to make sure we
168 : * recheck the buffer contents.
169 : */
170 290431 : clear_buffer_verified(bh);
171 :
172 290432 : bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
173 290432 : get_bh(bh);
174 290432 : submit_bh(REQ_OP_READ | op_flags, bh);
175 290431 : }
176 :
177 207280 : void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
178 : bh_end_io_t *end_io)
179 : {
180 414560 : BUG_ON(!buffer_locked(bh));
181 :
182 207280 : if (ext4_buffer_uptodate(bh)) {
183 51630 : unlock_buffer(bh);
184 51630 : return;
185 : }
186 155649 : __ext4_read_bh(bh, op_flags, end_io);
187 : }
188 :
189 140092 : int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
190 : {
191 280184 : BUG_ON(!buffer_locked(bh));
192 :
193 140092 : if (ext4_buffer_uptodate(bh)) {
194 5309 : unlock_buffer(bh);
195 5309 : return 0;
196 : }
197 :
198 134783 : __ext4_read_bh(bh, op_flags, end_io);
199 :
200 134783 : wait_on_buffer(bh);
201 269566 : if (buffer_uptodate(bh))
202 134781 : return 0;
203 : return -EIO;
204 : }
205 :
206 103483 : int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
207 : {
208 103483 : lock_buffer(bh);
209 103483 : if (!wait) {
210 1049 : ext4_read_bh_nowait(bh, op_flags, NULL);
211 1049 : return 0;
212 : }
213 102434 : return ext4_read_bh(bh, op_flags, NULL);
214 : }
215 :
216 : /*
217 : * This works like __bread_gfp() except it uses ERR_PTR for error
218 : * returns. Currently with sb_bread it's impossible to distinguish
219 : * between ENOMEM and EIO situations (since both result in a NULL
220 : * return.
221 : */
222 703095 : static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
223 : sector_t block,
224 : blk_opf_t op_flags, gfp_t gfp)
225 : {
226 703095 : struct buffer_head *bh;
227 703095 : int ret;
228 :
229 703095 : bh = sb_getblk_gfp(sb, block, gfp);
230 703865 : if (bh == NULL)
231 : return ERR_PTR(-ENOMEM);
232 703865 : if (ext4_buffer_uptodate(bh))
233 : return bh;
234 :
235 11641 : ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
236 11641 : if (ret) {
237 1 : put_bh(bh);
238 1 : return ERR_PTR(ret);
239 : }
240 : return bh;
241 : }
242 :
243 681615 : struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
244 : blk_opf_t op_flags)
245 : {
246 681615 : return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
247 : }
248 :
249 0 : struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
250 : sector_t block)
251 : {
252 0 : return __ext4_sb_bread_gfp(sb, block, 0, 0);
253 : }
254 :
255 101196 : void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
256 : {
257 101196 : struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);
258 :
259 101196 : if (likely(bh)) {
260 101196 : if (trylock_buffer(bh))
261 91687 : ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
262 101195 : brelse(bh);
263 : }
264 101196 : }
265 :
266 : static int ext4_verify_csum_type(struct super_block *sb,
267 : struct ext4_super_block *es)
268 : {
269 2551 : if (!ext4_has_feature_metadata_csum(sb))
270 : return 1;
271 :
272 2345 : return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
273 : }
274 :
275 3833 : __le32 ext4_superblock_csum(struct super_block *sb,
276 : struct ext4_super_block *es)
277 : {
278 3833 : struct ext4_sb_info *sbi = EXT4_SB(sb);
279 4595531 : int offset = offsetof(struct ext4_super_block, s_checksum);
280 4595531 : __u32 csum;
281 :
282 6184 : csum = ext4_chksum(sbi, ~0, (char *)es, offset);
283 :
284 4595531 : return cpu_to_le32(csum);
285 : }
286 :
287 2551 : static int ext4_superblock_csum_verify(struct super_block *sb,
288 : struct ext4_super_block *es)
289 : {
290 2551 : if (!ext4_has_metadata_csum(sb))
291 : return 1;
292 :
293 2345 : return es->s_checksum == ext4_superblock_csum(sb, es);
294 : }
295 :
296 4590192 : void ext4_superblock_csum_set(struct super_block *sb)
297 : {
298 4590192 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
299 :
300 4590192 : if (!ext4_has_metadata_csum(sb))
301 : return;
302 :
303 4589347 : es->s_checksum = ext4_superblock_csum(sb, es);
304 : }
305 :
306 8358845 : ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
307 : struct ext4_group_desc *bg)
308 : {
309 8358845 : return le32_to_cpu(bg->bg_block_bitmap_lo) |
310 9211934 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
311 9211934 : (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
312 : }
313 :
314 6758008 : ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
315 : struct ext4_group_desc *bg)
316 : {
317 6758008 : return le32_to_cpu(bg->bg_inode_bitmap_lo) |
318 7611096 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
319 7611096 : (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
320 : }
321 :
322 107175211 : ext4_fsblk_t ext4_inode_table(struct super_block *sb,
323 : struct ext4_group_desc *bg)
324 : {
325 107175211 : return le32_to_cpu(bg->bg_inode_table_lo) |
326 108028299 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
327 108028299 : (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
328 : }
329 :
330 7033338 : __u32 ext4_free_group_clusters(struct super_block *sb,
331 : struct ext4_group_desc *bg)
332 : {
333 7033338 : return le16_to_cpu(bg->bg_free_blocks_count_lo) |
334 7882260 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
335 7882260 : (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
336 : }
337 :
338 21513361 : __u32 ext4_free_inodes_count(struct super_block *sb,
339 : struct ext4_group_desc *bg)
340 : {
341 21513361 : return le16_to_cpu(bg->bg_free_inodes_count_lo) |
342 22362287 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
343 22362287 : (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
344 : }
345 :
346 1410743 : __u32 ext4_used_dirs_count(struct super_block *sb,
347 : struct ext4_group_desc *bg)
348 : {
349 1410743 : return le16_to_cpu(bg->bg_used_dirs_count_lo) |
350 2259665 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
351 2259665 : (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
352 : }
353 :
354 2770659 : __u32 ext4_itable_unused_count(struct super_block *sb,
355 : struct ext4_group_desc *bg)
356 : {
357 2770659 : return le16_to_cpu(bg->bg_itable_unused_lo) |
358 2770659 : (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
359 2770659 : (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
360 : }
361 :
362 4501 : void ext4_block_bitmap_set(struct super_block *sb,
363 : struct ext4_group_desc *bg, ext4_fsblk_t blk)
364 : {
365 4501 : bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
366 4501 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
367 4497 : bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
368 4501 : }
369 :
370 4501 : void ext4_inode_bitmap_set(struct super_block *sb,
371 : struct ext4_group_desc *bg, ext4_fsblk_t blk)
372 : {
373 4501 : bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
374 4501 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
375 4497 : bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
376 4501 : }
377 :
378 4501 : void ext4_inode_table_set(struct super_block *sb,
379 : struct ext4_group_desc *bg, ext4_fsblk_t blk)
380 : {
381 4501 : bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
382 4501 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
383 4497 : bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
384 4501 : }
385 :
386 5613003 : void ext4_free_group_clusters_set(struct super_block *sb,
387 : struct ext4_group_desc *bg, __u32 count)
388 : {
389 5613003 : bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
390 5613003 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
391 5606915 : bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
392 5613003 : }
393 :
394 4735189 : void ext4_free_inodes_set(struct super_block *sb,
395 : struct ext4_group_desc *bg, __u32 count)
396 : {
397 4735189 : bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
398 4735189 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
399 4734515 : bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
400 4735189 : }
401 :
402 558326 : void ext4_used_dirs_set(struct super_block *sb,
403 : struct ext4_group_desc *bg, __u32 count)
404 : {
405 558326 : bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
406 558326 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
407 558181 : bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
408 558326 : }
409 :
410 1599276 : void ext4_itable_unused_set(struct super_block *sb,
411 : struct ext4_group_desc *bg, __u32 count)
412 : {
413 1599276 : bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
414 1599276 : if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
415 1592130 : bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
416 1599276 : }
417 :
418 : static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
419 : {
420 8652 : now = clamp_val(now, 0, (1ull << 40) - 1);
421 :
422 8652 : *lo = cpu_to_le32(lower_32_bits(now));
423 8652 : *hi = upper_32_bits(now);
424 5656 : }
425 :
426 : static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
427 : {
428 0 : return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
429 : }
/*
 * Convenience wrappers for a superblock timestamp pair: @tstamp names the
 * low-word field of @es and the matching high byte is <tstamp>_hi.
 * ext4_update_tstamp() stores the current ktime_get_real_seconds() value.
 */
#define ext4_update_tstamp(es, tstamp)					\
	__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi,	\
			     ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp)					\
	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
435 :
436 : /*
437 : * The del_gendisk() function uninitializes the disk-specific data
438 : * structures, including the bdi structure, without telling anyone
439 : * else. Once this happens, any attempt to call mark_buffer_dirty()
440 : * (for example, by ext4_commit_super), will cause a kernel OOPS.
441 : * This is a kludge to prevent these oops until we can put in a proper
442 : * hook in del_gendisk() to inform the VFS and file system layers.
443 : */
444 5325 : static int block_device_ejected(struct super_block *sb)
445 : {
446 5325 : struct inode *bd_inode = sb->s_bdev->bd_inode;
447 5325 : struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
448 :
449 5325 : return bdi->dev == NULL;
450 : }
451 :
452 201307 : static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
453 : {
454 201307 : struct super_block *sb = journal->j_private;
455 201307 : struct ext4_sb_info *sbi = EXT4_SB(sb);
456 201307 : int error = is_journal_aborted(journal);
457 201307 : struct ext4_journal_cb_entry *jce;
458 :
459 201307 : BUG_ON(txn->t_state == T_FINISHED);
460 :
461 201307 : ext4_process_freed_data(sb, txn->t_tid);
462 :
463 201307 : spin_lock(&sbi->s_md_lock);
464 201307 : while (!list_empty(&txn->t_private_list)) {
465 0 : jce = list_entry(txn->t_private_list.next,
466 : struct ext4_journal_cb_entry, jce_list);
467 0 : list_del_init(&jce->jce_list);
468 0 : spin_unlock(&sbi->s_md_lock);
469 0 : jce->jce_func(sb, jce, error);
470 0 : spin_lock(&sbi->s_md_lock);
471 : }
472 201307 : spin_unlock(&sbi->s_md_lock);
473 201307 : }
474 :
475 : /*
476 : * This writepage callback for write_cache_pages()
477 : * takes care of a few cases after page cleaning.
478 : *
479 : * write_cache_pages() already checks for dirty pages
480 : * and calls clear_page_dirty_for_io(), which we want,
481 : * to write protect the pages.
482 : *
483 : * However, we may have to redirty a page (see below.)
484 : */
485 0 : static int ext4_journalled_writepage_callback(struct folio *folio,
486 : struct writeback_control *wbc,
487 : void *data)
488 : {
489 0 : transaction_t *transaction = (transaction_t *) data;
490 0 : struct buffer_head *bh, *head;
491 0 : struct journal_head *jh;
492 :
493 0 : bh = head = folio_buffers(folio);
494 0 : do {
495 : /*
496 : * We have to redirty a page in these cases:
497 : * 1) If buffer is dirty, it means the page was dirty because it
498 : * contains a buffer that needs checkpointing. So the dirty bit
499 : * needs to be preserved so that checkpointing writes the buffer
500 : * properly.
501 : * 2) If buffer is not part of the committing transaction
502 : * (we may have just accidentally come across this buffer because
503 : * inode range tracking is not exact) or if the currently running
504 : * transaction already contains this buffer as well, dirty bit
505 : * needs to be preserved so that the buffer gets writeprotected
506 : * properly on running transaction's commit.
507 : */
508 0 : jh = bh2jh(bh);
509 0 : if (buffer_dirty(bh) ||
510 0 : (jh && (jh->b_transaction != transaction ||
511 0 : jh->b_next_transaction))) {
512 0 : folio_redirty_for_writepage(wbc, folio);
513 0 : goto out;
514 : }
515 0 : } while ((bh = bh->b_this_page) != head);
516 :
517 0 : out:
518 0 : return AOP_WRITEPAGE_ACTIVATE;
519 : }
520 :
521 0 : static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
522 : {
523 0 : struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
524 0 : struct writeback_control wbc = {
525 : .sync_mode = WB_SYNC_ALL,
526 : .nr_to_write = LONG_MAX,
527 0 : .range_start = jinode->i_dirty_start,
528 0 : .range_end = jinode->i_dirty_end,
529 : };
530 :
531 0 : return write_cache_pages(mapping, &wbc,
532 : ext4_journalled_writepage_callback,
533 0 : jinode->i_transaction);
534 : }
535 :
536 159978 : static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
537 : {
538 159978 : int ret;
539 :
540 159978 : if (ext4_should_journal_data(jinode->i_vfs_inode))
541 0 : ret = ext4_journalled_submit_inode_data_buffers(jinode);
542 : else
543 159978 : ret = ext4_normal_submit_inode_data_buffers(jinode);
544 159978 : return ret;
545 : }
546 :
547 159983 : static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
548 : {
549 159983 : int ret = 0;
550 :
551 159983 : if (!ext4_should_journal_data(jinode->i_vfs_inode))
552 159983 : ret = jbd2_journal_finish_inode_data_buffers(jinode);
553 :
554 159983 : return ret;
555 : }
556 :
557 : static bool system_going_down(void)
558 : {
559 0 : return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
560 0 : || system_state == SYSTEM_RESTART;
561 : }
562 :
563 : struct ext4_err_translation {
564 : int code;
565 : int errno;
566 : };
567 :
568 : #define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
569 :
570 : static struct ext4_err_translation err_translation[] = {
571 : EXT4_ERR_TRANSLATE(EIO),
572 : EXT4_ERR_TRANSLATE(ENOMEM),
573 : EXT4_ERR_TRANSLATE(EFSBADCRC),
574 : EXT4_ERR_TRANSLATE(EFSCORRUPTED),
575 : EXT4_ERR_TRANSLATE(ENOSPC),
576 : EXT4_ERR_TRANSLATE(ENOKEY),
577 : EXT4_ERR_TRANSLATE(EROFS),
578 : EXT4_ERR_TRANSLATE(EFBIG),
579 : EXT4_ERR_TRANSLATE(EEXIST),
580 : EXT4_ERR_TRANSLATE(ERANGE),
581 : EXT4_ERR_TRANSLATE(EOVERFLOW),
582 : EXT4_ERR_TRANSLATE(EBUSY),
583 : EXT4_ERR_TRANSLATE(ENOTDIR),
584 : EXT4_ERR_TRANSLATE(ENOTEMPTY),
585 : EXT4_ERR_TRANSLATE(ESHUTDOWN),
586 : EXT4_ERR_TRANSLATE(EFAULT),
587 : };
588 :
589 : static int ext4_errno_to_code(int errno)
590 : {
591 : int i;
592 :
593 1641 : for (i = 0; i < ARRAY_SIZE(err_translation); i++)
594 1640 : if (err_translation[i].errno == errno)
595 451 : return err_translation[i].code;
596 : return EXT4_ERR_UNKNOWN;
597 : }
598 :
599 428 : static void save_error_info(struct super_block *sb, int error,
600 : __u32 ino, __u64 block,
601 : const char *func, unsigned int line)
602 : {
603 428 : struct ext4_sb_info *sbi = EXT4_SB(sb);
604 :
605 : /* We default to EFSCORRUPTED error... */
606 428 : if (error == 0)
607 282 : error = EFSCORRUPTED;
608 :
609 428 : spin_lock(&sbi->s_error_lock);
610 428 : sbi->s_add_error_count++;
611 428 : sbi->s_last_error_code = error;
612 428 : sbi->s_last_error_line = line;
613 428 : sbi->s_last_error_ino = ino;
614 428 : sbi->s_last_error_block = block;
615 428 : sbi->s_last_error_func = func;
616 428 : sbi->s_last_error_time = ktime_get_real_seconds();
617 428 : if (!sbi->s_first_error_time) {
618 25 : sbi->s_first_error_code = error;
619 25 : sbi->s_first_error_line = line;
620 25 : sbi->s_first_error_ino = ino;
621 25 : sbi->s_first_error_block = block;
622 25 : sbi->s_first_error_func = func;
623 25 : sbi->s_first_error_time = sbi->s_last_error_time;
624 : }
625 428 : spin_unlock(&sbi->s_error_lock);
626 428 : }
627 :
628 : /* Deal with the reporting of failure conditions on a filesystem such as
629 : * inconsistencies detected or read IO failures.
630 : *
631 : * On ext2, we can store the error state of the filesystem in the
632 : * superblock. That is not possible on ext4, because we may have other
633 : * write ordering constraints on the superblock which prevent us from
634 : * writing it out straight away; and given that the journal is about to
635 : * be aborted, we can't rely on the current, or future, transactions to
636 : * write out the superblock safely.
637 : *
638 : * We'll just use the jbd2_journal_abort() error code to record an error in
639 : * the journal instead. On recovery, the journal will complain about
640 : * that error until we've noted it down and cleared it.
641 : *
642 : * If force_ro is set, we unconditionally force the filesystem into an
643 : * ABORT|READONLY state, unless the error response on the fs has been set to
644 : * panic in which case we take the easy way out and panic immediately. This is
645 : * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
646 : * at a critical moment in log management.
647 : */
648 428 : static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
649 : __u32 ino, __u64 block,
650 : const char *func, unsigned int line)
651 : {
652 428 : journal_t *journal = EXT4_SB(sb)->s_journal;
653 428 : bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
654 :
655 428 : EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
656 428 : if (test_opt(sb, WARN_ON_ERROR))
657 0 : WARN_ON_ONCE(1);
658 :
659 428 : if (!continue_fs && !sb_rdonly(sb)) {
660 5 : ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
661 5 : if (journal)
662 1 : jbd2_journal_abort(journal, -EIO);
663 : }
664 :
665 428 : if (!bdev_read_only(sb->s_bdev)) {
666 428 : save_error_info(sb, error, ino, block, func, line);
667 : /*
668 : * In case the fs should keep running, we need to writeout
669 : * superblock through the journal. Due to lock ordering
670 : * constraints, it may not be safe to do it right here so we
671 : * defer superblock flushing to a workqueue.
672 : */
673 428 : if (continue_fs && journal)
674 423 : schedule_work(&EXT4_SB(sb)->s_error_work);
675 : else
676 5 : ext4_commit_super(sb);
677 : }
678 :
679 : /*
680 : * We force ERRORS_RO behavior when system is rebooting. Otherwise we
681 : * could panic during 'reboot -f' as the underlying device got already
682 : * disabled.
683 : */
684 428 : if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
685 0 : panic("EXT4-fs (device %s): panic forced after error\n",
686 0 : sb->s_id);
687 : }
688 :
689 428 : if (sb_rdonly(sb) || continue_fs)
690 : return;
691 :
692 5 : ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
693 : /*
694 : * Make sure updated value of ->s_mount_flags will be visible before
695 : * ->s_flags update
696 : */
697 5 : smp_wmb();
698 5 : sb->s_flags |= SB_RDONLY;
699 : }
700 :
701 422 : static void flush_stashed_error_work(struct work_struct *work)
702 : {
703 422 : struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
704 : s_error_work);
705 422 : journal_t *journal = sbi->s_journal;
706 422 : handle_t *handle;
707 :
708 : /*
709 : * If the journal is still running, we have to write out superblock
710 : * through the journal to avoid collisions of other journalled sb
711 : * updates.
712 : *
713 : * We use directly jbd2 functions here to avoid recursing back into
714 : * ext4 error handling code during handling of previous errors.
715 : */
716 422 : if (!sb_rdonly(sbi->s_sb) && journal) {
717 422 : struct buffer_head *sbh = sbi->s_sbh;
718 422 : handle = jbd2_journal_start(journal, 1);
719 422 : if (IS_ERR(handle))
720 0 : goto write_directly;
721 422 : if (jbd2_journal_get_write_access(handle, sbh)) {
722 0 : jbd2_journal_stop(handle);
723 0 : goto write_directly;
724 : }
725 422 : ext4_update_super(sbi->s_sb);
726 1266 : if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
727 0 : ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
728 : "superblock detected");
729 0 : clear_buffer_write_io_error(sbh);
730 0 : set_buffer_uptodate(sbh);
731 : }
732 :
733 422 : if (jbd2_journal_dirty_metadata(handle, sbh)) {
734 0 : jbd2_journal_stop(handle);
735 0 : goto write_directly;
736 : }
737 422 : jbd2_journal_stop(handle);
738 422 : ext4_notify_error_sysfs(sbi);
739 422 : return;
740 : }
741 0 : write_directly:
742 : /*
743 : * Write through journal failed. Write sb directly to get error info
744 : * out and hope for the best.
745 : */
746 0 : ext4_commit_super(sbi->s_sb);
747 0 : ext4_notify_error_sysfs(sbi);
748 : }
749 :
/* Rate-limit check for error messages, using @sb's s_err_ratelimit_state. */
#define ext4_error_ratelimit(sb)					\
	___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),		\
		     "EXT4-fs error")
753 :
754 285 : void __ext4_error(struct super_block *sb, const char *function,
755 : unsigned int line, bool force_ro, int error, __u64 block,
756 : const char *fmt, ...)
757 : {
758 285 : struct va_format vaf;
759 285 : va_list args;
760 :
761 570 : if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
762 0 : return;
763 :
764 285 : trace_ext4_error(sb, function, line);
765 285 : if (ext4_error_ratelimit(sb)) {
766 30 : va_start(args, fmt);
767 30 : vaf.fmt = fmt;
768 30 : vaf.va = &args;
769 30 : printk(KERN_CRIT
770 : "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
771 : sb->s_id, function, line, current->comm, &vaf);
772 30 : va_end(args);
773 : }
774 565 : fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
775 :
776 285 : ext4_handle_error(sb, force_ro, error, 0, block, function, line);
777 : }
778 :
779 142 : void __ext4_error_inode(struct inode *inode, const char *function,
780 : unsigned int line, ext4_fsblk_t block, int error,
781 : const char *fmt, ...)
782 : {
783 142 : va_list args;
784 142 : struct va_format vaf;
785 :
786 284 : if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
787 0 : return;
788 :
789 142 : trace_ext4_error(inode->i_sb, function, line);
790 142 : if (ext4_error_ratelimit(inode->i_sb)) {
791 24 : va_start(args, fmt);
792 24 : vaf.fmt = fmt;
793 24 : vaf.va = &args;
794 24 : if (block)
795 1 : printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
796 : "inode #%lu: block %llu: comm %s: %pV\n",
797 : inode->i_sb->s_id, function, line, inode->i_ino,
798 : block, current->comm, &vaf);
799 : else
800 23 : printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
801 : "inode #%lu: comm %s: %pV\n",
802 : inode->i_sb->s_id, function, line, inode->i_ino,
803 : current->comm, &vaf);
804 24 : va_end(args);
805 : }
806 144 : fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
807 :
808 142 : ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
809 : function, line);
810 : }
811 :
812 0 : void __ext4_error_file(struct file *file, const char *function,
813 : unsigned int line, ext4_fsblk_t block,
814 : const char *fmt, ...)
815 : {
816 0 : va_list args;
817 0 : struct va_format vaf;
818 0 : struct inode *inode = file_inode(file);
819 0 : char pathname[80], *path;
820 :
821 0 : if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
822 0 : return;
823 :
824 0 : trace_ext4_error(inode->i_sb, function, line);
825 0 : if (ext4_error_ratelimit(inode->i_sb)) {
826 0 : path = file_path(file, pathname, sizeof(pathname));
827 0 : if (IS_ERR(path))
828 0 : path = "(unknown)";
829 0 : va_start(args, fmt);
830 0 : vaf.fmt = fmt;
831 0 : vaf.va = &args;
832 0 : if (block)
833 0 : printk(KERN_CRIT
834 : "EXT4-fs error (device %s): %s:%d: inode #%lu: "
835 : "block %llu: comm %s: path %s: %pV\n",
836 : inode->i_sb->s_id, function, line, inode->i_ino,
837 : block, current->comm, path, &vaf);
838 : else
839 0 : printk(KERN_CRIT
840 : "EXT4-fs error (device %s): %s:%d: inode #%lu: "
841 : "comm %s: path %s: %pV\n",
842 : inode->i_sb->s_id, function, line, inode->i_ino,
843 : current->comm, path, &vaf);
844 0 : va_end(args);
845 : }
846 0 : fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
847 :
848 0 : ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
849 : function, line);
850 : }
851 :
852 1 : const char *ext4_decode_error(struct super_block *sb, int errno,
853 : char nbuf[16])
854 : {
855 1 : char *errstr = NULL;
856 :
857 1 : switch (errno) {
858 : case -EFSCORRUPTED:
859 : errstr = "Corrupt filesystem";
860 : break;
861 0 : case -EFSBADCRC:
862 0 : errstr = "Filesystem failed CRC";
863 0 : break;
864 1 : case -EIO:
865 1 : errstr = "IO failure";
866 1 : break;
867 0 : case -ENOMEM:
868 0 : errstr = "Out of memory";
869 0 : break;
870 0 : case -EROFS:
871 0 : if (!sb || (EXT4_SB(sb)->s_journal &&
872 0 : EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
873 : errstr = "Journal has aborted";
874 : else
875 : errstr = "Readonly filesystem";
876 : break;
877 0 : default:
878 : /* If the caller passed in an extra buffer for unknown
879 : * errors, textualise them now. Else we just return
880 : * NULL. */
881 0 : if (nbuf) {
882 : /* Check for truncated error codes... */
883 0 : if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
884 0 : errstr = nbuf;
885 : }
886 : break;
887 : }
888 :
889 1 : return errstr;
890 : }
891 :
892 : /* __ext4_std_error decodes expected errors from journaling functions
893 : * automatically and invokes the appropriate error response. */
894 :
895 50596 : void __ext4_std_error(struct super_block *sb, const char *function,
896 : unsigned int line, int errno)
897 : {
898 50596 : char nbuf[16];
899 50596 : const char *errstr;
900 :
901 101192 : if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
902 50595 : return;
903 :
904 : /* Special case: if the error is EROFS, and we're not already
905 : * inside a transaction, then there's really no point in logging
906 : * an error. */
907 1 : if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
908 : return;
909 :
910 1 : if (ext4_error_ratelimit(sb)) {
911 1 : errstr = ext4_decode_error(sb, errno, nbuf);
912 1 : printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
913 : sb->s_id, function, line, errstr);
914 : }
915 1 : fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
916 :
917 1 : ext4_handle_error(sb, false, -errno, 0, 0, function, line);
918 : }
919 :
920 5723 : void __ext4_msg(struct super_block *sb,
921 : const char *prefix, const char *fmt, ...)
922 : {
923 5723 : struct va_format vaf;
924 5723 : va_list args;
925 :
926 5723 : if (sb) {
927 5471 : atomic_inc(&EXT4_SB(sb)->s_msg_count);
928 5471 : if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
929 : "EXT4-fs"))
930 0 : return;
931 : }
932 :
933 5723 : va_start(args, fmt);
934 5723 : vaf.fmt = fmt;
935 5723 : vaf.va = &args;
936 5723 : if (sb)
937 5471 : printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
938 : else
939 252 : printk("%sEXT4-fs: %pV\n", prefix, &vaf);
940 5723 : va_end(args);
941 : }
942 :
943 302 : static int ext4_warning_ratelimit(struct super_block *sb)
944 : {
945 302 : atomic_inc(&EXT4_SB(sb)->s_warning_count);
946 302 : return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
947 : "EXT4-fs warning");
948 : }
949 :
950 172 : void __ext4_warning(struct super_block *sb, const char *function,
951 : unsigned int line, const char *fmt, ...)
952 : {
953 172 : struct va_format vaf;
954 172 : va_list args;
955 :
956 172 : if (!ext4_warning_ratelimit(sb))
957 79 : return;
958 :
959 93 : va_start(args, fmt);
960 93 : vaf.fmt = fmt;
961 93 : vaf.va = &args;
962 93 : printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
963 : sb->s_id, function, line, &vaf);
964 93 : va_end(args);
965 : }
966 :
967 130 : void __ext4_warning_inode(const struct inode *inode, const char *function,
968 : unsigned int line, const char *fmt, ...)
969 : {
970 130 : struct va_format vaf;
971 130 : va_list args;
972 :
973 130 : if (!ext4_warning_ratelimit(inode->i_sb))
974 118 : return;
975 :
976 12 : va_start(args, fmt);
977 12 : vaf.fmt = fmt;
978 12 : vaf.va = &args;
979 12 : printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
980 : "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
981 : function, line, inode->i_ino, current->comm, &vaf);
982 12 : va_end(args);
983 : }
984 :
985 0 : void __ext4_grp_locked_error(const char *function, unsigned int line,
986 : struct super_block *sb, ext4_group_t grp,
987 : unsigned long ino, ext4_fsblk_t block,
988 : const char *fmt, ...)
989 : __releases(bitlock)
990 : __acquires(bitlock)
991 : {
992 0 : struct va_format vaf;
993 0 : va_list args;
994 :
995 0 : if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
996 : return;
997 :
998 0 : trace_ext4_error(sb, function, line);
999 0 : if (ext4_error_ratelimit(sb)) {
1000 0 : va_start(args, fmt);
1001 0 : vaf.fmt = fmt;
1002 0 : vaf.va = &args;
1003 0 : printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
1004 : sb->s_id, function, line, grp);
1005 0 : if (ino)
1006 0 : printk(KERN_CONT "inode %lu: ", ino);
1007 0 : if (block)
1008 0 : printk(KERN_CONT "block %llu:",
1009 : (unsigned long long) block);
1010 0 : printk(KERN_CONT "%pV\n", &vaf);
1011 0 : va_end(args);
1012 : }
1013 :
1014 0 : if (test_opt(sb, ERRORS_CONT)) {
1015 0 : if (test_opt(sb, WARN_ON_ERROR))
1016 0 : WARN_ON_ONCE(1);
1017 0 : EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
1018 0 : if (!bdev_read_only(sb->s_bdev)) {
1019 0 : save_error_info(sb, EFSCORRUPTED, ino, block, function,
1020 : line);
1021 0 : schedule_work(&EXT4_SB(sb)->s_error_work);
1022 : }
1023 0 : return;
1024 : }
1025 0 : ext4_unlock_group(sb, grp);
1026 0 : ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
1027 : /*
1028 : * We only get here in the ERRORS_RO case; relocking the group
1029 : * may be dangerous, but nothing bad will happen since the
1030 : * filesystem will have already been marked read/only and the
1031 : * journal has been aborted. We return 1 as a hint to callers
1032 : * who might what to use the return value from
1033 : * ext4_grp_locked_error() to distinguish between the
1034 : * ERRORS_CONT and ERRORS_RO case, and perhaps return more
1035 : * aggressively from the ext4 function in question, with a
1036 : * more appropriate error code.
1037 : */
1038 0 : ext4_lock_group(sb, grp);
1039 0 : return;
1040 : }
1041 :
1042 269 : void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
1043 : ext4_group_t group,
1044 : unsigned int flags)
1045 : {
1046 269 : struct ext4_sb_info *sbi = EXT4_SB(sb);
1047 269 : struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1048 269 : struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
1049 269 : int ret;
1050 :
1051 269 : if (!grp || !gdp)
1052 : return;
1053 269 : if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
1054 265 : ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1055 265 : &grp->bb_state);
1056 265 : if (!ret)
1057 265 : percpu_counter_sub(&sbi->s_freeclusters_counter,
1058 265 : grp->bb_free);
1059 : }
1060 :
1061 269 : if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
1062 4 : ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
1063 4 : &grp->bb_state);
1064 4 : if (!ret && gdp) {
1065 4 : int count;
1066 :
1067 4 : count = ext4_free_inodes_count(sb, gdp);
1068 4 : percpu_counter_sub(&sbi->s_freeinodes_counter,
1069 : count);
1070 : }
1071 : }
1072 : }
1073 :
1074 2650 : void ext4_update_dynamic_rev(struct super_block *sb)
1075 : {
1076 2650 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1077 :
1078 2650 : if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
1079 : return;
1080 :
1081 0 : ext4_warning(sb,
1082 : "updating to rev %d because of new feature flag, "
1083 : "running e2fsck is recommended",
1084 : EXT4_DYNAMIC_REV);
1085 :
1086 0 : es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
1087 0 : es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
1088 0 : es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
1089 : /* leave es->s_feature_*compat flags alone */
1090 : /* es->s_uuid will be set by e2fsck if empty */
1091 :
1092 : /*
1093 : * The rest of the superblock fields should be zero, and if not it
1094 : * means they are likely already in use, so leave them alone. We
1095 : * can leave it up to e2fsck to clean up any inconsistencies there.
1096 : */
1097 : }
1098 :
1099 0 : static void ext4_bdev_mark_dead(struct block_device *bdev)
1100 : {
1101 0 : ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH);
1102 0 : }
1103 :
/*
 * Holder operations passed to blkdev_get_by_dev() when ext4 opens the
 * external journal device.
 */
static const struct blk_holder_ops ext4_holder_ops = {
	.mark_dead		= ext4_bdev_mark_dead,
};
1107 :
1108 : /*
1109 : * Open the external journal device
1110 : */
1111 14 : static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
1112 : {
1113 14 : struct block_device *bdev;
1114 :
1115 14 : bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
1116 : &ext4_holder_ops);
1117 14 : if (IS_ERR(bdev))
1118 2 : goto fail;
1119 : return bdev;
1120 :
1121 : fail:
1122 2 : ext4_msg(sb, KERN_ERR,
1123 : "failed to open journal device unknown-block(%u,%u) %ld",
1124 : MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
1125 2 : return NULL;
1126 : }
1127 :
1128 : /*
1129 : * Release the journal device
1130 : */
1131 50 : static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
1132 : {
1133 50 : struct block_device *bdev;
1134 50 : bdev = sbi->s_journal_bdev;
1135 50 : if (bdev) {
1136 : /*
1137 : * Invalidate the journal device's buffers. We don't want them
1138 : * floating about in memory - the physical journal device may
1139 : * hotswapped, and it breaks the `ro-after' testing code.
1140 : */
1141 12 : invalidate_bdev(bdev);
1142 12 : blkdev_put(bdev, sbi->s_sb);
1143 12 : sbi->s_journal_bdev = NULL;
1144 : }
1145 50 : }
1146 :
1147 : static inline struct inode *orphan_list_entry(struct list_head *l)
1148 : {
1149 0 : return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
1150 : }
1151 :
1152 0 : static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
1153 : {
1154 0 : struct list_head *l;
1155 :
1156 0 : ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
1157 : le32_to_cpu(sbi->s_es->s_last_orphan));
1158 :
1159 0 : printk(KERN_ERR "sb_info orphan list:\n");
1160 0 : list_for_each(l, &sbi->s_orphan) {
1161 0 : struct inode *inode = orphan_list_entry(l);
1162 0 : printk(KERN_ERR " "
1163 : "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1164 : inode->i_sb->s_id, inode->i_ino, inode,
1165 : inode->i_mode, inode->i_nlink,
1166 : NEXT_ORPHAN(inode));
1167 : }
1168 0 : }
1169 :
1170 : #ifdef CONFIG_QUOTA
1171 : static int ext4_quota_off(struct super_block *sb, int type);
1172 :
1173 2513 : static inline void ext4_quotas_off(struct super_block *sb, int type)
1174 : {
1175 2513 : BUG_ON(type > EXT4_MAXQUOTAS);
1176 :
1177 : /* Use our quota_off function to clear inode flags etc. */
1178 10052 : for (type--; type >= 0; type--)
1179 7539 : ext4_quota_off(sb, type);
1180 2513 : }
1181 :
1182 : /*
1183 : * This is a helper function which is used in the mount/remount
1184 : * codepaths (which holds s_umount) to fetch the quota file name.
1185 : */
1186 : static inline char *get_qf_name(struct super_block *sb,
1187 : struct ext4_sb_info *sbi,
1188 : int type)
1189 : {
1190 19 : return rcu_dereference_protected(sbi->s_qf_names[type],
1191 : lockdep_is_held(&sb->s_umount));
1192 : }
1193 : #else
/* No-op variant used when CONFIG_QUOTA is not enabled. */
static inline void ext4_quotas_off(struct super_block *sb, int type)
{
}
1197 : #endif
1198 :
1199 2513 : static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
1200 : {
1201 2513 : ext4_fsblk_t block;
1202 2513 : int err;
1203 :
1204 2513 : block = ext4_count_free_clusters(sbi->s_sb);
1205 2513 : ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
1206 2513 : err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
1207 : GFP_KERNEL);
1208 2513 : if (!err) {
1209 2513 : unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
1210 2513 : sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
1211 2513 : err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
1212 : GFP_KERNEL);
1213 : }
1214 2513 : if (!err)
1215 2513 : err = percpu_counter_init(&sbi->s_dirs_counter,
1216 : ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
1217 2513 : if (!err)
1218 2513 : err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
1219 : GFP_KERNEL);
1220 2513 : if (!err)
1221 2513 : err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
1222 : GFP_KERNEL);
1223 2513 : if (!err)
1224 2513 : err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
1225 :
1226 2513 : if (err)
1227 0 : ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");
1228 :
1229 2513 : return err;
1230 : }
1231 :
1232 2513 : static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
1233 : {
1234 2513 : percpu_counter_destroy(&sbi->s_freeclusters_counter);
1235 2513 : percpu_counter_destroy(&sbi->s_freeinodes_counter);
1236 2513 : percpu_counter_destroy(&sbi->s_dirs_counter);
1237 2513 : percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
1238 2513 : percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
1239 2513 : percpu_free_rwsem(&sbi->s_writepages_rwsem);
1240 2513 : }
1241 :
1242 2521 : static void ext4_group_desc_free(struct ext4_sb_info *sbi)
1243 : {
1244 2521 : struct buffer_head **group_desc;
1245 2521 : int i;
1246 :
1247 2521 : rcu_read_lock();
1248 2521 : group_desc = rcu_dereference(sbi->s_group_desc);
1249 18984 : for (i = 0; i < sbi->s_gdb_count; i++)
1250 16463 : brelse(group_desc[i]);
1251 2521 : kvfree(group_desc);
1252 2521 : rcu_read_unlock();
1253 2521 : }
1254 :
1255 2513 : static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
1256 : {
1257 2513 : struct flex_groups **flex_groups;
1258 2513 : int i;
1259 :
1260 2513 : rcu_read_lock();
1261 2513 : flex_groups = rcu_dereference(sbi->s_flex_groups);
1262 2513 : if (flex_groups) {
1263 56568 : for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1264 54246 : kvfree(flex_groups[i]);
1265 2322 : kvfree(flex_groups);
1266 : }
1267 2513 : rcu_read_unlock();
1268 2513 : }
1269 :
/*
 * ext4_put_super - release everything attached to a mounted ext4
 * superblock (installed as .put_super in ext4_sops).
 * @sb: superblock being torn down
 *
 * The order of the steps below matters; see the individual comments.
 * On return sb->s_fs_info is NULL and the ext4_sb_info has been freed.
 */
static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int aborted = 0;
	int err;

	/*
	 * Unregister sysfs before destroying jbd2 journal.
	 * Since we could still access attr_journal_task attribute via sysfs
	 * path which could have sbi->s_journal->j_task as NULL
	 * Unregister sysfs before flush sbi->s_error_work.
	 * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
	 * read metadata verify failed then will queue error work.
	 * flush_stashed_error_work will call start_this_handle may trigger
	 * BUG_ON.
	 */
	ext4_unregister_sysfs(sb);

	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
		ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
			 &sb->s_uuid);

	ext4_unregister_li_request(sb);
	ext4_quotas_off(sb, EXT4_MAXQUOTAS);

	flush_work(&sbi->s_error_work);
	destroy_workqueue(sbi->rsv_conversion_wq);
	ext4_release_orphan_info(sb);

	if (sbi->s_journal) {
		/*
		 * Sample the abort state before destroying the journal so
		 * that a destroy failure is only escalated when the journal
		 * was not already aborted.
		 */
		aborted = is_journal_aborted(sbi->s_journal);
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if ((err < 0) && !aborted) {
			ext4_abort(sb, -err, "Couldn't clean up the journal");
		}
	}

	ext4_es_unregister_shrinker(sbi);
	timer_shutdown_sync(&sbi->s_err_report);
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);

	/*
	 * Feature flags and the saved mount state are only updated for a
	 * writable filesystem whose journal was not aborted; the superblock
	 * itself is committed whenever the filesystem is writable.
	 */
	if (!sb_rdonly(sb) && !aborted) {
		ext4_clear_feature_journal_needs_recovery(sb);
		ext4_clear_feature_orphan_present(sb);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
	if (!sb_rdonly(sb))
		ext4_commit_super(sb);

	ext4_group_desc_free(sbi);
	ext4_flex_groups_free(sbi);
	ext4_percpu_param_destroy(sbi);
#ifdef CONFIG_QUOTA
	/* Free the stored quota file name of every quota type. */
	for (int i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(get_qf_name(sb, sbi, i));
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	ASSERT(list_empty(&sbi->s_orphan));

	sync_blockdev(sb->s_bdev);
	invalidate_bdev(sb->s_bdev);
	if (sbi->s_journal_bdev) {
		/* Sync the external journal device, then release it. */
		sync_blockdev(sbi->s_journal_bdev);
		ext4_blkdev_remove(sbi);
	}

	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
	sbi->s_ea_inode_cache = NULL;

	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
	sbi->s_ea_block_cache = NULL;

	ext4_stop_mmpd(sbi);

	brelse(sbi->s_sbh);
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	kobject_put(&sbi->s_kobj);
	/* Wait for the kobject release to complete before freeing sbi. */
	wait_for_completion(&sbi->s_kobj_unregister);
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->s_blockgroup_lock);
	fs_put_dax(sbi->s_daxdev, NULL);
	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
	utf8_unload(sb->s_encoding);
#endif
	kfree(sbi);
}
1372 :
1373 : static struct kmem_cache *ext4_inode_cachep;
1374 :
1375 : /*
1376 : * Called inside transaction, so use GFP_NOFS
1377 : */
1378 3145922 : static struct inode *ext4_alloc_inode(struct super_block *sb)
1379 : {
1380 3145922 : struct ext4_inode_info *ei;
1381 :
1382 3145922 : ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
1383 3144797 : if (!ei)
1384 : return NULL;
1385 :
1386 3144797 : inode_set_iversion(&ei->vfs_inode, 1);
1387 3144797 : ei->i_flags = 0;
1388 3144797 : spin_lock_init(&ei->i_raw_lock);
1389 3143006 : ei->i_prealloc_node = RB_ROOT;
1390 3143006 : atomic_set(&ei->i_prealloc_active, 0);
1391 3143006 : rwlock_init(&ei->i_prealloc_lock);
1392 3141609 : ext4_es_init_tree(&ei->i_es_tree);
1393 3137821 : rwlock_init(&ei->i_es_lock);
1394 3141399 : INIT_LIST_HEAD(&ei->i_es_list);
1395 3141399 : ei->i_es_all_nr = 0;
1396 3141399 : ei->i_es_shk_nr = 0;
1397 3141399 : ei->i_es_shrink_lblk = 0;
1398 3141399 : ei->i_reserved_data_blocks = 0;
1399 3141399 : spin_lock_init(&(ei->i_block_reservation_lock));
1400 3139291 : ext4_init_pending_tree(&ei->i_pending_tree);
1401 : #ifdef CONFIG_QUOTA
1402 3146318 : ei->i_reserved_quota = 0;
1403 3146318 : memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1404 : #endif
1405 3146318 : ei->jinode = NULL;
1406 3146318 : INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1407 3146318 : spin_lock_init(&ei->i_completed_io_lock);
1408 3134976 : ei->i_sync_tid = 0;
1409 3134976 : ei->i_datasync_tid = 0;
1410 3134976 : atomic_set(&ei->i_unwritten, 0);
1411 3134976 : INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1412 3134976 : ext4_fc_init_inode(&ei->vfs_inode);
1413 3153644 : mutex_init(&ei->i_fc_lock);
1414 3153644 : return &ei->vfs_inode;
1415 : }
1416 :
/*
 * ext4_drop_inode - .drop_inode callback.
 *
 * Uses the generic decision first and consults fscrypt only when the
 * generic code does not ask for the inode to be dropped.  The result is
 * traced and returned.
 */
static int ext4_drop_inode(struct inode *inode)
{
	int drop;

	drop = generic_drop_inode(inode);
	if (drop == 0)
		drop = fscrypt_drop_inode(inode);

	trace_ext4_drop_inode(inode, drop);
	return drop;
}
1427 :
1428 3152053 : static void ext4_free_in_core_inode(struct inode *inode)
1429 : {
1430 3152053 : fscrypt_free_inode(inode);
1431 3152053 : if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1432 0 : pr_warn("%s: inode %ld still in fc list",
1433 : __func__, inode->i_ino);
1434 : }
1435 3152053 : kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1436 3154328 : }
1437 :
1438 3160599 : static void ext4_destroy_inode(struct inode *inode)
1439 : {
1440 3160599 : if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1441 0 : ext4_msg(inode->i_sb, KERN_ERR,
1442 : "Inode %lu (%p): orphan list check failed!",
1443 : inode->i_ino, EXT4_I(inode));
1444 0 : print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1445 0 : EXT4_I(inode), sizeof(struct ext4_inode_info),
1446 : true);
1447 0 : dump_stack();
1448 : }
1449 :
1450 3160599 : if (EXT4_I(inode)->i_reserved_data_blocks)
1451 0 : ext4_msg(inode->i_sb, KERN_ERR,
1452 : "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1453 : inode->i_ino, EXT4_I(inode),
1454 : EXT4_I(inode)->i_reserved_data_blocks);
1455 3160599 : }
1456 :
1457 0 : static void ext4_shutdown(struct super_block *sb)
1458 : {
1459 0 : ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
1460 0 : }
1461 :
1462 2503323 : static void init_once(void *foo)
1463 : {
1464 2503323 : struct ext4_inode_info *ei = foo;
1465 :
1466 2503323 : INIT_LIST_HEAD(&ei->i_orphan);
1467 2503323 : init_rwsem(&ei->xattr_sem);
1468 2504123 : init_rwsem(&ei->i_data_sem);
1469 2505723 : inode_init_once(&ei->vfs_inode);
1470 2508378 : ext4_fc_init_inode(&ei->vfs_inode);
1471 2508998 : }
1472 :
1473 12 : static int __init init_inodecache(void)
1474 : {
1475 12 : ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1476 : sizeof(struct ext4_inode_info), 0,
1477 : (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
1478 : SLAB_ACCOUNT),
1479 : offsetof(struct ext4_inode_info, i_data),
1480 : sizeof_field(struct ext4_inode_info, i_data),
1481 : init_once);
1482 12 : if (ext4_inode_cachep == NULL)
1483 0 : return -ENOMEM;
1484 : return 0;
1485 : }
1486 :
1487 0 : static void destroy_inodecache(void)
1488 : {
1489 : /*
1490 : * Make sure all delayed rcu free inodes are flushed before we
1491 : * destroy cache.
1492 : */
1493 0 : rcu_barrier();
1494 0 : kmem_cache_destroy(ext4_inode_cachep);
1495 0 : }
1496 :
1497 3160925 : void ext4_clear_inode(struct inode *inode)
1498 : {
1499 3160925 : ext4_fc_del(inode);
1500 3160818 : invalidate_inode_buffers(inode);
1501 3160811 : clear_inode(inode);
1502 3160806 : ext4_discard_preallocations(inode, 0);
1503 3160710 : ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1504 3160498 : dquot_drop(inode);
1505 3160525 : if (EXT4_I(inode)->jinode) {
1506 2005130 : jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1507 2005130 : EXT4_I(inode)->jinode);
1508 2005496 : jbd2_free_inode(EXT4_I(inode)->jinode);
1509 2005320 : EXT4_I(inode)->jinode = NULL;
1510 : }
1511 3160715 : fscrypt_put_encryption_info(inode);
1512 3160715 : fsverity_cleanup_inode(inode);
1513 3160715 : }
1514 :
1515 4223 : static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1516 : u64 ino, u32 generation)
1517 : {
1518 4223 : struct inode *inode;
1519 :
1520 : /*
1521 : * Currently we don't know the generation for parent directory, so
1522 : * a generation of 0 means "accept any"
1523 : */
1524 4223 : inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1525 4223 : if (IS_ERR(inode))
1526 : return ERR_CAST(inode);
1527 3188 : if (generation && inode->i_generation != generation) {
1528 0 : iput(inode);
1529 0 : return ERR_PTR(-ESTALE);
1530 : }
1531 :
1532 : return inode;
1533 : }
1534 :
1535 4223 : static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1536 : int fh_len, int fh_type)
1537 : {
1538 4223 : return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1539 : ext4_nfs_get_inode);
1540 : }
1541 :
1542 0 : static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1543 : int fh_len, int fh_type)
1544 : {
1545 0 : return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1546 : ext4_nfs_get_inode);
1547 : }
1548 :
1549 0 : static int ext4_nfs_commit_metadata(struct inode *inode)
1550 : {
1551 0 : struct writeback_control wbc = {
1552 : .sync_mode = WB_SYNC_ALL
1553 : };
1554 :
1555 0 : trace_ext4_nfs_commit_metadata(inode);
1556 0 : return ext4_write_inode(inode, &wbc);
1557 : }
1558 :
1559 : #ifdef CONFIG_QUOTA
/* Quota type names, initialised from INITQFNAMES. */
static const char * const quotatypes[] = INITQFNAMES;
/* Name for quota type @t; plain array lookup, no bounds check. */
#define QTYPE2NAME(t) (quotatypes[t])
1562 :
1563 : static int ext4_write_dquot(struct dquot *dquot);
1564 : static int ext4_acquire_dquot(struct dquot *dquot);
1565 : static int ext4_release_dquot(struct dquot *dquot);
1566 : static int ext4_mark_dquot_dirty(struct dquot *dquot);
1567 : static int ext4_write_info(struct super_block *sb, int type);
1568 : static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1569 : const struct path *path);
1570 : static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1571 : size_t len, loff_t off);
1572 : static ssize_t ext4_quota_write(struct super_block *sb, int type,
1573 : const char *data, size_t len, loff_t off);
1574 : static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1575 : unsigned int flags);
1576 :
1577 4529251 : static struct dquot **ext4_get_dquots(struct inode *inode)
1578 : {
1579 4529251 : return EXT4_I(inode)->i_dquot;
1580 : }
1581 :
/*
 * dquot operations for ext4: the ext4_* entries are ext4's own
 * implementations, the dquot_* entries are the generic quota helpers.
 */
static const struct dquot_operations ext4_quota_operations = {
	.get_reserved_space	= ext4_get_reserved_space,
	.write_dquot		= ext4_write_dquot,
	.acquire_dquot		= ext4_acquire_dquot,
	.release_dquot		= ext4_release_dquot,
	.mark_dirty		= ext4_mark_dquot_dirty,
	.write_info		= ext4_write_info,
	.alloc_dquot		= dquot_alloc,
	.destroy_dquot		= dquot_destroy,
	.get_projid		= ext4_get_projid,
	.get_inode_usage	= ext4_get_inode_usage,
	.get_next_id		= dquot_get_next_id,
};
1595 :
/*
 * quotactl operations for ext4: quota on/off use ext4's own functions,
 * everything else is delegated to the generic dquot_* helpers.
 */
static const struct quotactl_ops ext4_qctl_operations = {
	.quota_on	= ext4_quota_on,
	.quota_off	= ext4_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
};
1606 : #endif
1607 :
/*
 * Superblock operations for ext4.  The quota read/write/get_dquots
 * hooks are only present when CONFIG_QUOTA is enabled.
 */
static const struct super_operations ext4_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.free_inode	= ext4_free_in_core_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.put_super	= ext4_put_super,
	.sync_fs	= ext4_sync_fs,
	.freeze_fs	= ext4_freeze,
	.unfreeze_fs	= ext4_unfreeze,
	.statfs		= ext4_statfs,
	.show_options	= ext4_show_options,
	.shutdown	= ext4_shutdown,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
	.get_dquots	= ext4_get_dquots,
#endif
};
1629 :
/* Export (file handle) operations for ext4. */
static const struct export_operations ext4_export_ops = {
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
	.commit_metadata = ext4_nfs_commit_metadata,
};
1636 :
/*
 * Mount option tokens.  They serve as the option ids in
 * ext4_param_specs[]; a few (Opt_data_err_*, Opt_dax_*) are also used as
 * the values of the enum-valued options in the constant tables below.
 * The fast-commit debug tokens exist only with CONFIG_EXT4_DEBUG.
 */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_acl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
	Opt_inlinecrypt,
	Opt_usrjquota, Opt_grpjquota, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_prjquota,
	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
	Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
#ifdef CONFIG_EXT4_DEBUG
	Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
};
1665 :
/* Values accepted by the "errors" mount option, mapped to mount flags. */
static const struct constant_table ext4_param_errors[] = {
	{"continue",	EXT4_MOUNT_ERRORS_CONT},
	{"panic",	EXT4_MOUNT_ERRORS_PANIC},
	{"remount-ro",	EXT4_MOUNT_ERRORS_RO},
	{}
};
1672 :
/* Values accepted by the "data" mount option, mapped to mount flags. */
static const struct constant_table ext4_param_data[] = {
	{"journal",	EXT4_MOUNT_JOURNAL_DATA},
	{"ordered",	EXT4_MOUNT_ORDERED_DATA},
	{"writeback",	EXT4_MOUNT_WRITEBACK_DATA},
	{}
};
1679 :
/* Values accepted by the "data_err" mount option, mapped to option tokens. */
static const struct constant_table ext4_param_data_err[] = {
	{"abort",	Opt_data_err_abort},
	{"ignore",	Opt_data_err_ignore},
	{}
};
1685 :
/* Values accepted by the "jqfmt" mount option, mapped to quota format ids. */
static const struct constant_table ext4_param_jqfmt[] = {
	{"vfsold",	QFMT_VFS_OLD},
	{"vfsv0",	QFMT_VFS_V0},
	{"vfsv1",	QFMT_VFS_V1},
	{}
};
1692 :
/* Values accepted by the "dax=<mode>" mount option, mapped to option tokens. */
static const struct constant_table ext4_param_dax[] = {
	{"always",	Opt_dax_always},
	{"inode",	Opt_dax_inode},
	{"never",	Opt_dax_never},
	{}
};
1699 :
/*
 * String parameter that allows empty argument: like a string parameter,
 * but declared with fs_param_can_be_empty.
 */
#define fsparam_string_empty(NAME, OPT) \
	__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
1703 :
1704 : /*
1705 : * Mount option specification
1706 : * We don't use fsparam_flag_no because of the way we set the
1707 : * options and the way we show them in _ext4_show_options(). To
1708 : * keep the changes to a minimum, let's keep the negative options
1709 : * separate for now.
1710 : */
static const struct fs_parameter_spec ext4_param_specs[] = {
	fsparam_flag	("bsddf",		Opt_bsd_df),
	fsparam_flag	("minixdf",		Opt_minix_df),
	/* Several names below are aliases that share one token. */
	fsparam_flag	("grpid",		Opt_grpid),
	fsparam_flag	("bsdgroups",		Opt_grpid),
	fsparam_flag	("nogrpid",		Opt_nogrpid),
	fsparam_flag	("sysvgroups",		Opt_nogrpid),
	fsparam_u32	("resgid",		Opt_resgid),
	fsparam_u32	("resuid",		Opt_resuid),
	fsparam_u32	("sb",			Opt_sb),
	fsparam_enum	("errors",		Opt_errors, ext4_param_errors),
	fsparam_flag	("nouid32",		Opt_nouid32),
	fsparam_flag	("debug",		Opt_debug),
	fsparam_flag	("oldalloc",		Opt_removed),
	fsparam_flag	("orlov",		Opt_removed),
	fsparam_flag	("user_xattr",		Opt_user_xattr),
	fsparam_flag	("acl",			Opt_acl),
	fsparam_flag	("norecovery",		Opt_noload),
	fsparam_flag	("noload",		Opt_noload),
	fsparam_flag	("bh",			Opt_removed),
	fsparam_flag	("nobh",		Opt_removed),
	fsparam_u32	("commit",		Opt_commit),
	fsparam_u32	("min_batch_time",	Opt_min_batch_time),
	fsparam_u32	("max_batch_time",	Opt_max_batch_time),
	fsparam_u32	("journal_dev",		Opt_journal_dev),
	fsparam_bdev	("journal_path",	Opt_journal_path),
	fsparam_flag	("journal_checksum",	Opt_journal_checksum),
	fsparam_flag	("nojournal_checksum",	Opt_nojournal_checksum),
	fsparam_flag	("journal_async_commit",Opt_journal_async_commit),
	fsparam_flag	("abort",		Opt_abort),
	fsparam_enum	("data",		Opt_data, ext4_param_data),
	fsparam_enum	("data_err",		Opt_data_err,
						ext4_param_data_err),
	fsparam_string_empty
			("usrjquota",		Opt_usrjquota),
	fsparam_string_empty
			("grpjquota",		Opt_grpjquota),
	fsparam_enum	("jqfmt",		Opt_jqfmt, ext4_param_jqfmt),
	fsparam_flag	("grpquota",		Opt_grpquota),
	fsparam_flag	("quota",		Opt_quota),
	fsparam_flag	("noquota",		Opt_noquota),
	fsparam_flag	("usrquota",		Opt_usrquota),
	fsparam_flag	("prjquota",		Opt_prjquota),
	/*
	 * Some names ("barrier", "dax", "auto_da_alloc", "init_itable",
	 * "test_dummy_encryption") are listed twice with different
	 * parameter types - presumably so the option is accepted both
	 * bare and with a value; confirm against the fs_parser rules.
	 */
	fsparam_flag	("barrier",		Opt_barrier),
	fsparam_u32	("barrier",		Opt_barrier),
	fsparam_flag	("nobarrier",		Opt_nobarrier),
	fsparam_flag	("i_version",		Opt_removed),
	fsparam_flag	("dax",			Opt_dax),
	fsparam_enum	("dax",			Opt_dax_type, ext4_param_dax),
	fsparam_u32	("stripe",		Opt_stripe),
	fsparam_flag	("delalloc",		Opt_delalloc),
	fsparam_flag	("nodelalloc",		Opt_nodelalloc),
	fsparam_flag	("warn_on_error",	Opt_warn_on_error),
	fsparam_flag	("nowarn_on_error",	Opt_nowarn_on_error),
	fsparam_u32	("debug_want_extra_isize",
						Opt_debug_want_extra_isize),
	fsparam_flag	("mblk_io_submit",	Opt_removed),
	fsparam_flag	("nomblk_io_submit",	Opt_removed),
	fsparam_flag	("block_validity",	Opt_block_validity),
	fsparam_flag	("noblock_validity",	Opt_noblock_validity),
	fsparam_u32	("inode_readahead_blks",
						Opt_inode_readahead_blks),
	fsparam_u32	("journal_ioprio",	Opt_journal_ioprio),
	fsparam_u32	("auto_da_alloc",	Opt_auto_da_alloc),
	fsparam_flag	("auto_da_alloc",	Opt_auto_da_alloc),
	fsparam_flag	("noauto_da_alloc",	Opt_noauto_da_alloc),
	fsparam_flag	("dioread_nolock",	Opt_dioread_nolock),
	fsparam_flag	("nodioread_nolock",	Opt_dioread_lock),
	fsparam_flag	("dioread_lock",	Opt_dioread_lock),
	fsparam_flag	("discard",		Opt_discard),
	fsparam_flag	("nodiscard",		Opt_nodiscard),
	fsparam_u32	("init_itable",		Opt_init_itable),
	fsparam_flag	("init_itable",		Opt_init_itable),
	fsparam_flag	("noinit_itable",	Opt_noinit_itable),
#ifdef CONFIG_EXT4_DEBUG
	fsparam_flag	("fc_debug_force",	Opt_fc_debug_force),
	fsparam_u32	("fc_debug_max_replay",	Opt_fc_debug_max_replay),
#endif
	fsparam_u32	("max_dir_size_kb",	Opt_max_dir_size_kb),
	fsparam_flag	("test_dummy_encryption",
						Opt_test_dummy_encryption),
	fsparam_string	("test_dummy_encryption",
						Opt_test_dummy_encryption),
	fsparam_flag	("inlinecrypt",		Opt_inlinecrypt),
	fsparam_flag	("nombcache",		Opt_nombcache),
	fsparam_flag	("no_mbcache",		Opt_nombcache),	/* for backward compatibility */
	fsparam_flag	("prefetch_block_bitmaps",
						Opt_removed),
	fsparam_flag	("no_prefetch_block_bitmaps",
						Opt_no_prefetch_block_bitmaps),
	fsparam_s32	("mb_optimize_scan",	Opt_mb_optimize_scan),
	fsparam_string	("check",		Opt_removed),	/* mount option from ext2/3 */
	fsparam_flag	("nocheck",		Opt_removed),	/* mount option from ext2/3 */
	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
	fsparam_flag	("noreservation",	Opt_removed),	/* mount option from ext2/3 */
	fsparam_u32	("journal",		Opt_removed),	/* mount option from ext2/3 */
	{}	/* end-of-table sentinel */
};
1809 :
/* Journal I/O priority used by default: IOPRIO_CLASS_BE, level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))

/*
 * Per-option behaviour flags stored in the 'flags' member of
 * ext4_mount_opts[] and accumulated into ext4_fs_context::opt_flags by
 * ext4_parse_param().
 */
#define MOPT_SET	0x0001	/* option turns the mount_opt bit(s) on */
#define MOPT_CLEAR	0x0002	/* option turns the mount_opt bit(s) off */
#define MOPT_NOSUPPORT	0x0004	/* parser logs "option not supported" and ignores it */
#define MOPT_EXPLICIT	0x0008	/* parser also records an EXT4_MOUNT2_EXPLICIT_* bit */
#ifdef CONFIG_QUOTA
/* Quota options carry no extra restriction when quota support is built in. */
#define MOPT_Q		0
#define MOPT_QFMT	0x0010
#else
/* Without CONFIG_QUOTA every quota option degrades to "not supported". */
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_NO_EXT2	0x0020	/* rejected by ext4_check_opt_consistency() on an ext2 sb */
#define MOPT_NO_EXT3	0x0040	/* rejected by ext4_check_opt_consistency() on an ext3 sb */
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_SKIP	0x0080	/* NOTE(review): no user in this part of the file */
#define MOPT_2		0x0100	/* mount_opt bits live in s_mount_opt2, not s_mount_opt */
1828 :
/*
 * Table mapping a parsed option token to the mount_opt bit(s) it affects
 * and to the MOPT_* flags describing how it is handled.
 *
 * ext4_parse_param() scans this table linearly for the token returned by
 * fs_parse(); when nothing matches, the scan stops on the terminating
 * {Opt_err, 0, 0} entry, whose flags are 0.  Entries with mount_opt == 0
 * exist only to attach flags (e.g. MOPT_NO_EXT2) to options whose value
 * is handled by a dedicated case in the parser.
 */
static const struct mount_opts {
	int	token;		/* Opt_* token produced by fs_parse() */
	int	mount_opt;	/* EXT4_MOUNT_* / EXT4_MOUNT2_* bit(s) affected */
	int	flags;		/* MOPT_* handling flags */
} ext4_mount_opts[] = {
	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
	{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
	{Opt_commit, 0, MOPT_NO_EXT2},
	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	/* async commit also turns journal checksumming on */
	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
				    EXT4_MOUNT_JOURNAL_CHECKSUM),
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
	/* set or cleared by the Opt_data_err case depending on the value */
	{Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
	/* the stored bit is the negative form, hence the swapped SET/CLEAR */
	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
	{Opt_dax_type, 0, MOPT_EXT4_ONLY},
	{Opt_journal_dev, 0, MOPT_NO_EXT2},
	{Opt_journal_path, 0, MOPT_NO_EXT2},
	{Opt_journal_ioprio, 0, MOPT_NO_EXT2},
	{Opt_data, 0, MOPT_NO_EXT2},
	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
#else
	{Opt_acl, 0, MOPT_NOSUPPORT},
#endif
	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
	/* plain "quota" is treated like "usrquota" */
	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
		       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
							MOPT_CLEAR | MOPT_Q},
	{Opt_usrjquota, 0, MOPT_Q},
	{Opt_grpjquota, 0, MOPT_Q},
	{Opt_jqfmt, 0, MOPT_QFMT},
	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
	{Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
	 MOPT_SET},
#ifdef CONFIG_EXT4_DEBUG
	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
#endif
	/* sentinel: terminates the lookup loop in ext4_parse_param() */
	{Opt_err, 0, 0}
};
1902 :
1903 : #if IS_ENABLED(CONFIG_UNICODE)
1904 : static const struct ext4_sb_encodings {
1905 : __u16 magic;
1906 : char *name;
1907 : unsigned int version;
1908 : } ext4_sb_encoding_map[] = {
1909 : {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1910 : };
1911 :
1912 : static const struct ext4_sb_encodings *
1913 : ext4_sb_read_encoding(const struct ext4_super_block *es)
1914 : {
1915 : __u16 magic = le16_to_cpu(es->s_encoding);
1916 : int i;
1917 :
1918 : for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1919 : if (magic == ext4_sb_encoding_map[i].magic)
1920 : return &ext4_sb_encoding_map[i];
1921 :
1922 : return NULL;
1923 : }
1924 : #endif
1925 :
/*
 * Bits for ext4_fs_context::spec.  A bit is set by the option parser when
 * the corresponding option was given, so later stages can tell "explicitly
 * specified" apart from "left at the zero-initialised default".
 *
 * The lower-case names mirror the ext4_fs_context member they guard
 * (e.g. EXT4_SPEC_s_stripe <-> ctx->s_stripe).  Bit 6 is not assigned.
 * Note that the "sb=" handler in ext4_parse_param() sets
 * EXT4_SPEC_s_sb_block; EXT4_SPEC_SB_BLOCK has no user in this part of
 * the file.
 */
#define EXT4_SPEC_JQUOTA			(1 <<  0)
#define EXT4_SPEC_JQFMT				(1 <<  1)
#define EXT4_SPEC_DATAJ				(1 <<  2)
#define EXT4_SPEC_SB_BLOCK			(1 <<  3)
#define EXT4_SPEC_JOURNAL_DEV			(1 <<  4)
#define EXT4_SPEC_JOURNAL_IOPRIO		(1 <<  5)
#define EXT4_SPEC_s_want_extra_isize		(1 <<  7)
#define EXT4_SPEC_s_max_batch_time		(1 <<  8)
#define EXT4_SPEC_s_min_batch_time		(1 <<  9)
#define EXT4_SPEC_s_inode_readahead_blks	(1 << 10)
#define EXT4_SPEC_s_li_wait_mult		(1 << 11)
#define EXT4_SPEC_s_max_dir_size_kb		(1 << 12)
#define EXT4_SPEC_s_stripe			(1 << 13)
#define EXT4_SPEC_s_resuid			(1 << 14)
#define EXT4_SPEC_s_resgid			(1 << 15)
#define EXT4_SPEC_s_commit_interval		(1 << 16)
#define EXT4_SPEC_s_fc_debug_max_replay		(1 << 17)
#define EXT4_SPEC_s_sb_block			(1 << 18)
#define EXT4_SPEC_mb_optimize_scan		(1 << 19)
1945 :
/*
 * Per-mount (or per-remount) parsing state, allocated zeroed by
 * ext4_init_fs_context() and released by ext4_fc_free().
 *
 * Options are only recorded here while they are being parsed.  Bit-style
 * options are kept as vals_* / mask_* pairs: mask_* accumulates every bit
 * an option touched, vals_* holds the value those bits should end up with
 * (see the ctx_set_* / ctx_clear_* helpers).
 */
struct ext4_fs_context {
	/* Journaled quota file names; owned by the context, kfree()d on free */
	char		*s_qf_names[EXT4_MAXQUOTAS];
	struct fscrypt_dummy_policy dummy_enc_policy;
	int		s_jquota_fmt;	/* Format of quota to use */
#ifdef CONFIG_EXT4_DEBUG
	int s_fc_debug_max_replay;
#endif
	/* One bit per quota type whose file name was set or cleared */
	unsigned short	qname_spec;
	unsigned long	vals_s_flags;	/* Bits to set in s_flags */
	unsigned long	mask_s_flags;	/* Bits changed in s_flags */
	unsigned long	journal_devnum;
	unsigned long	s_commit_interval;	/* stored as HZ * seconds */
	unsigned long	s_stripe;
	unsigned int	s_inode_readahead_blks;
	unsigned int	s_want_extra_isize;
	unsigned int	s_li_wait_mult;
	unsigned int	s_max_dir_size_kb;
	unsigned int	journal_ioprio;
	unsigned int	vals_s_mount_opt;
	unsigned int	mask_s_mount_opt;
	unsigned int	vals_s_mount_opt2;
	unsigned int	mask_s_mount_opt2;
	/* Bit numbers (not masks); updated with set_bit() */
	unsigned long	vals_s_mount_flags;
	unsigned long	mask_s_mount_flags;
	unsigned int	opt_flags;	/* MOPT flags */
	unsigned int	spec;		/* EXT4_SPEC_* bits: which fields were given */
	u32		s_max_batch_time;
	u32		s_min_batch_time;
	kuid_t		s_resuid;
	kgid_t		s_resgid;
	ext4_fsblk_t	s_sb_block;
};
1978 :
1979 3690 : static void ext4_fc_free(struct fs_context *fc)
1980 : {
1981 3690 : struct ext4_fs_context *ctx = fc->fs_private;
1982 3690 : int i;
1983 :
1984 3690 : if (!ctx)
1985 : return;
1986 :
1987 14760 : for (i = 0; i < EXT4_MAXQUOTAS; i++)
1988 11070 : kfree(ctx->s_qf_names[i]);
1989 :
1990 3690 : fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
1991 3690 : kfree(ctx);
1992 : }
1993 :
1994 3690 : int ext4_init_fs_context(struct fs_context *fc)
1995 : {
1996 3690 : struct ext4_fs_context *ctx;
1997 :
1998 3690 : ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
1999 3690 : if (!ctx)
2000 : return -ENOMEM;
2001 :
2002 3690 : fc->fs_private = ctx;
2003 3690 : fc->ops = &ext4_context_ops;
2004 :
2005 3690 : return 0;
2006 : }
2007 :
2008 : #ifdef CONFIG_QUOTA
2009 : /*
2010 : * Note the name of the specified quota file.
2011 : */
2012 128 : static int note_qf_name(struct fs_context *fc, int qtype,
2013 : struct fs_parameter *param)
2014 : {
2015 128 : struct ext4_fs_context *ctx = fc->fs_private;
2016 128 : char *qname;
2017 :
2018 128 : if (param->size < 1) {
2019 0 : ext4_msg(NULL, KERN_ERR, "Missing quota name");
2020 0 : return -EINVAL;
2021 : }
2022 128 : if (strchr(param->string, '/')) {
2023 0 : ext4_msg(NULL, KERN_ERR,
2024 : "quotafile must be on filesystem root");
2025 0 : return -EINVAL;
2026 : }
2027 128 : if (ctx->s_qf_names[qtype]) {
2028 0 : if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
2029 0 : ext4_msg(NULL, KERN_ERR,
2030 : "%s quota file already specified",
2031 : QTYPE2NAME(qtype));
2032 0 : return -EINVAL;
2033 : }
2034 : return 0;
2035 : }
2036 :
2037 128 : qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
2038 128 : if (!qname) {
2039 0 : ext4_msg(NULL, KERN_ERR,
2040 : "Not enough memory for storing quotafile name");
2041 0 : return -ENOMEM;
2042 : }
2043 128 : ctx->s_qf_names[qtype] = qname;
2044 128 : ctx->qname_spec |= 1 << qtype;
2045 128 : ctx->spec |= EXT4_SPEC_JQUOTA;
2046 128 : return 0;
2047 : }
2048 :
2049 : /*
2050 : * Clear the name of the specified quota file.
2051 : */
2052 36 : static int unnote_qf_name(struct fs_context *fc, int qtype)
2053 : {
2054 36 : struct ext4_fs_context *ctx = fc->fs_private;
2055 :
2056 36 : if (ctx->s_qf_names[qtype])
2057 0 : kfree(ctx->s_qf_names[qtype]);
2058 :
2059 36 : ctx->s_qf_names[qtype] = NULL;
2060 36 : ctx->qname_spec |= 1 << qtype;
2061 36 : ctx->spec |= EXT4_SPEC_JQUOTA;
2062 36 : return 0;
2063 : }
2064 : #endif
2065 :
2066 : static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2067 : struct ext4_fs_context *ctx)
2068 : {
2069 24 : int err;
2070 :
2071 24 : if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2072 24 : ext4_msg(NULL, KERN_WARNING,
2073 : "test_dummy_encryption option not supported");
2074 24 : return -EINVAL;
2075 : }
2076 : err = fscrypt_parse_test_dummy_encryption(param,
2077 : &ctx->dummy_enc_policy);
2078 : if (err == -EINVAL) {
2079 : ext4_msg(NULL, KERN_WARNING,
2080 : "Value of option \"%s\" is unrecognized", param->key);
2081 : } else if (err == -EEXIST) {
2082 : ext4_msg(NULL, KERN_WARNING,
2083 : "Conflicting test_dummy_encryption options");
2084 : return -EINVAL;
2085 : }
2086 : return err;
2087 : }
2088 :
2089 : #define EXT4_SET_CTX(name) \
2090 : static inline void ctx_set_##name(struct ext4_fs_context *ctx, \
2091 : unsigned long flag) \
2092 : { \
2093 : ctx->mask_s_##name |= flag; \
2094 : ctx->vals_s_##name |= flag; \
2095 : }
2096 :
2097 : #define EXT4_CLEAR_CTX(name) \
2098 : static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \
2099 : unsigned long flag) \
2100 : { \
2101 : ctx->mask_s_##name |= flag; \
2102 : ctx->vals_s_##name &= ~flag; \
2103 : }
2104 :
2105 : #define EXT4_TEST_CTX(name) \
2106 : static inline unsigned long \
2107 : ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \
2108 : { \
2109 : return (ctx->vals_s_##name & flag); \
2110 : }
2111 :
2112 : EXT4_SET_CTX(flags); /* set only */
2113 4506 : EXT4_SET_CTX(mount_opt);
2114 411 : EXT4_CLEAR_CTX(mount_opt);
2115 13679 : EXT4_TEST_CTX(mount_opt);
2116 27 : EXT4_SET_CTX(mount_opt2);
2117 13 : EXT4_CLEAR_CTX(mount_opt2);
2118 1434 : EXT4_TEST_CTX(mount_opt2);
2119 :
2120 2 : static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
2121 : {
2122 2 : set_bit(bit, &ctx->mask_s_mount_flags);
2123 2 : set_bit(bit, &ctx->vals_s_mount_flags);
2124 2 : }
2125 :
2126 7959 : static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2127 : {
2128 7959 : struct ext4_fs_context *ctx = fc->fs_private;
2129 7959 : struct fs_parse_result result;
2130 7959 : const struct mount_opts *m;
2131 7959 : int is_remount;
2132 7959 : kuid_t uid;
2133 7959 : kgid_t gid;
2134 7959 : int token;
2135 :
2136 7959 : token = fs_parse(fc, ext4_param_specs, param, &result);
2137 7959 : if (token < 0)
2138 : return token;
2139 5043 : is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2140 :
2141 160255 : for (m = ext4_mount_opts; m->token != Opt_err; m++)
2142 160105 : if (token == m->token)
2143 : break;
2144 :
2145 5043 : ctx->opt_flags |= m->flags;
2146 :
2147 5043 : if (m->flags & MOPT_EXPLICIT) {
2148 27 : if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2149 5 : ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2150 22 : } else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2151 22 : ctx_set_mount_opt2(ctx,
2152 : EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2153 : } else
2154 : return -EINVAL;
2155 : }
2156 :
2157 5043 : if (m->flags & MOPT_NOSUPPORT) {
2158 0 : ext4_msg(NULL, KERN_ERR, "%s option not supported",
2159 : param->key);
2160 0 : return 0;
2161 : }
2162 :
2163 5043 : switch (token) {
2164 : #ifdef CONFIG_QUOTA
2165 80 : case Opt_usrjquota:
2166 80 : if (!*param->string)
2167 16 : return unnote_qf_name(fc, USRQUOTA);
2168 : else
2169 64 : return note_qf_name(fc, USRQUOTA, param);
2170 84 : case Opt_grpjquota:
2171 84 : if (!*param->string)
2172 20 : return unnote_qf_name(fc, GRPQUOTA);
2173 : else
2174 64 : return note_qf_name(fc, GRPQUOTA, param);
2175 : #endif
2176 4 : case Opt_sb:
2177 4 : if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2178 2 : ext4_msg(NULL, KERN_WARNING,
2179 : "Ignoring %s option on remount", param->key);
2180 : } else {
2181 2 : ctx->s_sb_block = result.uint_32;
2182 2 : ctx->spec |= EXT4_SPEC_s_sb_block;
2183 : }
2184 : return 0;
2185 48 : case Opt_removed:
2186 48 : ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2187 : param->key);
2188 48 : return 0;
2189 2 : case Opt_abort:
2190 2 : ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
2191 2 : return 0;
2192 4 : case Opt_inlinecrypt:
2193 : #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2194 : ctx_set_flags(ctx, SB_INLINECRYPT);
2195 : #else
2196 4 : ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2197 : #endif
2198 4 : return 0;
2199 : case Opt_errors:
2200 13 : ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2201 13 : ctx_set_mount_opt(ctx, result.uint_32);
2202 13 : return 0;
2203 : #ifdef CONFIG_QUOTA
2204 94 : case Opt_jqfmt:
2205 94 : ctx->s_jquota_fmt = result.uint_32;
2206 94 : ctx->spec |= EXT4_SPEC_JQFMT;
2207 94 : return 0;
2208 : #endif
2209 : case Opt_data:
2210 126 : ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2211 126 : ctx_set_mount_opt(ctx, result.uint_32);
2212 126 : ctx->spec |= EXT4_SPEC_DATAJ;
2213 126 : return 0;
2214 4 : case Opt_commit:
2215 4 : if (result.uint_32 == 0)
2216 0 : result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
2217 4 : else if (result.uint_32 > INT_MAX / HZ) {
2218 0 : ext4_msg(NULL, KERN_ERR,
2219 : "Invalid commit interval %d, "
2220 : "must be smaller than %d",
2221 : result.uint_32, INT_MAX / HZ);
2222 0 : return -EINVAL;
2223 : }
2224 4 : ctx->s_commit_interval = HZ * result.uint_32;
2225 4 : ctx->spec |= EXT4_SPEC_s_commit_interval;
2226 4 : return 0;
2227 9 : case Opt_debug_want_extra_isize:
2228 9 : if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2229 0 : ext4_msg(NULL, KERN_ERR,
2230 : "Invalid want_extra_isize %d", result.uint_32);
2231 0 : return -EINVAL;
2232 : }
2233 9 : ctx->s_want_extra_isize = result.uint_32;
2234 9 : ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2235 9 : return 0;
2236 4 : case Opt_max_batch_time:
2237 4 : ctx->s_max_batch_time = result.uint_32;
2238 4 : ctx->spec |= EXT4_SPEC_s_max_batch_time;
2239 4 : return 0;
2240 4 : case Opt_min_batch_time:
2241 4 : ctx->s_min_batch_time = result.uint_32;
2242 4 : ctx->spec |= EXT4_SPEC_s_min_batch_time;
2243 4 : return 0;
2244 4 : case Opt_inode_readahead_blks:
2245 4 : if (result.uint_32 &&
2246 4 : (result.uint_32 > (1 << 30) ||
2247 4 : !is_power_of_2(result.uint_32))) {
2248 0 : ext4_msg(NULL, KERN_ERR,
2249 : "EXT4-fs: inode_readahead_blks must be "
2250 : "0 or a power of 2 smaller than 2^31");
2251 0 : return -EINVAL;
2252 : }
2253 4 : ctx->s_inode_readahead_blks = result.uint_32;
2254 4 : ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2255 4 : return 0;
2256 : case Opt_init_itable:
2257 12 : ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2258 12 : ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2259 12 : if (param->type == fs_value_is_string)
2260 8 : ctx->s_li_wait_mult = result.uint_32;
2261 12 : ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2262 12 : return 0;
2263 4 : case Opt_max_dir_size_kb:
2264 4 : ctx->s_max_dir_size_kb = result.uint_32;
2265 4 : ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2266 4 : return 0;
2267 : #ifdef CONFIG_EXT4_DEBUG
2268 0 : case Opt_fc_debug_max_replay:
2269 0 : ctx->s_fc_debug_max_replay = result.uint_32;
2270 0 : ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2271 0 : return 0;
2272 : #endif
2273 4 : case Opt_stripe:
2274 4 : ctx->s_stripe = result.uint_32;
2275 4 : ctx->spec |= EXT4_SPEC_s_stripe;
2276 4 : return 0;
2277 4 : case Opt_resuid:
2278 4 : uid = make_kuid(current_user_ns(), result.uint_32);
2279 4 : if (!uid_valid(uid)) {
2280 0 : ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
2281 : result.uint_32);
2282 0 : return -EINVAL;
2283 : }
2284 4 : ctx->s_resuid = uid;
2285 4 : ctx->spec |= EXT4_SPEC_s_resuid;
2286 4 : return 0;
2287 4 : case Opt_resgid:
2288 4 : gid = make_kgid(current_user_ns(), result.uint_32);
2289 4 : if (!gid_valid(gid)) {
2290 0 : ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
2291 : result.uint_32);
2292 0 : return -EINVAL;
2293 : }
2294 4 : ctx->s_resgid = gid;
2295 4 : ctx->spec |= EXT4_SPEC_s_resgid;
2296 4 : return 0;
2297 8 : case Opt_journal_dev:
2298 8 : if (is_remount) {
2299 4 : ext4_msg(NULL, KERN_ERR,
2300 : "Cannot specify journal on remount");
2301 4 : return -EINVAL;
2302 : }
2303 4 : ctx->journal_devnum = result.uint_32;
2304 4 : ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2305 4 : return 0;
2306 8 : case Opt_journal_path:
2307 : {
2308 8 : struct inode *journal_inode;
2309 8 : struct path path;
2310 8 : int error;
2311 :
2312 8 : if (is_remount) {
2313 4 : ext4_msg(NULL, KERN_ERR,
2314 : "Cannot specify journal on remount");
2315 4 : return -EINVAL;
2316 : }
2317 :
2318 4 : error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
2319 4 : if (error) {
2320 2 : ext4_msg(NULL, KERN_ERR, "error: could not find "
2321 : "journal device path");
2322 2 : return -EINVAL;
2323 : }
2324 :
2325 2 : journal_inode = d_inode(path.dentry);
2326 2 : ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2327 2 : ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2328 2 : path_put(&path);
2329 2 : return 0;
2330 : }
2331 4 : case Opt_journal_ioprio:
2332 4 : if (result.uint_32 > 7) {
2333 0 : ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2334 : " (must be 0-7)");
2335 0 : return -EINVAL;
2336 : }
2337 4 : ctx->journal_ioprio =
2338 4 : IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2339 4 : ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2340 4 : return 0;
2341 : case Opt_test_dummy_encryption:
2342 24 : return ext4_parse_test_dummy_encryption(param, ctx);
2343 13 : case Opt_dax:
2344 : case Opt_dax_type:
2345 : #ifdef CONFIG_FS_DAX
2346 : {
2347 13 : int type = (token == Opt_dax) ?
2348 13 : Opt_dax : result.uint_32;
2349 :
2350 7 : switch (type) {
2351 : case Opt_dax:
2352 : case Opt_dax_always:
2353 13 : ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2354 13 : ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2355 : break;
2356 : case Opt_dax_never:
2357 0 : ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2358 0 : ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2359 : break;
2360 : case Opt_dax_inode:
2361 0 : ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2362 0 : ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2363 : /* Strictly for printing options */
2364 0 : ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2365 : break;
2366 : }
2367 : return 0;
2368 : }
2369 : #else
2370 : ext4_msg(NULL, KERN_INFO, "dax option not supported");
2371 : return -EINVAL;
2372 : #endif
2373 20 : case Opt_data_err:
2374 20 : if (result.uint_32 == Opt_data_err_abort)
2375 16 : ctx_set_mount_opt(ctx, m->mount_opt);
2376 4 : else if (result.uint_32 == Opt_data_err_ignore)
2377 4 : ctx_clear_mount_opt(ctx, m->mount_opt);
2378 : return 0;
2379 0 : case Opt_mb_optimize_scan:
2380 0 : if (result.int_32 == 1) {
2381 0 : ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2382 0 : ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2383 0 : } else if (result.int_32 == 0) {
2384 0 : ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2385 0 : ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2386 : } else {
2387 0 : ext4_msg(NULL, KERN_WARNING,
2388 : "mb_optimize_scan should be set to 0 or 1.");
2389 0 : return -EINVAL;
2390 : }
2391 : return 0;
2392 : }
2393 :
2394 : /*
2395 : * At this point we should only be getting options requiring MOPT_SET,
2396 : * or MOPT_CLEAR. Anything else is a bug
2397 : */
2398 4458 : if (m->token == Opt_err) {
2399 0 : ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2400 : param->key);
2401 0 : WARN_ON(1);
2402 0 : return -EINVAL;
2403 : }
2404 :
2405 : else {
2406 4458 : unsigned int set = 0;
2407 :
2408 4458 : if ((param->type == fs_value_is_flag) ||
2409 24 : result.uint_32 > 0)
2410 4450 : set = 1;
2411 :
2412 4458 : if (m->flags & MOPT_CLEAR)
2413 132 : set = !set;
2414 4326 : else if (unlikely(!(m->flags & MOPT_SET))) {
2415 0 : ext4_msg(NULL, KERN_WARNING,
2416 : "buggy handling of option %s",
2417 : param->key);
2418 0 : WARN_ON(1);
2419 0 : return -EINVAL;
2420 : }
2421 4458 : if (m->flags & MOPT_2) {
2422 0 : if (set != 0)
2423 0 : ctx_set_mount_opt2(ctx, m->mount_opt);
2424 : else
2425 0 : ctx_clear_mount_opt2(ctx, m->mount_opt);
2426 : } else {
2427 4458 : if (set != 0)
2428 4326 : ctx_set_mount_opt(ctx, m->mount_opt);
2429 : else
2430 132 : ctx_clear_mount_opt(ctx, m->mount_opt);
2431 : }
2432 : }
2433 :
2434 : return 0;
2435 : }
2436 :
2437 0 : static int parse_options(struct fs_context *fc, char *options)
2438 : {
2439 0 : struct fs_parameter param;
2440 0 : int ret;
2441 0 : char *key;
2442 :
2443 0 : if (!options)
2444 : return 0;
2445 :
2446 0 : while ((key = strsep(&options, ",")) != NULL) {
2447 0 : if (*key) {
2448 0 : size_t v_len = 0;
2449 0 : char *value = strchr(key, '=');
2450 :
2451 0 : param.type = fs_value_is_flag;
2452 0 : param.string = NULL;
2453 :
2454 0 : if (value) {
2455 0 : if (value == key)
2456 0 : continue;
2457 :
2458 0 : *value++ = 0;
2459 0 : v_len = strlen(value);
2460 0 : param.string = kmemdup_nul(value, v_len,
2461 : GFP_KERNEL);
2462 0 : if (!param.string)
2463 : return -ENOMEM;
2464 0 : param.type = fs_value_is_string;
2465 : }
2466 :
2467 0 : param.key = key;
2468 0 : param.size = v_len;
2469 :
2470 0 : ret = ext4_parse_param(fc, ¶m);
2471 0 : if (param.string)
2472 0 : kfree(param.string);
2473 0 : if (ret < 0)
2474 0 : return ret;
2475 : }
2476 : }
2477 :
2478 0 : ret = ext4_validate_options(fc);
2479 0 : if (ret < 0)
2480 : return ret;
2481 :
2482 : return 0;
2483 : }
2484 :
2485 2551 : static int parse_apply_sb_mount_options(struct super_block *sb,
2486 : struct ext4_fs_context *m_ctx)
2487 : {
2488 2551 : struct ext4_sb_info *sbi = EXT4_SB(sb);
2489 2551 : char *s_mount_opts = NULL;
2490 2551 : struct ext4_fs_context *s_ctx = NULL;
2491 2551 : struct fs_context *fc = NULL;
2492 2551 : int ret = -ENOMEM;
2493 :
2494 2551 : if (!sbi->s_es->s_mount_opts[0])
2495 : return 0;
2496 :
2497 0 : s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2498 : sizeof(sbi->s_es->s_mount_opts),
2499 : GFP_KERNEL);
2500 0 : if (!s_mount_opts)
2501 : return ret;
2502 :
2503 0 : fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2504 0 : if (!fc)
2505 0 : goto out_free;
2506 :
2507 0 : s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2508 0 : if (!s_ctx)
2509 0 : goto out_free;
2510 :
2511 0 : fc->fs_private = s_ctx;
2512 0 : fc->s_fs_info = sbi;
2513 :
2514 0 : ret = parse_options(fc, s_mount_opts);
2515 0 : if (ret < 0)
2516 0 : goto parse_failed;
2517 :
2518 0 : ret = ext4_check_opt_consistency(fc, sb);
2519 0 : if (ret < 0) {
2520 0 : parse_failed:
2521 0 : ext4_msg(sb, KERN_WARNING,
2522 : "failed to parse options in superblock: %s",
2523 : s_mount_opts);
2524 0 : ret = 0;
2525 0 : goto out_free;
2526 : }
2527 :
2528 0 : if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2529 0 : m_ctx->journal_devnum = s_ctx->journal_devnum;
2530 0 : if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2531 0 : m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2532 :
2533 0 : ext4_apply_options(fc, sb);
2534 0 : ret = 0;
2535 :
2536 0 : out_free:
2537 0 : if (fc) {
2538 0 : ext4_fc_free(fc);
2539 0 : kfree(fc);
2540 : }
2541 0 : kfree(s_mount_opts);
2542 0 : return ret;
2543 : }
2544 :
/*
 * Transfer the parsed journaled-quota settings from the context to the
 * superblock info.  Nothing is done when the filesystem has the quota
 * feature, or when the kernel is built without CONFIG_QUOTA.
 */
static void ext4_apply_quota_options(struct fs_context *fc,
				     struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *new_name, *old_name;
	int qtype;

	if (ext4_has_feature_quota(sb))
		return;

	if (ctx->spec & EXT4_SPEC_JQUOTA) {
		for (qtype = 0; qtype < EXT4_MAXQUOTAS; qtype++) {
			/* Only touch types named on the command line. */
			if (!(ctx->qname_spec & (1 << qtype)))
				continue;

			/* NULL here means the name is being cleared. */
			new_name = ctx->s_qf_names[qtype];
			if (new_name)
				set_opt(sb, QUOTA);

			/* Ownership of the string moves to the sb info. */
			ctx->s_qf_names[qtype] = NULL;
			old_name = rcu_replace_pointer(sbi->s_qf_names[qtype],
						new_name,
						lockdep_is_held(&sb->s_umount));
			if (old_name)
				kfree_rcu_mightsleep(old_name);
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT)
		sbi->s_jquota_fmt = ctx->s_jquota_fmt;
#endif
}
2578 :
/*
 * Check that the quota options collected in the context are consistent
 * with each other and with the current state of @sb.
 *
 * Returns 0 if the options are acceptable (or are ignored because the
 * filesystem has the quota feature), -EINVAL otherwise.  As a side effect
 * the USRQUOTA / GRPQUOTA bits are cleared in the context when a journaled
 * quota file name exists for that type.  Always 0 without CONFIG_QUOTA.
 */
static int ext4_check_quota_consistency(struct fs_context *fc,
					struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	const int all_quota_opts = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
				   EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool has_quota_feature = ext4_has_feature_quota(sb);
	bool quota_active = sb_any_quota_loaded(sb);
	bool usr_jquota, grp_jquota, usrquota, grpquota;
	int qtype;

	/*
	 * We do the test below only for project quotas. 'usrquota' and
	 * 'grpquota' mount options are allowed even without quota feature
	 * to support legacy quotas in quota files.
	 */
	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
	    !ext4_has_feature_project(sb)) {
		ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
			 "Cannot enable project quota enforcement.");
		return -EINVAL;
	}

	/* Quota bits were touched and all of them ended up off. */
	if (quota_active &&
	    (ctx->mask_s_mount_opt & all_quota_opts) &&
	    !ctx_test_mount_opt(ctx, all_quota_opts))
		goto err_quota_change;

	if (ctx->spec & EXT4_SPEC_JQUOTA) {
		for (qtype = 0; qtype < EXT4_MAXQUOTAS; qtype++) {
			bool have_old, have_new;

			if (!(ctx->qname_spec & (1 << qtype)))
				continue;

			have_old = !!sbi->s_qf_names[qtype];
			have_new = !!ctx->s_qf_names[qtype];

			/* No adding or dropping a name while quota is on. */
			if (quota_active && have_old != have_new)
				goto err_jquota_change;

			/* An existing name may only be repeated verbatim. */
			if (have_old && have_new &&
			    strcmp(get_qf_name(sb, sbi, qtype),
				   ctx->s_qf_names[qtype]) != 0)
				goto err_jquota_specified;
		}

		if (has_quota_feature) {
			ext4_msg(NULL, KERN_INFO,
				 "Journaled quota options ignored when "
				 "QUOTA feature is enabled");
			return 0;
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT) {
		if (quota_active && sbi->s_jquota_fmt != ctx->s_jquota_fmt)
			goto err_jquota_change;
		if (has_quota_feature) {
			ext4_msg(NULL, KERN_INFO, "Quota format mount options "
				 "ignored when QUOTA feature is enabled");
			return 0;
		}
	}

	/* Make sure we don't mix old and new quota format */
	usr_jquota = (get_qf_name(sb, sbi, USRQUOTA) ||
		      ctx->s_qf_names[USRQUOTA]);
	grp_jquota = (get_qf_name(sb, sbi, GRPQUOTA) ||
		      ctx->s_qf_names[GRPQUOTA]);

	usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
		    test_opt(sb, USRQUOTA));
	grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
		    test_opt(sb, GRPQUOTA));

	/* A journaled quota file overrides the plain option of its type. */
	if (usr_jquota) {
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
		usrquota = false;
	}
	if (grp_jquota) {
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
		grpquota = false;
	}

	if (!usr_jquota && !grp_jquota)
		return 0;

	/* One type journaled while the other uses the plain option. */
	if (usrquota || grpquota) {
		ext4_msg(NULL, KERN_ERR, "old and new quota "
			 "format mixing");
		return -EINVAL;
	}

	if (!(ctx->spec & EXT4_SPEC_JQFMT) && !sbi->s_jquota_fmt) {
		ext4_msg(NULL, KERN_ERR, "journaled quota format "
			 "not specified");
		return -EINVAL;
	}

	return 0;

err_quota_change:
	ext4_msg(NULL, KERN_ERR,
		 "Cannot change quota options when quota turned on");
	return -EINVAL;
err_jquota_change:
	ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
		 "options when quota turned on");
	return -EINVAL;
err_jquota_specified:
	ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
		 QTYPE2NAME(qtype));
	return -EINVAL;
#else
	return 0;
#endif
}
2699 :
2700 : static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2701 : struct super_block *sb)
2702 : {
2703 : const struct ext4_fs_context *ctx = fc->fs_private;
2704 : const struct ext4_sb_info *sbi = EXT4_SB(sb);
2705 :
2706 : if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2707 : return 0;
2708 :
2709 : if (!ext4_has_feature_encrypt(sb)) {
2710 : ext4_msg(NULL, KERN_WARNING,
2711 : "test_dummy_encryption requires encrypt feature");
2712 : return -EINVAL;
2713 : }
2714 : /*
2715 : * This mount option is just for testing, and it's not worthwhile to
2716 : * implement the extra complexity (e.g. RCU protection) that would be
2717 : * needed to allow it to be set or changed during remount. We do allow
2718 : * it to be specified during remount, but only if there is no change.
2719 : */
2720 : if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2721 : if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2722 : &ctx->dummy_enc_policy))
2723 : return 0;
2724 : ext4_msg(NULL, KERN_WARNING,
2725 : "Can't set or change test_dummy_encryption on remount");
2726 : return -EINVAL;
2727 : }
2728 : /* Also make sure s_mount_opts didn't contain a conflicting value. */
2729 : if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2730 : if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2731 : &ctx->dummy_enc_policy))
2732 : return 0;
2733 : ext4_msg(NULL, KERN_WARNING,
2734 : "Conflicting test_dummy_encryption options");
2735 : return -EINVAL;
2736 : }
2737 : return 0;
2738 : }
2739 :
2740 : static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2741 : struct super_block *sb)
2742 : {
2743 : if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2744 : /* if already set, it was already verified to be the same */
2745 : fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2746 : return;
2747 : EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2748 : memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2749 : ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2750 : }
2751 :
2752 3312 : static int ext4_check_opt_consistency(struct fs_context *fc,
2753 : struct super_block *sb)
2754 : {
2755 3312 : struct ext4_fs_context *ctx = fc->fs_private;
2756 3312 : struct ext4_sb_info *sbi = fc->s_fs_info;
2757 3312 : int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2758 3312 : int err;
2759 :
2760 3312 : if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2761 : ext4_msg(NULL, KERN_ERR,
2762 : "Mount option(s) incompatible with ext2");
2763 : return -EINVAL;
2764 : }
2765 3312 : if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2766 25 : ext4_msg(NULL, KERN_ERR,
2767 : "Mount option(s) incompatible with ext3");
2768 25 : return -EINVAL;
2769 : }
2770 :
2771 3287 : if (ctx->s_want_extra_isize >
2772 3287 : (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2773 7 : ext4_msg(NULL, KERN_ERR,
2774 : "Invalid want_extra_isize %d",
2775 : ctx->s_want_extra_isize);
2776 7 : return -EINVAL;
2777 : }
2778 :
2779 3280 : if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
2780 8 : int blocksize =
2781 4 : BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2782 4 : if (blocksize < PAGE_SIZE)
2783 0 : ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
2784 : "experimental mount option 'dioread_nolock' "
2785 : "for blocksize < PAGE_SIZE");
2786 : }
2787 :
2788 3280 : err = ext4_check_test_dummy_encryption(fc, sb);
2789 3280 : if (err)
2790 : return err;
2791 :
2792 3280 : if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2793 44 : if (!sbi->s_journal) {
2794 4 : ext4_msg(NULL, KERN_WARNING,
2795 : "Remounting file system with no journal "
2796 : "so ignoring journalled data option");
2797 4 : ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2798 40 : } else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2799 40 : test_opt(sb, DATA_FLAGS)) {
2800 24 : ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2801 : "on remount");
2802 24 : return -EINVAL;
2803 : }
2804 : }
2805 :
2806 3256 : if (is_remount) {
2807 717 : if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2808 0 : (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2809 0 : ext4_msg(NULL, KERN_ERR, "can't mount with "
2810 : "both data=journal and dax");
2811 0 : return -EINVAL;
2812 : }
2813 :
2814 717 : if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2815 0 : (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2816 : (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2817 0 : fail_dax_change_remount:
2818 0 : ext4_msg(NULL, KERN_ERR, "can't change "
2819 : "dax mount option while remounting");
2820 0 : return -EINVAL;
2821 717 : } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2822 0 : (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2823 : (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2824 0 : goto fail_dax_change_remount;
2825 717 : } else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2826 : ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2827 0 : (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2828 : !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2829 0 : goto fail_dax_change_remount;
2830 : }
2831 : }
2832 :
2833 3256 : return ext4_check_quota_consistency(fc, sb);
2834 : }
2835 :
2836 3162 : static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2837 : {
2838 3162 : struct ext4_fs_context *ctx = fc->fs_private;
2839 3162 : struct ext4_sb_info *sbi = fc->s_fs_info;
2840 :
2841 3162 : sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2842 3162 : sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2843 3162 : sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2844 3162 : sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2845 3162 : sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
2846 3162 : sbi->s_mount_flags |= ctx->vals_s_mount_flags;
2847 3162 : sb->s_flags &= ~ctx->mask_s_flags;
2848 3162 : sb->s_flags |= ctx->vals_s_flags;
2849 :
2850 : #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2851 3162 : APPLY(s_commit_interval);
2852 3162 : APPLY(s_stripe);
2853 3162 : APPLY(s_max_batch_time);
2854 3162 : APPLY(s_min_batch_time);
2855 3162 : APPLY(s_want_extra_isize);
2856 3162 : APPLY(s_inode_readahead_blks);
2857 3162 : APPLY(s_max_dir_size_kb);
2858 3162 : APPLY(s_li_wait_mult);
2859 3162 : APPLY(s_resgid);
2860 3162 : APPLY(s_resuid);
2861 :
2862 : #ifdef CONFIG_EXT4_DEBUG
2863 3162 : APPLY(s_fc_debug_max_replay);
2864 : #endif
2865 :
2866 3162 : ext4_apply_quota_options(fc, sb);
2867 3162 : ext4_apply_test_dummy_encryption(ctx, sb);
2868 3162 : }
2869 :
2870 :
/*
 * Reject a mix of journaled quota file names with old-style quota flags.
 *
 * When a journaled quota file name is given for a quota type, the
 * matching usrquota/grpquota flag is cleared from the context.  If either
 * flag is still set afterwards, the two formats are being mixed and
 * -EINVAL is returned.  Returns 1 otherwise (also when CONFIG_QUOTA is
 * not enabled).
 */
static int ext4_validate_options(struct fs_context *fc)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	const char *usr_name = ctx->s_qf_names[USRQUOTA];
	const char *grp_name = ctx->s_qf_names[GRPQUOTA];

	/* No journaled quota files named: nothing can conflict. */
	if (!usr_name && !grp_name)
		return 1;

	if (usr_name && ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA))
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);

	if (grp_name && ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA))
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);

	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
	    ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
		ext4_msg(NULL, KERN_ERR, "old and new quota "
			 "format mixing");
		return -EINVAL;
	}
#endif
	return 1;
}
2897 :
/*
 * Append the journaled quota options (jqfmt=, usrjquota=, grpjquota=) of
 * @sb to @seq.  Compiles to an empty function without CONFIG_QUOTA.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *usr_file, *grp_file;

	if (sbi->s_jquota_fmt) {
		const char *fmt_label;

		switch (sbi->s_jquota_fmt) {
		case QFMT_VFS_OLD:
			fmt_label = "vfsold";
			break;
		case QFMT_VFS_V0:
			fmt_label = "vfsv0";
			break;
		case QFMT_VFS_V1:
			fmt_label = "vfsv1";
			break;
		default:
			/* Unrecognised format id: print an empty name. */
			fmt_label = "";
			break;
		}
		seq_printf(seq, ",jqfmt=%s", fmt_label);
	}

	/* The quota file name pointers are read under the RCU read lock. */
	rcu_read_lock();
	usr_file = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	grp_file = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (usr_file)
		seq_show_option(seq, "usrjquota", usr_file);
	if (grp_file)
		seq_show_option(seq, "grpjquota", grp_file);
	rcu_read_unlock();
#endif
}
2932 :
2933 44965 : static const char *token2str(int token)
2934 : {
2935 44965 : const struct fs_parameter_spec *spec;
2936 :
2937 1625129 : for (spec = ext4_param_specs; spec->name != NULL; spec++)
2938 1625317 : if (spec->opt == token && !spec->type)
2939 : break;
2940 44965 : return spec->name;
2941 : }
2942 :
2943 : /*
2944 : * Show an option if
2945 : * - it's set to a non-default value OR
2946 : * - if the per-sb default is different from the global default
2947 : */
2948 332666 : static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2949 : int nodefs)
2950 : {
2951 332666 : struct ext4_sb_info *sbi = EXT4_SB(sb);
2952 332666 : struct ext4_super_block *es = sbi->s_es;
2953 332666 : int def_errors;
2954 332666 : const struct mount_opts *m;
2955 332666 : char sep = nodefs ? '\n' : ',';
2956 :
2957 : #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2958 : #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2959 :
2960 332666 : if (sbi->s_sb_block != 1)
2961 12 : SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2962 :
2963 15203015 : for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2964 14870561 : int want_set = m->flags & MOPT_SET;
2965 14870561 : int opt_2 = m->flags & MOPT_2;
2966 14870561 : unsigned int mount_opt, def_mount_opt;
2967 :
2968 14870561 : if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2969 11552839 : m->flags & MOPT_SKIP)
2970 3317722 : continue;
2971 :
2972 11552839 : if (opt_2) {
2973 332204 : mount_opt = sbi->s_mount_opt2;
2974 332204 : def_mount_opt = sbi->s_def_mount_opt2;
2975 : } else {
2976 11220635 : mount_opt = sbi->s_mount_opt;
2977 11220635 : def_mount_opt = sbi->s_def_mount_opt;
2978 : }
2979 : /* skip if same as the default */
2980 11552839 : if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
2981 11464317 : continue;
2982 : /* select Opt_noFoo vs Opt_Foo */
2983 88522 : if ((want_set &&
2984 88522 : (mount_opt & m->mount_opt) != m->mount_opt) ||
2985 21873 : (!want_set && (mount_opt & m->mount_opt)))
2986 43449 : continue;
2987 45073 : SEQ_OPTS_PRINT("%s", token2str(m->token));
2988 : }
2989 :
2990 332454 : if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2991 330888 : le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2992 636 : SEQ_OPTS_PRINT("resuid=%u",
2993 : from_kuid_munged(&init_user_ns, sbi->s_resuid));
2994 331953 : if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2995 331477 : le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2996 0 : SEQ_OPTS_PRINT("resgid=%u",
2997 : from_kgid_munged(&init_user_ns, sbi->s_resgid));
2998 331393 : def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2999 331393 : if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
3000 23 : SEQ_OPTS_PUTS("errors=remount-ro");
3001 331393 : if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
3002 796 : SEQ_OPTS_PUTS("errors=continue");
3003 331393 : if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
3004 12 : SEQ_OPTS_PUTS("errors=panic");
3005 331393 : if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
3006 949 : SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
3007 331471 : if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
3008 814 : SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
3009 331413 : if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
3010 739 : SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
3011 331478 : if (nodefs || sbi->s_stripe)
3012 331340 : SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
3013 332247 : if (nodefs || EXT4_MOUNT_DATA_FLAGS &
3014 331440 : (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
3015 1053 : if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
3016 62 : SEQ_OPTS_PUTS("data=journal");
3017 991 : else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
3018 856 : SEQ_OPTS_PUTS("data=ordered");
3019 135 : else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
3020 113 : SEQ_OPTS_PUTS("data=writeback");
3021 : }
3022 332245 : if (nodefs ||
3023 331437 : sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
3024 424 : SEQ_OPTS_PRINT("inode_readahead_blks=%u",
3025 : sbi->s_inode_readahead_blks);
3026 :
3027 331688 : if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
3028 330799 : (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
3029 662 : SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
3030 331843 : if (nodefs || sbi->s_max_dir_size_kb)
3031 814 : SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
3032 331934 : if (test_opt(sb, DATA_ERR_ABORT))
3033 47 : SEQ_OPTS_PUTS("data_err=abort");
3034 :
3035 331934 : fscrypt_show_test_dummy_encryption(seq, sep, sb);
3036 :
3037 331934 : if (sb->s_flags & SB_INLINECRYPT)
3038 0 : SEQ_OPTS_PUTS("inlinecrypt");
3039 :
3040 331934 : if (test_opt(sb, DAX_ALWAYS)) {
3041 0 : if (IS_EXT2_SB(sb))
3042 : SEQ_OPTS_PUTS("dax");
3043 : else
3044 0 : SEQ_OPTS_PUTS("dax=always");
3045 331934 : } else if (test_opt2(sb, DAX_NEVER)) {
3046 0 : SEQ_OPTS_PUTS("dax=never");
3047 331934 : } else if (test_opt2(sb, DAX_INODE)) {
3048 0 : SEQ_OPTS_PUTS("dax=inode");
3049 : }
3050 :
3051 331934 : if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3052 271400 : !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3053 9 : SEQ_OPTS_PUTS("mb_optimize_scan=0");
3054 331925 : } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3055 60311 : test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3056 0 : SEQ_OPTS_PUTS("mb_optimize_scan=1");
3057 : }
3058 :
3059 331934 : ext4_show_quota_options(seq, sb);
3060 331572 : return 0;
3061 : }
3062 :
3063 332517 : static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3064 : {
3065 332517 : return _ext4_show_options(seq, root->d_sb, 0);
3066 : }
3067 :
3068 808 : int ext4_seq_options_show(struct seq_file *seq, void *offset)
3069 : {
3070 808 : struct super_block *sb = seq->private;
3071 808 : int rc;
3072 :
3073 1616 : seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3074 808 : rc = _ext4_show_options(seq, sb, 1);
3075 808 : seq_puts(seq, "\n");
3076 808 : return rc;
3077 : }
3078 :
3079 2556 : static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3080 : int read_only)
3081 : {
3082 2556 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3083 2556 : int err = 0;
3084 :
3085 2556 : if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3086 0 : ext4_msg(sb, KERN_ERR, "revision level too high, "
3087 : "forcing read-only mode");
3088 0 : err = -EROFS;
3089 0 : goto done;
3090 : }
3091 2556 : if (read_only)
3092 12 : goto done;
3093 2544 : if (!(sbi->s_mount_state & EXT4_VALID_FS))
3094 2 : ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3095 : "running e2fsck is recommended");
3096 2542 : else if (sbi->s_mount_state & EXT4_ERROR_FS)
3097 0 : ext4_msg(sb, KERN_WARNING,
3098 : "warning: mounting fs with errors, "
3099 : "running e2fsck is recommended");
3100 2542 : else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3101 0 : le16_to_cpu(es->s_mnt_count) >=
3102 : (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3103 0 : ext4_msg(sb, KERN_WARNING,
3104 : "warning: maximal mount count reached, "
3105 : "running e2fsck is recommended");
3106 2542 : else if (le32_to_cpu(es->s_checkinterval) &&
3107 0 : (ext4_get_tstamp(es, s_lastcheck) +
3108 0 : le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3109 0 : ext4_msg(sb, KERN_WARNING,
3110 : "warning: checktime reached, "
3111 : "running e2fsck is recommended");
3112 2544 : if (!sbi->s_journal)
3113 9 : es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
3114 2544 : if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3115 0 : es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3116 2544 : le16_add_cpu(&es->s_mnt_count, 1);
3117 2544 : ext4_update_tstamp(es, s_mtime);
3118 2544 : if (sbi->s_journal) {
3119 2535 : ext4_set_feature_journal_needs_recovery(sb);
3120 2535 : if (ext4_has_feature_orphan_file(sb))
3121 2 : ext4_set_feature_orphan_present(sb);
3122 : }
3123 :
3124 2544 : err = ext4_commit_super(sb);
3125 2556 : done:
3126 2556 : if (test_opt(sb, DEBUG))
3127 2 : printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3128 : "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3129 : sb->s_blocksize,
3130 : sbi->s_groups_count,
3131 : EXT4_BLOCKS_PER_GROUP(sb),
3132 : EXT4_INODES_PER_GROUP(sb),
3133 : sbi->s_mount_opt, sbi->s_mount_opt2);
3134 2556 : return err;
3135 : }
3136 :
3137 2347 : int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3138 : {
3139 2347 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3140 2347 : struct flex_groups **old_groups, **new_groups;
3141 2347 : int size, i, j;
3142 :
3143 2347 : if (!sbi->s_log_groups_per_flex)
3144 : return 0;
3145 :
3146 2347 : size = ext4_flex_group(sbi, ngroup - 1) + 1;
3147 2347 : if (size <= sbi->s_flex_groups_allocated)
3148 : return 0;
3149 :
3150 2334 : new_groups = kvzalloc(roundup_pow_of_two(size *
3151 : sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3152 2334 : if (!new_groups) {
3153 0 : ext4_msg(sb, KERN_ERR,
3154 : "not enough memory for %d flex group pointers", size);
3155 0 : return -ENOMEM;
3156 : }
3157 56580 : for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3158 54246 : new_groups[i] = kvzalloc(roundup_pow_of_two(
3159 : sizeof(struct flex_groups)),
3160 : GFP_KERNEL);
3161 54246 : if (!new_groups[i]) {
3162 0 : for (j = sbi->s_flex_groups_allocated; j < i; j++)
3163 0 : kvfree(new_groups[j]);
3164 0 : kvfree(new_groups);
3165 0 : ext4_msg(sb, KERN_ERR,
3166 : "not enough memory for %d flex groups", size);
3167 0 : return -ENOMEM;
3168 : }
3169 : }
3170 2334 : rcu_read_lock();
3171 2334 : old_groups = rcu_dereference(sbi->s_flex_groups);
3172 2334 : if (old_groups)
3173 24 : memcpy(new_groups, old_groups,
3174 : (sbi->s_flex_groups_allocated *
3175 : sizeof(struct flex_groups *)));
3176 2334 : rcu_read_unlock();
3177 2334 : rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3178 2334 : sbi->s_flex_groups_allocated = size;
3179 2334 : if (old_groups)
3180 12 : ext4_kvfree_array_rcu(old_groups);
3181 : return 0;
3182 : }
3183 :
3184 2322 : static int ext4_fill_flex_info(struct super_block *sb)
3185 : {
3186 2322 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3187 2322 : struct ext4_group_desc *gdp = NULL;
3188 2322 : struct flex_groups *fg;
3189 2322 : ext4_group_t flex_group;
3190 2322 : int i, err;
3191 :
3192 2322 : sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3193 2322 : if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3194 0 : sbi->s_log_groups_per_flex = 0;
3195 0 : return 1;
3196 : }
3197 :
3198 2322 : err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3199 2322 : if (err)
3200 0 : goto failed;
3201 :
3202 851244 : for (i = 0; i < sbi->s_groups_count; i++) {
3203 848922 : gdp = ext4_get_group_desc(sb, i, NULL);
3204 :
3205 848922 : flex_group = ext4_flex_group(sbi, i);
3206 848922 : fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3207 1697840 : atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3208 1697840 : atomic64_add(ext4_free_group_clusters(sb, gdp),
3209 : &fg->free_clusters);
3210 1697840 : atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3211 : }
3212 :
3213 : return 1;
3214 : failed:
3215 0 : return 0;
3216 : }
3217 :
3218 11255623 : static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3219 : struct ext4_group_desc *gdp)
3220 : {
3221 11255623 : int offset = offsetof(struct ext4_group_desc, bg_checksum);
3222 11255623 : __u16 crc = 0;
3223 11255623 : __le32 le_group = cpu_to_le32(block_group);
3224 11255623 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3225 :
3226 11255623 : if (ext4_has_metadata_csum(sbi->s_sb)) {
3227 : /* Use new metadata_csum algorithm */
3228 11263718 : __u32 csum32;
3229 11263718 : __u16 dummy_csum = 0;
3230 :
3231 11263718 : csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3232 : sizeof(le_group));
3233 11277799 : csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3234 11281959 : csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3235 : sizeof(dummy_csum));
3236 11273610 : offset += sizeof(dummy_csum);
3237 11273610 : if (offset < sbi->s_desc_size)
3238 11272586 : csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3239 : sbi->s_desc_size - offset);
3240 :
3241 11276865 : crc = csum32 & 0xFFFF;
3242 11276865 : goto out;
3243 : }
3244 :
3245 : /* old crc16 code */
3246 1173 : if (!ext4_has_feature_gdt_csum(sb))
3247 : return 0;
3248 :
3249 1173 : crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3250 1172 : crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3251 1173 : crc = crc16(crc, (__u8 *)gdp, offset);
3252 1173 : offset += sizeof(gdp->bg_checksum); /* skip checksum */
3253 : /* for checksum of struct ext4_group_desc do the rest...*/
3254 1173 : if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
3255 0 : crc = crc16(crc, (__u8 *)gdp + offset,
3256 : sbi->s_desc_size - offset);
3257 :
3258 1173 : out:
3259 : return cpu_to_le16(crc);
3260 : }
3261 :
3262 925142 : int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3263 : struct ext4_group_desc *gdp)
3264 : {
3265 1847453 : if (ext4_has_group_desc_csum(sb) &&
3266 922311 : (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3267 0 : return 0;
3268 :
3269 : return 1;
3270 : }
3271 :
3272 10343401 : void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3273 : struct ext4_group_desc *gdp)
3274 : {
3275 10343401 : if (!ext4_has_group_desc_csum(sb))
3276 : return;
3277 10325794 : gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3278 : }
3279 :
3280 : /* Called at mount-time, super-block is locked */
3281 2520 : static int ext4_check_descriptors(struct super_block *sb,
3282 : ext4_fsblk_t sb_block,
3283 : ext4_group_t *first_not_zeroed)
3284 : {
3285 2520 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3286 2520 : ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3287 2520 : ext4_fsblk_t last_block;
3288 2520 : ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3289 2520 : ext4_fsblk_t block_bitmap;
3290 2520 : ext4_fsblk_t inode_bitmap;
3291 2520 : ext4_fsblk_t inode_table;
3292 2520 : int flexbg_flag = 0;
3293 2520 : ext4_group_t i, grp = sbi->s_groups_count;
3294 :
3295 2520 : if (ext4_has_feature_flex_bg(sb))
3296 2328 : flexbg_flag = 1;
3297 :
3298 2520 : ext4_debug("Checking group descriptors");
3299 :
3300 855608 : for (i = 0; i < sbi->s_groups_count; i++) {
3301 853089 : struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3302 :
3303 853089 : if (i == sbi->s_groups_count - 1 || flexbg_flag)
3304 1700186 : last_block = ext4_blocks_count(sbi->s_es) - 1;
3305 : else
3306 2898 : last_block = first_block +
3307 2898 : (EXT4_BLOCKS_PER_GROUP(sb) - 1);
3308 :
3309 853089 : if ((grp == sbi->s_groups_count) &&
3310 160538 : !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3311 1942 : grp = i;
3312 :
3313 853089 : block_bitmap = ext4_block_bitmap(sb, gdp);
3314 853089 : if (block_bitmap == sb_block) {
3315 1 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3316 : "Block bitmap for group %u overlaps "
3317 : "superblock", i);
3318 1 : if (!sb_rdonly(sb))
3319 : return 0;
3320 : }
3321 853088 : if (block_bitmap >= sb_block + 1 &&
3322 : block_bitmap <= last_bg_block) {
3323 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3324 : "Block bitmap for group %u overlaps "
3325 : "block group descriptors", i);
3326 0 : if (!sb_rdonly(sb))
3327 : return 0;
3328 : }
3329 853088 : if (block_bitmap < first_block || block_bitmap > last_block) {
3330 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3331 : "Block bitmap for group %u not in group "
3332 : "(block %llu)!", i, block_bitmap);
3333 0 : return 0;
3334 : }
3335 853088 : inode_bitmap = ext4_inode_bitmap(sb, gdp);
3336 853088 : if (inode_bitmap == sb_block) {
3337 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3338 : "Inode bitmap for group %u overlaps "
3339 : "superblock", i);
3340 0 : if (!sb_rdonly(sb))
3341 : return 0;
3342 : }
3343 853088 : if (inode_bitmap >= sb_block + 1 &&
3344 : inode_bitmap <= last_bg_block) {
3345 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3346 : "Inode bitmap for group %u overlaps "
3347 : "block group descriptors", i);
3348 0 : if (!sb_rdonly(sb))
3349 : return 0;
3350 : }
3351 853088 : if (inode_bitmap < first_block || inode_bitmap > last_block) {
3352 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3353 : "Inode bitmap for group %u not in group "
3354 : "(block %llu)!", i, inode_bitmap);
3355 0 : return 0;
3356 : }
3357 853088 : inode_table = ext4_inode_table(sb, gdp);
3358 853088 : if (inode_table == sb_block) {
3359 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3360 : "Inode table for group %u overlaps "
3361 : "superblock", i);
3362 0 : if (!sb_rdonly(sb))
3363 : return 0;
3364 : }
3365 853088 : if (inode_table >= sb_block + 1 &&
3366 : inode_table <= last_bg_block) {
3367 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3368 : "Inode table for group %u overlaps "
3369 : "block group descriptors", i);
3370 0 : if (!sb_rdonly(sb))
3371 : return 0;
3372 : }
3373 853088 : if (inode_table < first_block ||
3374 853088 : inode_table + sbi->s_itb_per_group - 1 > last_block) {
3375 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3376 : "Inode table for group %u not in group "
3377 : "(block %llu)!", i, inode_table);
3378 0 : return 0;
3379 : }
3380 853088 : ext4_lock_group(sb, i);
3381 853088 : if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3382 0 : ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3383 : "Checksum for group %u failed (%u!=%u)",
3384 : i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3385 : gdp)), le16_to_cpu(gdp->bg_checksum));
3386 0 : if (!sb_rdonly(sb)) {
3387 0 : ext4_unlock_group(sb, i);
3388 0 : return 0;
3389 : }
3390 : }
3391 853088 : ext4_unlock_group(sb, i);
3392 853088 : if (!flexbg_flag)
3393 3090 : first_block += EXT4_BLOCKS_PER_GROUP(sb);
3394 : }
3395 2519 : if (NULL != first_not_zeroed)
3396 2519 : *first_not_zeroed = grp;
3397 : return 1;
3398 : }
3399 :
3400 : /*
3401 : * Maximal extent format file size.
3402 : * Resulting logical blkno at s_maxbytes must fit in our on-disk
3403 : * extent format containers, within a sector_t, and within i_blocks
3404 : * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
3405 : * so that won't be a limiting factor.
3406 : *
3407 : * However there is other limiting factor. We do store extents in the form
3408 : * of starting block and length, hence the resulting length of the extent
3409 : * covering maximum file size must fit into on-disk format containers as
3410 : * well. Given that length is always by 1 unit bigger than max unit (because
3411 : * we count 0 as well) we have to lower the s_maxbytes by one fs block.
3412 : *
3413 : * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3414 : */
3415 2521 : static loff_t ext4_max_size(int blkbits, int has_huge_files)
3416 : {
3417 2521 : loff_t res;
3418 2521 : loff_t upper_limit = MAX_LFS_FILESIZE;
3419 :
3420 2521 : BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3421 :
3422 2521 : if (!has_huge_files) {
3423 192 : upper_limit = (1LL << 32) - 1;
3424 :
3425 : /* total blocks in file system block size */
3426 192 : upper_limit >>= (blkbits - 9);
3427 192 : upper_limit <<= blkbits;
3428 : }
3429 :
3430 : /*
3431 : * 32-bit extent-start container, ee_block. We lower the maxbytes
3432 : * by one fs block, so ee_len can cover the extent of maximum file
3433 : * size
3434 : */
3435 2521 : res = (1LL << 32) - 1;
3436 2521 : res <<= blkbits;
3437 :
3438 : /* Sanity check against vm- & vfs- imposed limits */
3439 2521 : if (res > upper_limit)
3440 : res = upper_limit;
3441 :
3442 2521 : return res;
3443 : }
3444 :
3445 : /*
3446 : * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
3447 : * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3448 : * We need to be 1 filesystem block less than the 2^48 sector limit.
3449 : */
3450 2521 : static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3451 : {
3452 2521 : loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3453 2521 : int meta_blocks;
3454 2521 : unsigned int ppb = 1 << (bits - 2);
3455 :
3456 : /*
3457 : * This is calculated to be the largest file size for a dense, block
3458 : * mapped file such that the file's total number of 512-byte sectors,
3459 : * including data and all indirect blocks, does not exceed (2^48 - 1).
3460 : *
3461 : * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
3462 : * number of 512-byte sectors of the file.
3463 : */
3464 2521 : if (!has_huge_files) {
3465 : /*
3466 : * !has_huge_files or implies that the inode i_block field
3467 : * represents total file blocks in 2^32 512-byte sectors ==
3468 : * size of vfs inode i_blocks * 8
3469 : */
3470 192 : upper_limit = (1LL << 32) - 1;
3471 :
3472 : /* total blocks in file system block size */
3473 192 : upper_limit >>= (bits - 9);
3474 :
3475 : } else {
3476 : /*
3477 : * We use 48 bit ext4_inode i_blocks
3478 : * With EXT4_HUGE_FILE_FL set the i_blocks
3479 : * represent total number of blocks in
3480 : * file system block size
3481 : */
3482 : upper_limit = (1LL << 48) - 1;
3483 :
3484 : }
3485 :
3486 : /* Compute how many blocks we can address by block tree */
3487 2521 : res += ppb;
3488 2521 : res += ppb * ppb;
3489 2521 : res += ((loff_t)ppb) * ppb * ppb;
3490 : /* Compute how many metadata blocks are needed */
3491 2521 : meta_blocks = 1;
3492 2521 : meta_blocks += 1 + ppb;
3493 2521 : meta_blocks += 1 + ppb + ppb * ppb;
3494 : /* Does block tree limit file size? */
3495 2521 : if (res + meta_blocks <= upper_limit)
3496 2329 : goto check_lfs;
3497 :
3498 192 : res = upper_limit;
3499 : /* How many metadata blocks are needed for addressing upper_limit? */
3500 192 : upper_limit -= EXT4_NDIR_BLOCKS;
3501 : /* indirect blocks */
3502 192 : meta_blocks = 1;
3503 192 : upper_limit -= ppb;
3504 : /* double indirect blocks */
3505 192 : if (upper_limit < ppb * ppb) {
3506 0 : meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3507 0 : res -= meta_blocks;
3508 0 : goto check_lfs;
3509 : }
3510 192 : meta_blocks += 1 + ppb;
3511 192 : upper_limit -= ppb * ppb;
3512 : /* tripple indirect blocks for the rest */
3513 192 : meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3514 192 : DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3515 192 : res -= meta_blocks;
3516 2521 : check_lfs:
3517 2521 : res <<= bits;
3518 2521 : if (res > MAX_LFS_FILESIZE)
3519 : res = MAX_LFS_FILESIZE;
3520 :
3521 2521 : return res;
3522 : }
3523 :
3524 32796 : static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3525 : ext4_fsblk_t logical_sb_block, int nr)
3526 : {
3527 32796 : struct ext4_sb_info *sbi = EXT4_SB(sb);
3528 32796 : ext4_group_t bg, first_meta_bg;
3529 32796 : int has_super = 0;
3530 :
3531 32796 : first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3532 :
3533 32796 : if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3534 30656 : return logical_sb_block + nr + 1;
3535 2140 : bg = sbi->s_desc_per_block * nr;
3536 2140 : if (ext4_bg_has_super(sb, bg))
3537 6 : has_super = 1;
3538 :
3539 : /*
3540 : * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3541 : * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
3542 : * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3543 : * compensate.
3544 : */
3545 2140 : if (sb->s_blocksize == 1024 && nr == 0 &&
3546 0 : le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3547 0 : has_super++;
3548 :
3549 2140 : return (has_super + ext4_group_first_block_no(sb, bg));
3550 : }
3551 :
3552 : /**
3553 : * ext4_get_stripe_size: Get the stripe size.
3554 : * @sbi: In memory super block info
3555 : *
3556 : * If we have specified it via mount option, then
3557 : * use the mount option value. If the value specified at mount time is
3558 : * greater than the blocks per group use the super block value.
3559 : * If the super block value is greater than blocks per group return 0.
3560 : * Allocator needs it be less than blocks per group.
3561 : *
3562 : */
3563 2519 : static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3564 : {
3565 2519 : unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3566 2519 : unsigned long stripe_width =
3567 2519 : le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3568 2519 : int ret;
3569 :
3570 2519 : if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3571 2 : ret = sbi->s_stripe;
3572 2517 : else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3573 2474 : ret = stripe_width;
3574 43 : else if (stride && stride <= sbi->s_blocks_per_group)
3575 0 : ret = stride;
3576 : else
3577 : ret = 0;
3578 :
3579 : /*
3580 : * If the stripe width is 1, this makes no sense and
3581 : * we set it to 0 to turn off stripe handling code.
3582 : */
3583 2476 : if (ret <= 1)
3584 43 : ret = 0;
3585 :
3586 2519 : return ret;
3587 : }
3588 :
3589 : /*
3590 : * Check whether this filesystem can be mounted based on
3591 : * the features present and the RDONLY/RDWR mount requested.
3592 : * Returns 1 if this filesystem can be mounted as requested,
3593 : * 0 if it cannot be.
3594 : */
3595 2594 : int ext4_feature_set_ok(struct super_block *sb, int readonly)
3596 : {
3597 2594 : if (ext4_has_unknown_ext4_incompat_features(sb)) {
3598 0 : ext4_msg(sb, KERN_ERR,
3599 : "Couldn't mount because of "
3600 : "unsupported optional features (%x)",
3601 : (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3602 : ~EXT4_FEATURE_INCOMPAT_SUPP));
3603 0 : return 0;
3604 : }
3605 :
3606 : #if !IS_ENABLED(CONFIG_UNICODE)
3607 2594 : if (ext4_has_feature_casefold(sb)) {
3608 0 : ext4_msg(sb, KERN_ERR,
3609 : "Filesystem with casefold feature cannot be "
3610 : "mounted without CONFIG_UNICODE");
3611 0 : return 0;
3612 : }
3613 : #endif
3614 :
3615 2594 : if (readonly)
3616 : return 1;
3617 :
3618 2581 : if (ext4_has_feature_readonly(sb)) {
3619 0 : ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3620 0 : sb->s_flags |= SB_RDONLY;
3621 0 : return 1;
3622 : }
3623 :
3624 : /* Check that feature set is OK for a read-write mount */
3625 2581 : if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3626 0 : ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3627 : "unsupported optional features (%x)",
3628 : (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3629 : ~EXT4_FEATURE_RO_COMPAT_SUPP));
3630 0 : return 0;
3631 : }
3632 2581 : if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3633 0 : ext4_msg(sb, KERN_ERR,
3634 : "Can't support bigalloc feature without "
3635 : "extents feature\n");
3636 0 : return 0;
3637 : }
3638 :
3639 : #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3640 : if (!readonly && (ext4_has_feature_quota(sb) ||
3641 : ext4_has_feature_project(sb))) {
3642 : ext4_msg(sb, KERN_ERR,
3643 : "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3644 : return 0;
3645 : }
3646 : #endif /* CONFIG_QUOTA */
3647 : return 1;
3648 : }
3649 :
3650 : /*
3651 : * This function is called once a day if we have errors logged
3652 : * on the file system
3653 : */
3654 0 : static void print_daily_error_info(struct timer_list *t)
3655 : {
3656 0 : struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3657 0 : struct super_block *sb = sbi->s_sb;
3658 0 : struct ext4_super_block *es = sbi->s_es;
3659 :
3660 0 : if (es->s_error_count)
3661 : /* fsck newer than v1.41.13 is needed to clean this condition. */
3662 0 : ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3663 : le32_to_cpu(es->s_error_count));
3664 0 : if (es->s_first_error_time) {
3665 0 : printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3666 : sb->s_id,
3667 : ext4_get_tstamp(es, s_first_error_time),
3668 : (int) sizeof(es->s_first_error_func),
3669 : es->s_first_error_func,
3670 : le32_to_cpu(es->s_first_error_line));
3671 0 : if (es->s_first_error_ino)
3672 0 : printk(KERN_CONT ": inode %u",
3673 : le32_to_cpu(es->s_first_error_ino));
3674 0 : if (es->s_first_error_block)
3675 0 : printk(KERN_CONT ": block %llu", (unsigned long long)
3676 : le64_to_cpu(es->s_first_error_block));
3677 0 : printk(KERN_CONT "\n");
3678 : }
3679 0 : if (es->s_last_error_time) {
3680 0 : printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3681 : sb->s_id,
3682 : ext4_get_tstamp(es, s_last_error_time),
3683 : (int) sizeof(es->s_last_error_func),
3684 : es->s_last_error_func,
3685 : le32_to_cpu(es->s_last_error_line));
3686 0 : if (es->s_last_error_ino)
3687 0 : printk(KERN_CONT ": inode %u",
3688 : le32_to_cpu(es->s_last_error_ino));
3689 0 : if (es->s_last_error_block)
3690 0 : printk(KERN_CONT ": block %llu", (unsigned long long)
3691 : le64_to_cpu(es->s_last_error_block));
3692 0 : printk(KERN_CONT "\n");
3693 : }
3694 0 : mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
3695 0 : }
3696 :
3697 : /* Find next suitable group and run ext4_init_inode_table */
3698 10551 : static int ext4_run_li_request(struct ext4_li_request *elr)
3699 : {
3700 10551 : struct ext4_group_desc *gdp = NULL;
3701 10551 : struct super_block *sb = elr->lr_super;
3702 10551 : ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3703 10551 : ext4_group_t group = elr->lr_next_group;
3704 10551 : unsigned int prefetch_ios = 0;
3705 10551 : int ret = 0;
3706 10551 : int nr = EXT4_SB(sb)->s_mb_prefetch;
3707 10551 : u64 start_time;
3708 :
3709 10551 : if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3710 1713 : elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
3711 1713 : ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
3712 1713 : trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
3713 1713 : if (group >= elr->lr_next_group) {
3714 584 : ret = 1;
3715 584 : if (elr->lr_first_not_zeroed != ngroups &&
3716 157 : !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3717 157 : elr->lr_next_group = elr->lr_first_not_zeroed;
3718 157 : elr->lr_mode = EXT4_LI_MODE_ITABLE;
3719 157 : ret = 0;
3720 : }
3721 : }
3722 1713 : return ret;
3723 : }
3724 :
3725 9105 : for (; group < ngroups; group++) {
3726 9071 : gdp = ext4_get_group_desc(sb, group, NULL);
3727 9071 : if (!gdp) {
3728 : ret = 1;
3729 : break;
3730 : }
3731 :
3732 9071 : if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3733 : break;
3734 : }
3735 :
3736 8838 : if (group >= ngroups)
3737 : ret = 1;
3738 :
3739 8804 : if (!ret) {
3740 8804 : start_time = ktime_get_real_ns();
3741 8804 : ret = ext4_init_inode_table(sb, group,
3742 8804 : elr->lr_timeout ? 0 : 1);
3743 8804 : trace_ext4_lazy_itable_init(sb, group);
3744 8804 : if (elr->lr_timeout == 0) {
3745 1606 : elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3746 803 : EXT4_SB(elr->lr_super)->s_li_wait_mult);
3747 : }
3748 8804 : elr->lr_next_sched = jiffies + elr->lr_timeout;
3749 8804 : elr->lr_next_group = group + 1;
3750 : }
3751 : return ret;
3752 : }
3753 :
3754 : /*
3755 : * Remove lr_request from the list_request and free the
3756 : * request structure. Should be called with li_list_mtx held
3757 : */
3758 2824 : static void ext4_remove_li_request(struct ext4_li_request *elr)
3759 : {
3760 2824 : if (!elr)
3761 : return;
3762 :
3763 2545 : list_del(&elr->lr_request);
3764 2545 : EXT4_SB(elr->lr_super)->s_li_request = NULL;
3765 2545 : kfree(elr);
3766 : }
3767 :
3768 2564 : static void ext4_unregister_li_request(struct super_block *sb)
3769 : {
3770 2564 : mutex_lock(&ext4_li_mtx);
3771 2564 : if (!ext4_li_info) {
3772 207 : mutex_unlock(&ext4_li_mtx);
3773 207 : return;
3774 : }
3775 :
3776 2357 : mutex_lock(&ext4_li_info->li_list_mtx);
3777 2357 : ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3778 2357 : mutex_unlock(&ext4_li_info->li_list_mtx);
3779 2357 : mutex_unlock(&ext4_li_mtx);
3780 : }
3781 :
/*
 * Result of kthread_run() for the "ext4lazyinit" thread, stored by
 * ext4_run_lazyinit_thread() (an ERR_PTR value if creation failed) and
 * passed to kthread_stop() by ext4_destroy_lazyinit_thread().
 */
static struct task_struct *ext4_lazyinit_task;
3783 :
/*
 * This is the function where ext4lazyinit thread lives. It walks
 * through the request list searching for next scheduled filesystem.
 * When such a fs is found, run the lazy initialization request
 * (ext4_run_li_request) and keep track of the time spent in this
 * function. Based on that time we compute next schedule time of
 * the request. When walking through the list is complete, compute
 * next waking time and put itself into sleep.
 *
 * @arg is the struct ext4_lazy_init whose li_request_list is served.
 * The thread exits (returning 0) once that list is found empty, and
 * frees ext4_li_info on its way out.
 */
static int ext4_lazyinit_thread(void *arg)
{
	struct ext4_lazy_init *eli = arg;
	struct list_head *pos, *n;
	struct ext4_li_request *elr;
	unsigned long next_wakeup, cur;

	BUG_ON(NULL == eli);
	set_freezable();

cont_thread:
	while (true) {
		/* Sentinel: no request has announced a wakeup time yet. */
		next_wakeup = MAX_JIFFY_OFFSET;

		mutex_lock(&eli->li_list_mtx);
		if (list_empty(&eli->li_request_list)) {
			mutex_unlock(&eli->li_list_mtx);
			goto exit_thread;
		}
		list_for_each_safe(pos, n, &eli->li_request_list) {
			int err = 0;
			int progress = 0;
			elr = list_entry(pos, struct ext4_li_request,
					 lr_request);

			/* Not due yet: only remember when it will be. */
			if (time_before(jiffies, elr->lr_next_sched)) {
				if (time_before(elr->lr_next_sched, next_wakeup))
					next_wakeup = elr->lr_next_sched;
				continue;
			}
			if (down_read_trylock(&elr->lr_super->s_umount)) {
				if (sb_start_write_trylock(elr->lr_super)) {
					progress = 1;
					/*
					 * We hold sb->s_umount, sb can not
					 * be removed from the list, it is
					 * now safe to drop li_list_mtx
					 */
					mutex_unlock(&eli->li_list_mtx);
					err = ext4_run_li_request(elr);
					sb_end_write(elr->lr_super);
					mutex_lock(&eli->li_list_mtx);
					/*
					 * li_list_mtx was dropped above, so
					 * re-read the successor (presumably
					 * the list may have changed meanwhile).
					 */
					n = pos->next;
				}
				up_read((&elr->lr_super->s_umount));
			}
			/* non-zero: request finished or failed, remove the lazy_init job */
			if (err) {
				ext4_remove_li_request(elr);
				continue;
			}
			/*
			 * One of the trylocks failed, so the request did not
			 * run: retry after a random delay.
			 */
			if (!progress) {
				elr->lr_next_sched = jiffies +
					get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
			}
			if (time_before(elr->lr_next_sched, next_wakeup))
				next_wakeup = elr->lr_next_sched;
		}
		mutex_unlock(&eli->li_list_mtx);

		try_to_freeze();

		cur = jiffies;
		/*
		 * Either a deadline has already passed or no request
		 * announced one: go around again without sleeping.
		 */
		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			cond_resched();
			continue;
		}

		schedule_timeout_interruptible(next_wakeup - cur);

		/* Asked to stop: drop every pending request and leave. */
		if (kthread_should_stop()) {
			ext4_clear_request_list();
			goto exit_thread;
		}
	}

exit_thread:
	/*
	 * It looks like the request list is empty, but we need
	 * to check it under the li_list_mtx lock, to prevent any
	 * additions into it, and of course we should lock ext4_li_mtx
	 * to atomically free the list and ext4_li_info, because at
	 * this point another ext4 filesystem could be registering
	 * new one.
	 */
	mutex_lock(&ext4_li_mtx);
	mutex_lock(&eli->li_list_mtx);
	if (!list_empty(&eli->li_request_list)) {
		mutex_unlock(&eli->li_list_mtx);
		mutex_unlock(&ext4_li_mtx);
		goto cont_thread;
	}
	mutex_unlock(&eli->li_list_mtx);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	mutex_unlock(&ext4_li_mtx);

	return 0;
}
3893 :
3894 0 : static void ext4_clear_request_list(void)
3895 : {
3896 0 : struct list_head *pos, *n;
3897 0 : struct ext4_li_request *elr;
3898 :
3899 0 : mutex_lock(&ext4_li_info->li_list_mtx);
3900 0 : list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3901 0 : elr = list_entry(pos, struct ext4_li_request,
3902 : lr_request);
3903 0 : ext4_remove_li_request(elr);
3904 : }
3905 0 : mutex_unlock(&ext4_li_info->li_list_mtx);
3906 0 : }
3907 :
3908 913 : static int ext4_run_lazyinit_thread(void)
3909 : {
3910 913 : ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3911 : ext4_li_info, "ext4lazyinit");
3912 913 : if (IS_ERR(ext4_lazyinit_task)) {
3913 0 : int err = PTR_ERR(ext4_lazyinit_task);
3914 0 : ext4_clear_request_list();
3915 0 : kfree(ext4_li_info);
3916 0 : ext4_li_info = NULL;
3917 0 : printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3918 : "initialization thread\n",
3919 : err);
3920 0 : return err;
3921 : }
3922 913 : ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3923 913 : return 0;
3924 : }
3925 :
3926 : /*
3927 : * Check whether it make sense to run itable init. thread or not.
3928 : * If there is at least one uninitialized inode table, return
3929 : * corresponding group number, else the loop goes through all
3930 : * groups and return total number of groups.
3931 : */
3932 577 : static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3933 : {
3934 577 : ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3935 577 : struct ext4_group_desc *gdp = NULL;
3936 :
3937 577 : if (!ext4_has_group_desc_csum(sb))
3938 : return ngroups;
3939 :
3940 1050 : for (group = 0; group < ngroups; group++) {
3941 1047 : gdp = ext4_get_group_desc(sb, group, NULL);
3942 1047 : if (!gdp)
3943 0 : continue;
3944 :
3945 1047 : if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3946 : break;
3947 : }
3948 :
3949 : return group;
3950 : }
3951 :
3952 913 : static int ext4_li_info_new(void)
3953 : {
3954 913 : struct ext4_lazy_init *eli = NULL;
3955 :
3956 913 : eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3957 913 : if (!eli)
3958 : return -ENOMEM;
3959 :
3960 913 : INIT_LIST_HEAD(&eli->li_request_list);
3961 913 : mutex_init(&eli->li_list_mtx);
3962 :
3963 913 : eli->li_state |= EXT4_LAZYINIT_QUIT;
3964 :
3965 913 : ext4_li_info = eli;
3966 :
3967 913 : return 0;
3968 : }
3969 :
3970 2544 : static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3971 : ext4_group_t start)
3972 : {
3973 2544 : struct ext4_li_request *elr;
3974 :
3975 2544 : elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3976 2544 : if (!elr)
3977 : return NULL;
3978 :
3979 2544 : elr->lr_super = sb;
3980 2544 : elr->lr_first_not_zeroed = start;
3981 2544 : if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3982 2 : elr->lr_mode = EXT4_LI_MODE_ITABLE;
3983 2 : elr->lr_next_group = start;
3984 : } else {
3985 2542 : elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3986 : }
3987 :
3988 : /*
3989 : * Randomize first schedule time of the request to
3990 : * spread the inode table initialization requests
3991 : * better.
3992 : */
3993 2544 : elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3994 2544 : return elr;
3995 : }
3996 :
3997 3112 : int ext4_register_li_request(struct super_block *sb,
3998 : ext4_group_t first_not_zeroed)
3999 : {
4000 3112 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4001 3112 : struct ext4_li_request *elr = NULL;
4002 3112 : ext4_group_t ngroups = sbi->s_groups_count;
4003 3112 : int ret = 0;
4004 :
4005 3112 : mutex_lock(&ext4_li_mtx);
4006 3112 : if (sbi->s_li_request != NULL) {
4007 : /*
4008 : * Reset timeout so it can be computed again, because
4009 : * s_li_wait_mult might have changed.
4010 : */
4011 556 : sbi->s_li_request->lr_timeout = 0;
4012 556 : goto out;
4013 : }
4014 :
4015 2556 : if (sb_rdonly(sb) ||
4016 2544 : (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
4017 2 : (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
4018 12 : goto out;
4019 :
4020 2544 : elr = ext4_li_request_new(sb, first_not_zeroed);
4021 2544 : if (!elr) {
4022 0 : ret = -ENOMEM;
4023 0 : goto out;
4024 : }
4025 :
4026 2544 : if (NULL == ext4_li_info) {
4027 913 : ret = ext4_li_info_new();
4028 913 : if (ret)
4029 0 : goto out;
4030 : }
4031 :
4032 2544 : mutex_lock(&ext4_li_info->li_list_mtx);
4033 2544 : list_add(&elr->lr_request, &ext4_li_info->li_request_list);
4034 2544 : mutex_unlock(&ext4_li_info->li_list_mtx);
4035 :
4036 2544 : sbi->s_li_request = elr;
4037 : /*
4038 : * set elr to NULL here since it has been inserted to
4039 : * the request_list and the removal and free of it is
4040 : * handled by ext4_clear_request_list from now on.
4041 : */
4042 2544 : elr = NULL;
4043 :
4044 2544 : if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4045 913 : ret = ext4_run_lazyinit_thread();
4046 913 : if (ret)
4047 0 : goto out;
4048 : }
4049 2544 : out:
4050 3112 : mutex_unlock(&ext4_li_mtx);
4051 3112 : if (ret)
4052 0 : kfree(elr);
4053 3112 : return ret;
4054 : }
4055 :
4056 : /*
4057 : * We do not need to lock anything since this is called on
4058 : * module unload.
4059 : */
4060 0 : static void ext4_destroy_lazyinit_thread(void)
4061 : {
4062 : /*
4063 : * If thread exited earlier
4064 : * there's nothing to be done.
4065 : */
4066 0 : if (!ext4_li_info || !ext4_lazyinit_task)
4067 : return;
4068 :
4069 0 : kthread_stop(ext4_lazyinit_task);
4070 : }
4071 :
4072 2501 : static int set_journal_csum_feature_set(struct super_block *sb)
4073 : {
4074 2501 : int ret = 1;
4075 2501 : int compat, incompat;
4076 2501 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4077 :
4078 2501 : if (ext4_has_metadata_csum(sb)) {
4079 : /* journal checksum v3 */
4080 : compat = 0;
4081 : incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4082 : } else {
4083 : /* journal checksum v1 */
4084 188 : compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4085 188 : incompat = 0;
4086 : }
4087 :
4088 2501 : jbd2_journal_clear_features(sbi->s_journal,
4089 : JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4090 : JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4091 : JBD2_FEATURE_INCOMPAT_CSUM_V2);
4092 2501 : if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4093 1 : ret = jbd2_journal_set_features(sbi->s_journal,
4094 : compat, 0,
4095 1 : JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4096 : incompat);
4097 2500 : } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4098 2287 : ret = jbd2_journal_set_features(sbi->s_journal,
4099 : compat, 0,
4100 : incompat);
4101 2287 : jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4102 : JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4103 : } else {
4104 213 : jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4105 : JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4106 : }
4107 :
4108 2501 : return ret;
4109 : }
4110 :
4111 : /*
4112 : * Note: calculating the overhead so we can be compatible with
4113 : * historical BSD practice is quite difficult in the face of
4114 : * clusters/bigalloc. This is because multiple metadata blocks from
4115 : * different block group can end up in the same allocation cluster.
4116 : * Calculating the exact overhead in the face of clustered allocation
4117 : * requires either O(all block bitmaps) in memory or O(number of block
4118 : * groups**2) in time. We will still calculate the superblock for
4119 : * older file systems --- and if we come across with a bigalloc file
4120 : * system with zero in s_overhead_clusters the estimate will be close to
4121 : * correct especially for very large cluster sizes --- but for newer
4122 : * file systems, it's better to calculate this figure once at mkfs
4123 : * time, and store it in the superblock. If the superblock value is
4124 : * present (even for non-bigalloc file systems), we will use it.
4125 : */
4126 983105 : static int count_overhead(struct super_block *sb, ext4_group_t grp,
4127 : char *buf)
4128 : {
4129 983105 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4130 983105 : struct ext4_group_desc *gdp;
4131 983105 : ext4_fsblk_t first_block, last_block, b;
4132 983105 : ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4133 983105 : int s, j, count = 0;
4134 983105 : int has_super = ext4_bg_has_super(sb, grp);
4135 :
4136 983105 : if (!ext4_has_feature_bigalloc(sb))
4137 983105 : return (has_super + ext4_bg_num_gdb(sb, grp) +
4138 24630 : (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4139 983105 : sbi->s_itb_per_group + 2);
4140 :
4141 0 : first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4142 0 : (grp * EXT4_BLOCKS_PER_GROUP(sb));
4143 0 : last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4144 0 : for (i = 0; i < ngroups; i++) {
4145 0 : gdp = ext4_get_group_desc(sb, i, NULL);
4146 0 : b = ext4_block_bitmap(sb, gdp);
4147 0 : if (b >= first_block && b <= last_block) {
4148 0 : ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4149 0 : count++;
4150 : }
4151 0 : b = ext4_inode_bitmap(sb, gdp);
4152 0 : if (b >= first_block && b <= last_block) {
4153 0 : ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4154 0 : count++;
4155 : }
4156 0 : b = ext4_inode_table(sb, gdp);
4157 0 : if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4158 0 : for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4159 0 : int c = EXT4_B2C(sbi, b - first_block);
4160 0 : ext4_set_bit(c, buf);
4161 0 : count++;
4162 : }
4163 0 : if (i != grp)
4164 0 : continue;
4165 0 : s = 0;
4166 0 : if (ext4_bg_has_super(sb, grp)) {
4167 0 : ext4_set_bit(s++, buf);
4168 0 : count++;
4169 : }
4170 0 : j = ext4_bg_num_gdb(sb, grp);
4171 0 : if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4172 0 : ext4_error(sb, "Invalid number of block group "
4173 : "descriptor blocks: %d", j);
4174 0 : j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4175 : }
4176 0 : count += j;
4177 0 : for (; j > 0; j--)
4178 0 : ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4179 : }
4180 0 : if (!count)
4181 : return 0;
4182 0 : return EXT4_CLUSTERS_PER_GROUP(sb) -
4183 0 : ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4184 : }
4185 :
4186 : /*
4187 : * Compute the overhead and stash it in sbi->s_overhead
4188 : */
4189 2486 : int ext4_calculate_overhead(struct super_block *sb)
4190 : {
4191 2486 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4192 2486 : struct ext4_super_block *es = sbi->s_es;
4193 2486 : struct inode *j_inode;
4194 2486 : unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4195 2486 : ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4196 2486 : ext4_fsblk_t overhead = 0;
4197 2486 : char *buf = (char *) get_zeroed_page(GFP_NOFS);
4198 :
4199 2486 : if (!buf)
4200 : return -ENOMEM;
4201 :
4202 : /*
4203 : * Compute the overhead (FS structures). This is constant
4204 : * for a given filesystem unless the number of block groups
4205 : * changes so we cache the previous value until it does.
4206 : */
4207 :
4208 : /*
4209 : * All of the blocks before first_data_block are overhead
4210 : */
4211 2486 : overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4212 :
4213 : /*
4214 : * Add the overhead found in each block group
4215 : */
4216 985591 : for (i = 0; i < ngroups; i++) {
4217 983105 : int blks;
4218 :
4219 983105 : blks = count_overhead(sb, i, buf);
4220 983105 : overhead += blks;
4221 983105 : if (blks)
4222 1966210 : memset(buf, 0, PAGE_SIZE);
4223 983105 : cond_resched();
4224 : }
4225 :
4226 : /*
4227 : * Add the internal journal blocks whether the journal has been
4228 : * loaded or not
4229 : */
4230 2486 : if (sbi->s_journal && !sbi->s_journal_bdev)
4231 2462 : overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4232 24 : else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4233 : /* j_inum for internal journal is non-zero */
4234 10 : j_inode = ext4_get_journal_inode(sb, j_inum);
4235 10 : if (j_inode) {
4236 10 : j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4237 10 : overhead += EXT4_NUM_B2C(sbi, j_blocks);
4238 10 : iput(j_inode);
4239 : } else {
4240 0 : ext4_msg(sb, KERN_ERR, "can't get journal size");
4241 : }
4242 : }
4243 2486 : sbi->s_overhead = overhead;
4244 2486 : smp_wmb();
4245 2486 : free_page((unsigned long) buf);
4246 2486 : return 0;
4247 : }
4248 :
4249 2513 : static void ext4_set_resv_clusters(struct super_block *sb)
4250 : {
4251 2513 : ext4_fsblk_t resv_clusters;
4252 2513 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4253 :
4254 : /*
4255 : * There's no need to reserve anything when we aren't using extents.
4256 : * The space estimates are exact, there are no unwritten extents,
4257 : * hole punching doesn't need new metadata... This is needed especially
4258 : * to keep ext2/3 backward compatibility.
4259 : */
4260 2513 : if (!ext4_has_feature_extents(sb))
4261 : return;
4262 : /*
4263 : * By default we reserve 2% or 4096 clusters, whichever is smaller.
4264 : * This should cover the situations where we can not afford to run
4265 : * out of space like for example punch hole, or converting
4266 : * unwritten extents in delalloc path. In most cases such
4267 : * allocation would require 1, or 2 blocks, higher numbers are
4268 : * very rare.
4269 : */
4270 2322 : resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4271 2322 : sbi->s_cluster_bits);
4272 :
4273 2322 : do_div(resv_clusters, 50);
4274 2322 : resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4275 :
4276 2322 : atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4277 : }
4278 :
/*
 * Return a static string naming the quota mode of @sb: "none",
 * "journalled" or "writeback" when CONFIG_QUOTA is set, "disabled"
 * otherwise.
 */
static const char *ext4_quota_mode(struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	if (!ext4_quota_capable(sb))
		return "none";

	return (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb)) ?
		"journalled" : "writeback";
#else
	return "disabled";
#endif
}
4293 :
4294 : static void ext4_setup_csum_trigger(struct super_block *sb,
4295 : enum ext4_journal_trigger_type type,
4296 : void (*trigger)(
4297 : struct jbd2_buffer_trigger_type *type,
4298 : struct buffer_head *bh,
4299 : void *mapped_data,
4300 : size_t size))
4301 : {
4302 2551 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4303 :
4304 2551 : sbi->s_journal_triggers[type].sb = sb;
4305 2551 : sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4306 : }
4307 :
4308 56 : static void ext4_free_sbi(struct ext4_sb_info *sbi)
4309 : {
4310 56 : if (!sbi)
4311 : return;
4312 :
4313 56 : kfree(sbi->s_blockgroup_lock);
4314 56 : fs_put_dax(sbi->s_daxdev, NULL);
4315 56 : kfree(sbi);
4316 : }
4317 :
4318 2569 : static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4319 : {
4320 2569 : struct ext4_sb_info *sbi;
4321 :
4322 2569 : sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4323 2569 : if (!sbi)
4324 : return NULL;
4325 :
4326 2569 : sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4327 : NULL, NULL);
4328 :
4329 5138 : sbi->s_blockgroup_lock =
4330 2569 : kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4331 :
4332 2569 : if (!sbi->s_blockgroup_lock)
4333 0 : goto err_out;
4334 :
4335 2569 : sb->s_fs_info = sbi;
4336 2569 : sbi->s_sb = sb;
4337 2569 : return sbi;
4338 : err_out:
4339 0 : fs_put_dax(sbi->s_daxdev, NULL);
4340 0 : kfree(sbi);
4341 0 : return NULL;
4342 : }
4343 :
4344 2551 : static void ext4_set_def_opts(struct super_block *sb,
4345 : struct ext4_super_block *es)
4346 : {
4347 2551 : unsigned long def_mount_opts;
4348 :
4349 : /* Set defaults before we parse the mount options */
4350 2551 : def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4351 2551 : set_opt(sb, INIT_INODE_TABLE);
4352 2551 : if (def_mount_opts & EXT4_DEFM_DEBUG)
4353 0 : set_opt(sb, DEBUG);
4354 2551 : if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4355 2 : set_opt(sb, GRPID);
4356 2551 : if (def_mount_opts & EXT4_DEFM_UID16)
4357 0 : set_opt(sb, NO_UID32);
4358 : /* xattr user namespace & acls are now defaulted on */
4359 2551 : set_opt(sb, XATTR_USER);
4360 : #ifdef CONFIG_EXT4_FS_POSIX_ACL
4361 2551 : set_opt(sb, POSIX_ACL);
4362 : #endif
4363 2551 : if (ext4_has_feature_fast_commit(sb))
4364 0 : set_opt2(sb, JOURNAL_FAST_COMMIT);
4365 : /* don't forget to enable journal_csum when metadata_csum is enabled. */
4366 2551 : if (ext4_has_metadata_csum(sb))
4367 2345 : set_opt(sb, JOURNAL_CHECKSUM);
4368 :
4369 2551 : if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4370 2 : set_opt(sb, JOURNAL_DATA);
4371 2549 : else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4372 2 : set_opt(sb, ORDERED_DATA);
4373 2547 : else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4374 2 : set_opt(sb, WRITEBACK_DATA);
4375 :
4376 2551 : if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
4377 0 : set_opt(sb, ERRORS_PANIC);
4378 2551 : else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
4379 2551 : set_opt(sb, ERRORS_CONT);
4380 : else
4381 0 : set_opt(sb, ERRORS_RO);
4382 : /* block_validity enabled by default; disable with noblock_validity */
4383 2551 : set_opt(sb, BLOCK_VALIDITY);
4384 2551 : if (def_mount_opts & EXT4_DEFM_DISCARD)
4385 2 : set_opt(sb, DISCARD);
4386 :
4387 2551 : if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4388 2549 : set_opt(sb, BARRIER);
4389 :
4390 : /*
4391 : * enable delayed allocation by default
4392 : * Use -o nodelalloc to turn it off
4393 : */
4394 2551 : if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4395 : ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4396 2366 : set_opt(sb, DELALLOC);
4397 :
4398 2551 : if (sb->s_blocksize == PAGE_SIZE)
4399 2543 : set_opt(sb, DIOREAD_NOLOCK);
4400 2551 : }
4401 :
4402 2521 : static int ext4_handle_clustersize(struct super_block *sb)
4403 : {
4404 2521 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4405 2521 : struct ext4_super_block *es = sbi->s_es;
4406 2521 : int clustersize;
4407 :
4408 : /* Handle clustersize */
4409 2521 : clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4410 2521 : if (ext4_has_feature_bigalloc(sb)) {
4411 33 : if (clustersize < sb->s_blocksize) {
4412 0 : ext4_msg(sb, KERN_ERR,
4413 : "cluster size (%d) smaller than "
4414 : "block size (%lu)", clustersize, sb->s_blocksize);
4415 0 : return -EINVAL;
4416 : }
4417 33 : sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4418 33 : le32_to_cpu(es->s_log_block_size);
4419 33 : sbi->s_clusters_per_group =
4420 33 : le32_to_cpu(es->s_clusters_per_group);
4421 33 : if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
4422 0 : ext4_msg(sb, KERN_ERR,
4423 : "#clusters per group too big: %lu",
4424 : sbi->s_clusters_per_group);
4425 0 : return -EINVAL;
4426 : }
4427 33 : if (sbi->s_blocks_per_group !=
4428 33 : (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
4429 0 : ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4430 : "clusters per group (%lu) inconsistent",
4431 : sbi->s_blocks_per_group,
4432 : sbi->s_clusters_per_group);
4433 0 : return -EINVAL;
4434 : }
4435 : } else {
4436 2488 : if (clustersize != sb->s_blocksize) {
4437 0 : ext4_msg(sb, KERN_ERR,
4438 : "fragment/cluster size (%d) != "
4439 : "block size (%lu)", clustersize, sb->s_blocksize);
4440 0 : return -EINVAL;
4441 : }
4442 2488 : if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
4443 0 : ext4_msg(sb, KERN_ERR,
4444 : "#blocks per group too big: %lu",
4445 : sbi->s_blocks_per_group);
4446 0 : return -EINVAL;
4447 : }
4448 2488 : sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4449 2488 : sbi->s_cluster_bits = 0;
4450 : }
4451 2521 : sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
4452 :
4453 : /* Do we have standard group size of clustersize * 8 blocks ? */
4454 2521 : if (sbi->s_blocks_per_group == clustersize << 3)
4455 2516 : set_opt2(sb, STD_GROUP_SIZE);
4456 :
4457 : return 0;
4458 : }
4459 :
4460 2519 : static void ext4_fast_commit_init(struct super_block *sb)
4461 : {
4462 2519 : struct ext4_sb_info *sbi = EXT4_SB(sb);
4463 :
4464 : /* Initialize fast commit stuff */
4465 2519 : atomic_set(&sbi->s_fc_subtid, 0);
4466 2519 : INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4467 2519 : INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4468 2519 : INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4469 2519 : INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4470 2519 : sbi->s_fc_bytes = 0;
4471 2519 : ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4472 2519 : sbi->s_fc_ineligible_tid = 0;
4473 2519 : spin_lock_init(&sbi->s_fc_lock);
4474 2519 : memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4475 2519 : sbi->s_fc_replay_state.fc_regions = NULL;
4476 2519 : sbi->s_fc_replay_state.fc_regions_size = 0;
4477 2519 : sbi->s_fc_replay_state.fc_regions_used = 0;
4478 2519 : sbi->s_fc_replay_state.fc_regions_valid = 0;
4479 2519 : sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4480 2519 : sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4481 2519 : sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4482 2519 : }
4483 :
/*
 * ext4_inode_info_init() - derive the inode size, first inode number,
 * timestamp limits and extra-isize target from the on-disk superblock.
 * @sb: superblock being set up; s_time_gran, s_time_min and s_time_max are
 *      written here for non-revision-0 filesystems.
 * @es: on-disk superblock the values are read from.
 *
 * Return: 0 on success, or -EINVAL when s_first_ino, s_inode_size,
 * s_want_extra_isize or s_min_extra_isize fails validation.
 */
4484 2551 static int ext4_inode_info_init(struct super_block *sb,
4485 : struct ext4_super_block *es)
4486 : {
4487 2551 struct ext4_sb_info *sbi = EXT4_SB(sb);
4488 :
/*
 * Revision 0 uses the fixed "good old" size and first inode number.
 * Note that this branch does not touch the sb->s_time_* fields.
 */
4489 2551 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4490 0 : sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4491 0 : sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4492 : } else {
4493 2551 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4494 2551 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4495 2551 if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4496 0 : ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4497 : sbi->s_first_ino);
4498 0 : return -EINVAL;
4499 : }
/*
 * The inode size must be at least the good-old size, a power of
 * two, and no larger than one filesystem block.
 */
4500 5102 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4501 2551 (!is_power_of_2(sbi->s_inode_size)) ||
4502 2551 (sbi->s_inode_size > sb->s_blocksize)) {
4503 0 : ext4_msg(sb, KERN_ERR,
4504 : "unsupported inode size: %d",
4505 : sbi->s_inode_size);
4506 0 : ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
4507 0 : return -EINVAL;
4508 : }
4509 : /*
4510 : * i_atime_extra is the last extra field available for
4511 : * [acm]times in struct ext4_inode. Checking for that
4512 : * field should suffice to ensure we have extra space
4513 : * for all three.
4514 : */
4515 2551 if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4516 : sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4517 2367 sb->s_time_gran = 1;
4518 2367 sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4519 : } else {
4520 184 : sb->s_time_gran = NSEC_PER_SEC;
4521 184 : sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4522 : }
4523 2551 sb->s_time_min = EXT4_TIMESTAMP_MIN;
4524 : }
4525 :
/*
 * For inodes larger than the good-old size, default the wanted extra
 * size to everything struct ext4_inode defines beyond the good-old
 * part. With the extra_isize feature, raise it to the superblock's
 * s_want_extra_isize and s_min_extra_isize; each of those is rejected
 * if it exceeds the space actually available in the on-disk inode.
 */
4526 2551 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4527 2367 sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4528 : EXT4_GOOD_OLD_INODE_SIZE;
4529 2367 if (ext4_has_feature_extra_isize(sb)) {
4530 2343 unsigned v, max = (sbi->s_inode_size -
4531 : EXT4_GOOD_OLD_INODE_SIZE);
4532 :
4533 2343 v = le16_to_cpu(es->s_want_extra_isize);
4534 2343 if (v > max) {
4535 0 : ext4_msg(sb, KERN_ERR,
4536 : "bad s_want_extra_isize: %d", v);
4537 0 : return -EINVAL;
4538 : }
4539 2343 if (sbi->s_want_extra_isize < v)
4540 2 : sbi->s_want_extra_isize = v;
4541 :
4542 2343 v = le16_to_cpu(es->s_min_extra_isize);
4543 2343 if (v > max) {
4544 0 : ext4_msg(sb, KERN_ERR,
4545 : "bad s_min_extra_isize: %d", v);
4546 0 : return -EINVAL;
4547 : }
4548 2343 if (sbi->s_want_extra_isize < v)
4549 0 : sbi->s_want_extra_isize = v;
4550 : }
4551 : }
4552 :
4553 : return 0;
4554 : }
4555 :
/*
 * ext4_encoding_init() - load the Unicode encoding named by the superblock.
 * @sb: superblock; on success s_encoding and s_encoding_flags are set.
 * @es: on-disk superblock providing the encoding and its flags.
 *
 * Does nothing and returns 0 when the casefold feature is absent or
 * sb->s_encoding is already set. Returns -EINVAL when
 * ext4_sb_read_encoding() yields NULL or utf8_load() yields an error
 * pointer. When CONFIG_UNICODE is not enabled, the stub below always
 * returns 0.
 */
4556 : #if IS_ENABLED(CONFIG_UNICODE)
4557 : static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4558 : {
4559 : const struct ext4_sb_encodings *encoding_info;
4560 : struct unicode_map *encoding;
4561 : __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4562 :
4563 : if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
4564 : return 0;
4565 :
4566 : encoding_info = ext4_sb_read_encoding(es);
4567 : if (!encoding_info) {
4568 : ext4_msg(sb, KERN_ERR,
4569 : "Encoding requested by superblock is unknown");
4570 : return -EINVAL;
4571 : }
4572 :
4573 : encoding = utf8_load(encoding_info->version);
/* A load failure is reported as -EINVAL, not as PTR_ERR(encoding). */
4574 : if (IS_ERR(encoding)) {
4575 : ext4_msg(sb, KERN_ERR,
4576 : "can't mount with superblock charset: %s-%u.%u.%u "
4577 : "not supported by the kernel. flags: 0x%x.",
4578 : encoding_info->name,
4579 : unicode_major(encoding_info->version),
4580 : unicode_minor(encoding_info->version),
4581 : unicode_rev(encoding_info->version),
4582 : encoding_flags);
4583 : return -EINVAL;
4584 : }
4585 : ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
4586 : "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4587 : unicode_major(encoding_info->version),
4588 : unicode_minor(encoding_info->version),
4589 : unicode_rev(encoding_info->version),
4590 : encoding_flags);
4591 :
4592 : sb->s_encoding = encoding;
4593 : sb->s_encoding_flags = encoding_flags;
4594 :
4595 : return 0;
4596 : }
4597 : #else
/* Stub used when CONFIG_UNICODE is disabled: nothing to load. */
4598 : static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4599 : {
4600 : return 0;
4601 : }
4602 : #endif
4603 :
/*
 * ext4_init_metadata_csum() - set up checksumming for this mount and verify
 * the superblock checksum.
 * @sb: superblock being mounted.
 * @es: on-disk superblock.
 *
 * Steps, in order: warn if both metadata_csum and gdt_csum are set; reject
 * an unknown checksum type; register the orphan-file journal trigger;
 * allocate the "crc32c" shash into sbi->s_chksum_driver; verify the
 * superblock checksum; compute sbi->s_csum_seed.
 *
 * Return: 0 on success, -EINVAL for an unknown checksum algorithm, the
 * PTR_ERR() of the failed crc32c allocation, or -EFSBADCRC when the
 * superblock checksum does not verify.
 */
4604 2551 static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
4605 : {
4606 2551 struct ext4_sb_info *sbi = EXT4_SB(sb);
4607 :
4608 : /* Warn if metadata_csum and gdt_csum are both set. */
4609 2551 if (ext4_has_feature_metadata_csum(sb) &&
4610 : ext4_has_feature_gdt_csum(sb))
4611 0 : ext4_warning(sb, "metadata_csum and uninit_bg are "
4612 : "redundant flags; please run fsck.");
4613 :
4614 : /* Check for a known checksum algorithm */
4615 2551 if (!ext4_verify_csum_type(sb, es)) {
4616 0 : ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4617 : "unknown checksum algorithm.");
4618 0 : return -EINVAL;
4619 : }
4620 2551 ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4621 : ext4_orphan_file_block_trigger);
4622 :
4623 : /* Load the checksum driver */
4624 2551 sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4625 2551 if (IS_ERR(sbi->s_chksum_driver)) {
4626 0 : int ret = PTR_ERR(sbi->s_chksum_driver);
4627 0 : ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
/* Do not leave an error pointer behind in the sb info. */
4628 0 : sbi->s_chksum_driver = NULL;
4629 0 : return ret;
4630 : }
4631 :
4632 : /* Check superblock checksum */
/*
 * NOTE(review): on this failure the driver allocated above is still
 * held in sbi->s_chksum_driver; presumably the caller's error path
 * frees it - confirm.
 */
4633 2551 if (!ext4_superblock_csum_verify(sb, es)) {
4634 0 : ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4635 : "invalid superblock checksum. Run e2fsck?");
4636 0 : return -EFSBADCRC;
4637 : }
4638 :
4639 : /* Precompute checksum seed for all metadata */
/*
 * With the csum_seed feature the seed is stored in the superblock.
 * Otherwise, when metadata checksums or ea_inode are in use, it is
 * the checksum of the UUID starting from ~0. In any other case
 * s_csum_seed is left untouched.
 */
4640 2551 if (ext4_has_feature_csum_seed(sb))
4641 3 : sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4642 2548 : else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
4643 2342 : sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4644 : sizeof(es->s_uuid));
4645 : return 0;
4646 : }
4647 :
/*
 * ext4_check_feature_compatibility() - check that the superblock's feature
 * flags, creator OS and the DAX/encryption settings can be supported by
 * this mount.
 * @sb: superblock being mounted.
 * @es: on-disk superblock.
 * @silent: non-zero while probing; suppresses the "couldn't mount as
 *          ext2/ext3" message when the feature set would be acceptable
 *          to ext4.
 *
 * Side effects: may set the HURD_COMPAT mount option and the
 * EXT4_FLAGS_BDEV_IS_DAX flag.
 *
 * Return: 0 when the filesystem can be mounted, -EINVAL otherwise.
 */
4648 2533 static int ext4_check_feature_compatibility(struct super_block *sb,
4649 : struct ext4_super_block *es,
4650 : int silent)
4651 : {
4652 2533 struct ext4_sb_info *sbi = EXT4_SB(sb);
4653 :
/* Feature flags on a revision 0 filesystem only produce a warning. */
4654 2533 if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4655 0 : (ext4_has_compat_features(sb) ||
4656 0 : ext4_has_ro_compat_features(sb) ||
4657 : ext4_has_incompat_features(sb)))
4658 0 : ext4_msg(sb, KERN_WARNING,
4659 : "feature flags set on rev 0 fs, "
4660 : "running e2fsck is recommended");
4661 :
4662 2533 if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4663 0 : set_opt2(sb, HURD_COMPAT);
4664 0 : if (ext4_has_feature_64bit(sb)) {
4665 0 : ext4_msg(sb, KERN_ERR,
4666 : "The Hurd can't support 64-bit file systems");
4667 0 : return -EINVAL;
4668 : }
4669 :
4670 : /*
4671 : * ea_inode feature uses l_i_version field which is not
4672 : * available in HURD_COMPAT mode.
4673 : */
4674 0 : if (ext4_has_feature_ea_inode(sb)) {
4675 0 : ext4_msg(sb, KERN_ERR,
4676 : "ea_inode feature is not supported for Hurd");
4677 0 : return -EINVAL;
4678 : }
4679 : }
4680 :
4681 2533 if (IS_EXT2_SB(sb)) {
4682 : if (ext2_feature_set_ok(sb))
4683 : ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4684 : "using the ext4 subsystem");
4685 : else {
4686 : /*
4687 : * If we're probing be silent, if this looks like
4688 : * it's actually an ext[34] filesystem.
4689 : */
4690 : if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4691 : return -EINVAL;
4692 : ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4693 : "to feature incompatibilities");
4694 : return -EINVAL;
4695 : }
4696 : }
4697 :
4698 2533 if (IS_EXT3_SB(sb)) {
4699 170 : if (ext3_feature_set_ok(sb))
4700 170 : ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4701 : "using the ext4 subsystem");
4702 : else {
4703 : /*
4704 : * If we're probing be silent, if this looks like
4705 : * it's actually an ext4 filesystem.
4706 : */
4707 0 : if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4708 : return -EINVAL;
4709 0 : ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4710 : "to feature incompatibilities");
4711 0 : return -EINVAL;
4712 : }
4713 : }
4714 :
4715 : /*
4716 : * Check feature flags regardless of the revision level, since we
4717 : * previously didn't change the revision level when setting the flags,
4718 : * so there is a chance incompat flags are set on a rev 0 filesystem.
4719 : */
4720 2533 if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4721 : return -EINVAL;
4722 :
/*
 * A DAX device is only flagged usable when the block size equals
 * PAGE_SIZE. A mismatch is logged but is not fatal by itself; the
 * DAX_ALWAYS check below fails the mount if DAX was demanded.
 */
4723 2533 if (sbi->s_daxdev) {
4724 0 : if (sb->s_blocksize == PAGE_SIZE)
4725 0 : set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4726 : else
/* No trailing newline, matching the other ext4_msg() calls here. */
4727 0 : ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX");
4728 : }
4729 :
4730 2533 if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4731 12 : if (ext4_has_feature_inline_data(sb)) {
4732 0 : ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4733 : " that may contain inline data");
4734 0 : return -EINVAL;
4735 : }
4736 12 : if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4737 12 : ext4_msg(sb, KERN_ERR,
4738 : "DAX unsupported by block device.");
4739 12 : return -EINVAL;
4740 : }
4741 : }
4742 :
/* Only encryption level 0 is accepted when the encrypt feature is set. */
4743 2521 if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4744 0 : ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4745 : es->s_encryption_level);
4746 0 : return -EINVAL;
4747 : }
4748 :
4749 : return 0;
4750 : }
4751 :
/*
 * ext4_check_geometry() - sanity-check the block, group and inode counts
 * in the superblock and compute the number of block groups.
 * @sb: superblock being mounted.
 * @es: on-disk superblock.
 *
 * On success sbi->s_groups_count and sbi->s_blockfile_groups are set.
 *
 * Return: 0 on success, the error from generic_check_addressable(), or
 * -EINVAL for any geometry that fails the checks below.
 */
4752 2521 static int ext4_check_geometry(struct super_block *sb,
4753 : struct ext4_super_block *es)
4754 : {
4755 2521 struct ext4_sb_info *sbi = EXT4_SB(sb);
4756 2521 __u64 blocks_count;
4757 2521 int err;
4758 :
/* Reject more reserved GDT blocks than a quarter of the block size. */
4759 2521 if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
4760 0 : ext4_msg(sb, KERN_ERR,
4761 : "Number of reserved GDT blocks insanely large: %d",
4762 : le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4763 0 : return -EINVAL;
4764 : }
4765 : /*
4766 : * Test whether we have more sectors than will fit in sector_t,
4767 : * and whether the max offset is addressable by the page cache.
4768 : */
4769 4849 : err = generic_check_addressable(sb->s_blocksize_bits,
4770 : ext4_blocks_count(es));
4771 2521 if (err) {
4772 0 : ext4_msg(sb, KERN_ERR, "filesystem"
4773 : " too large to mount safely on this system");
4774 0 : return err;
4775 : }
4776 :
4777 : /* check blocks count against device size */
/* A device size of 0 skips this comparison. */
4778 2521 blocks_count = sb_bdev_nr_blocks(sb);
4779 5042 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4780 0 : ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4781 : "exceeds size of device (%llu blocks)",
4782 : ext4_blocks_count(es), blocks_count);
4783 0 : return -EINVAL;
4784 : }
4785 :
4786 : /*
4787 : * It makes no sense for the first data block to be beyond the end
4788 : * of the filesystem.
4789 : */
4790 4849 : if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4791 0 : ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4792 : "block %u is beyond end of filesystem (%llu)",
4793 : le32_to_cpu(es->s_first_data_block),
4794 : ext4_blocks_count(es));
4795 0 : return -EINVAL;
4796 : }
4797 2521 if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4798 0 : (sbi->s_cluster_ratio == 1)) {
4799 0 : ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4800 : "block is 0 with a 1k block and cluster size");
4801 0 : return -EINVAL;
4802 : }
4803 :
/*
 * blocks_count is reused from here on to hold the number of block
 * groups: (blocks - first data block) divided by blocks per group,
 * rounded up.
 */
4804 2521 blocks_count = (ext4_blocks_count(es) -
4805 2521 le32_to_cpu(es->s_first_data_block) +
4806 2521 EXT4_BLOCKS_PER_GROUP(sb) - 1);
4807 2521 do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4808 2521 if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4809 0 : ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4810 : "(block count %llu, first data block %u, "
4811 : "blocks per group %lu)", blocks_count,
4812 : ext4_blocks_count(es),
4813 : le32_to_cpu(es->s_first_data_block),
4814 : EXT4_BLOCKS_PER_GROUP(sb));
4815 0 : return -EINVAL;
4816 : }
4817 2521 sbi->s_groups_count = blocks_count;
4818 2521 sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4819 : (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
/* The superblock's inode count must equal groups * inodes per group. */
4820 2521 if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4821 2521 le32_to_cpu(es->s_inodes_count)) {
4822 0 : ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4823 : le32_to_cpu(es->s_inodes_count),
4824 : ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4825 0 : return -EINVAL;
4826 : }
4827 :
4828 : return 0;
4829 : }
4830 :
/*
 * ext4_group_desc_init() - allocate the group-descriptor buffer array, read
 * every descriptor block and validate the descriptors.
 * @sb: superblock being mounted.
 * @es: on-disk superblock (s_first_meta_bg is read from it).
 * @logical_sb_block: superblock block number, passed to descriptor_loc()
 *                    and ext4_check_descriptors().
 * @first_not_zeroed: output passed through to ext4_check_descriptors().
 *
 * Return: 0 on success; -EINVAL when s_first_meta_bg exceeds the descriptor
 * block count; -ENOMEM when the array cannot be allocated; the PTR_ERR() of
 * a failed descriptor read; -EFSCORRUPTED when ext4_check_descriptors()
 * rejects the descriptors.
 */
4831 2521 static int ext4_group_desc_init(struct super_block *sb,
4832 : struct ext4_super_block *es,
4833 : ext4_fsblk_t logical_sb_block,
4834 : ext4_group_t *first_not_zeroed)
4835 : {
4836 2521 struct ext4_sb_info *sbi = EXT4_SB(sb);
4837 2521 unsigned int db_count;
4838 2521 ext4_fsblk_t block;
4839 2521 int i;
4840 :
/* Descriptor blocks needed for all groups, rounded up. */
4841 0 : db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4842 2521 EXT4_DESC_PER_BLOCK(sb);
4843 2521 if (ext4_has_feature_meta_bg(sb)) {
4844 5 : if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4845 1 : ext4_msg(sb, KERN_WARNING,
4846 : "first meta block group too large: %u "
4847 : "(group descriptor block count %u)",
4848 : le32_to_cpu(es->s_first_meta_bg), db_count);
4849 1 : return -EINVAL;
4850 : }
4851 : }
/* One buffer_head pointer per descriptor block. */
4852 2520 rcu_assign_pointer(sbi->s_group_desc,
4853 : kvmalloc_array(db_count,
4854 : sizeof(struct buffer_head *),
4855 : GFP_KERNEL));
4856 2520 if (sbi->s_group_desc == NULL) {
4857 0 : ext4_msg(sb, KERN_ERR, "not enough memory");
4858 0 : return -ENOMEM;
4859 : }
4860 :
4861 2520 bgl_lock_init(sbi->s_blockgroup_lock);
4862 :
4863 : /* Pre-read the descriptors into the buffer cache */
4864 21438 : for (i = 0; i < db_count; i++) {
4865 16398 : block = descriptor_loc(sb, logical_sb_block, i);
4866 16398 : ext4_sb_breadahead_unmovable(sb, block);
4867 : }
4868 :
/* Second pass: read each descriptor block and store its buffer. */
4869 18918 : for (i = 0; i < db_count; i++) {
4870 16398 : struct buffer_head *bh;
4871 :
4872 16398 : block = descriptor_loc(sb, logical_sb_block, i);
4873 16398 : bh = ext4_sb_bread_unmovable(sb, block);
4874 16398 : if (IS_ERR(bh)) {
4875 0 : ext4_msg(sb, KERN_ERR,
4876 : "can't read group descriptor %d", i);
/*
 * Record how many buffers were stored before the failure;
 * presumably the caller's cleanup releases exactly that
 * many - confirm.
 */
4877 0 : sbi->s_gdb_count = i;
4878 0 : return PTR_ERR(bh);
4879 : }
4880 16398 : rcu_read_lock();
4881 16398 : rcu_dereference(sbi->s_group_desc)[i] = bh;
4882 16398 : rcu_read_unlock();
4883 : }
4884 2520 sbi->s_gdb_count = db_count;
4885 2520 if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
4886 1 : ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4887 1 : return -EFSCORRUPTED;
4888 : }
4889 :
4890 : return 0;
4891 : }
4892 :
/*
 * ext4_load_and_init_journal() - load the journal and configure it for this
 * mount.
 * @sb: superblock being mounted.
 * @es: on-disk superblock, passed to ext4_load_journal().
 * @ctx: mount context supplying journal_devnum and journal_ioprio.
 *
 * After loading, sets the 64-bit, checksum and fast-commit journal features
 * as required, settles the data journaling mode, applies the I/O priority
 * to the journal task and installs the inode data buffer callbacks.
 *
 * Return: 0 on success; the error from ext4_load_journal() if loading
 * fails; -EINVAL for every later failure, in which case the journal is
 * destroyed and sbi->s_journal is reset to NULL.
 */
4893 2505 static int ext4_load_and_init_journal(struct super_block *sb,
4894 : struct ext4_super_block *es,
4895 : struct ext4_fs_context *ctx)
4896 : {
4897 2505 struct ext4_sb_info *sbi = EXT4_SB(sb);
4898 2505 int err;
4899 :
4900 2505 err = ext4_load_journal(sb, es, ctx->journal_devnum);
4901 2505 if (err)
4902 : return err;
4903 :
4904 4813 : if (ext4_has_feature_64bit(sb) &&
4905 2312 : !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4906 : JBD2_FEATURE_INCOMPAT_64BIT)) {
4907 0 : ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4908 0 : goto out;
4909 : }
4910 :
4911 2501 if (!set_journal_csum_feature_set(sb)) {
4912 0 : ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4913 : "feature set");
4914 0 : goto out;
4915 : }
4916 :
4917 2501 if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4918 0 : !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4919 : JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4920 0 : ext4_msg(sb, KERN_ERR,
4921 : "Failed to set fast commit journal feature");
4922 0 : goto out;
4923 : }
4924 :
4925 : /* We have now updated the journal if required, so we can
4926 : * validate the data journaling mode. */
4927 2501 switch (test_opt(sb, DATA_FLAGS)) {
4928 2416 : case 0:
4929 : /* No mode set, assume a default based on the journal
4930 : * capabilities: ORDERED_DATA if the journal can
4931 : * cope, else JOURNAL_DATA
4932 : */
4933 2416 : if (jbd2_journal_check_available_features
4934 2416 : (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4935 2416 : set_opt(sb, ORDERED_DATA);
4936 2416 : sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4937 : } else {
4938 0 : set_opt(sb, JOURNAL_DATA);
4939 0 : sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4940 : }
4941 : break;
4942 :
/* An explicit ordered/writeback request needs the REVOKE feature. */
4943 65 : case EXT4_MOUNT_ORDERED_DATA:
4944 : case EXT4_MOUNT_WRITEBACK_DATA:
4945 65 : if (!jbd2_journal_check_available_features
4946 65 : (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4947 0 : ext4_msg(sb, KERN_ERR, "Journal does not support "
4948 : "requested data journaling mode");
4949 0 : goto out;
4950 : }
4951 : break;
4952 : default:
4953 : break;
4954 : }
4955 :
4956 2501 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4957 2454 : test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4958 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
4959 : "journal_async_commit in data=ordered mode");
4960 0 : goto out;
4961 : }
4962 :
4963 2501 set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
4964 :
4965 2501 sbi->s_journal->j_submit_inode_data_buffers =
4966 : ext4_journal_submit_inode_data_buffers;
4967 2501 sbi->s_journal->j_finish_inode_data_buffers =
4968 : ext4_journal_finish_inode_data_buffers;
4969 :
4970 2501 return 0;
4971 :
/* Common failure exit for every error after the journal was loaded. */
4972 0 : out:
4973 : /* flush s_error_work before journal destroy. */
4974 0 : flush_work(&sbi->s_error_work);
4975 0 : jbd2_journal_destroy(sbi->s_journal);
4976 0 : sbi->s_journal = NULL;
4977 0 : return -EINVAL;
4978 : }
4979 :
/*
 * ext4_check_journal_data_mode() - reconcile mount options with
 * data=journal mode.
 * @sb: superblock being mounted.
 *
 * In data=journal mode: warns once, clears DIOREAD_NOLOCK and
 * JOURNAL_FAST_COMMIT, rejects an explicit delalloc request and DAX_ALWAYS,
 * warns when the encrypt feature is present, and clears DELALLOC if set.
 * In any other data mode: sets SB_I_CGROUPWB in sb->s_iflags.
 *
 * Return: 0 on success, -EINVAL when data=journal is combined with an
 * explicit delalloc request or with dax.
 */
4980 2533 static int ext4_check_journal_data_mode(struct super_block *sb)
4981 : {
4982 2533 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4983 20 : printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4984 : "data=journal disables delayed allocation, "
4985 : "dioread_nolock, O_DIRECT and fast_commit support!\n");
4986 : /* can't mount with both data=journal and dioread_nolock. */
4987 20 : clear_opt(sb, DIOREAD_NOLOCK);
4988 20 : clear_opt2(sb, JOURNAL_FAST_COMMIT);
/* Only an explicitly requested delalloc is an error. */
4989 20 : if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4990 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
4991 : "both data=journal and delalloc");
4992 0 : return -EINVAL;
4993 : }
4994 20 : if (test_opt(sb, DAX_ALWAYS)) {
4995 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
4996 : "both data=journal and dax");
4997 0 : return -EINVAL;
4998 : }
4999 20 : if (ext4_has_feature_encrypt(sb)) {
5000 0 : ext4_msg(sb, KERN_WARNING,
5001 : "encrypted files will use data=ordered "
5002 : "instead of data journaling mode");
5003 : }
/* A DELALLOC that was not explicitly requested is dropped silently. */
5004 20 : if (test_opt(sb, DELALLOC))
5005 16 : clear_opt(sb, DELALLOC);
5006 : } else {
5007 2513 : sb->s_iflags |= SB_I_CGROUPWB;
5008 : }
5009 :
5010 : return 0;
5011 : }
5012 :
/*
 * ext4_load_super() - read the on-disk superblock and switch the block
 * device to the filesystem's block size.
 * @sb: superblock being mounted.
 * @lsb: output; receives the block number the superblock was read from.
 * @silent: non-zero suppresses the "Can't find ext4 filesystem" message.
 *
 * The superblock is first read with the minimum block size. If the block
 * size recorded in it differs from sb->s_blocksize, the buffer is released,
 * the block size is changed and the superblock is read a second time.
 * On success sbi->s_es points into the buffer kept in sbi->s_sbh.
 *
 * Return: 0 on success; the PTR_ERR() of a failed read; -EINVAL when the
 * block size cannot be set, the magic is wrong, or s_log_block_size /
 * s_log_cluster_size is out of range.
 *
 * NOTE(review): on the failure paths taken after sbi->s_es has been
 * assigned, the buffer is released but s_es is not reset here; callers
 * should presumably not use s_es after a failure - confirm.
 */
5013 2569 static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
5014 : int silent)
5015 : {
5016 2569 struct ext4_sb_info *sbi = EXT4_SB(sb);
5017 2569 struct ext4_super_block *es;
5018 2569 ext4_fsblk_t logical_sb_block;
5019 2569 unsigned long offset = 0;
5020 2569 struct buffer_head *bh;
5021 2569 int ret = -EINVAL;
5022 2569 int blocksize;
5023 :
5024 2569 blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
5025 2569 if (!blocksize) {
5026 0 : ext4_msg(sb, KERN_ERR, "unable to set blocksize");
5027 0 : return -EINVAL;
5028 : }
5029 :
5030 : /*
5031 : * The ext4 superblock will not be buffer aligned for other than 1kB
5032 : * block sizes. We need to calculate the offset from buffer start.
5033 : */
/* do_div() leaves the quotient in logical_sb_block and returns the remainder. */
5034 2569 if (blocksize != EXT4_MIN_BLOCK_SIZE) {
5035 0 : logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5036 0 : offset = do_div(logical_sb_block, blocksize);
5037 : } else {
5038 2569 logical_sb_block = sbi->s_sb_block;
5039 : }
5040 :
5041 2569 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5042 2569 if (IS_ERR(bh)) {
5043 1 : ext4_msg(sb, KERN_ERR, "unable to read superblock");
5044 1 : return PTR_ERR(bh);
5045 : }
5046 : /*
5047 : * Note: s_es must be initialized as soon as possible because
5048 : * some ext4 macro-instructions depend on its value
5049 : */
5050 2568 es = (struct ext4_super_block *) (bh->b_data + offset);
5051 2568 sbi->s_es = es;
5052 2568 sb->s_magic = le16_to_cpu(es->s_magic);
5053 2568 if (sb->s_magic != EXT4_SUPER_MAGIC) {
5054 13 : if (!silent)
5055 13 : ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5056 13 : goto out;
5057 : }
5058 :
/* Bound both log sizes before they are used as shift counts. */
5059 2555 if (le32_to_cpu(es->s_log_block_size) >
5060 : (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5061 0 : ext4_msg(sb, KERN_ERR,
5062 : "Invalid log block size: %u",
5063 : le32_to_cpu(es->s_log_block_size));
5064 0 : goto out;
5065 : }
5066 2555 if (le32_to_cpu(es->s_log_cluster_size) >
5067 : (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5068 0 : ext4_msg(sb, KERN_ERR,
5069 : "Invalid log cluster size: %u",
5070 : le32_to_cpu(es->s_log_cluster_size));
5071 0 : goto out;
5072 : }
5073 :
5074 2555 blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
5075 :
5076 : /*
5077 : * If the default block size is not the same as the real block size,
5078 : * we need to reload it.
5079 : */
/* Same size already: keep the buffer just read and finish. */
5080 2555 if (sb->s_blocksize == blocksize) {
5081 7 : *lsb = logical_sb_block;
5082 7 : sbi->s_sbh = bh;
5083 7 : return 0;
5084 : }
5085 :
5086 : /*
5087 : * bh must be released before kill_bdev(), otherwise
5088 : * it won't be freed and its page also. kill_bdev()
5089 : * is called by sb_set_blocksize().
5090 : */
5091 2548 brelse(bh);
5092 : /* Validate the filesystem blocksize */
5093 2548 if (!sb_set_blocksize(sb, blocksize)) {
5094 4 : ext4_msg(sb, KERN_ERR, "bad block size %d",
5095 : blocksize);
/* bh was already released above; clear it before the brelse() at out. */
5096 4 : bh = NULL;
5097 4 : goto out;
5098 : }
5099 :
5100 2544 logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5101 2544 offset = do_div(logical_sb_block, blocksize);
5102 2544 bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5103 2544 if (IS_ERR(bh)) {
5104 0 : ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
5105 0 : ret = PTR_ERR(bh);
5106 0 : bh = NULL;
5107 0 : goto out;
5108 : }
5109 2544 es = (struct ext4_super_block *)(bh->b_data + offset);
5110 2544 sbi->s_es = es;
5111 2544 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
5112 0 : ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
5113 0 : goto out;
5114 : }
5115 2544 *lsb = logical_sb_block;
5116 2544 sbi->s_sbh = bh;
5117 2544 return 0;
/* Failure exit: drop the buffer and return ret (-EINVAL unless overridden). */
5118 13 : out:
5119 17 : brelse(bh);
5120 : return ret;
5121 : }
5122 :
/*
 * ext4_hash_info_init() - copy the directory hash parameters from the
 * on-disk superblock into the in-memory sb info.
 * @sb: superblock being mounted; the on-disk superblock is sbi->s_es.
 *
 * Copies the four hash seed words and the default hash version. With the
 * dir_index feature it also settles hash signedness: if the superblock is
 * flagged unsigned, s_hash_unsigned is set to 3; if neither signedness flag
 * is set, the choice follows __CHAR_UNSIGNED__ and, unless the mount is
 * read-only, the matching flag is recorded in es->s_flags.
 */
5123 2521 static void ext4_hash_info_init(struct super_block *sb)
5124 : {
5125 2521 struct ext4_sb_info *sbi = EXT4_SB(sb);
5126 2521 struct ext4_super_block *es = sbi->s_es;
5127 2521 unsigned int i;
5128 :
5129 12605 : for (i = 0; i < 4; i++)
5130 10084 : sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
5131 :
5132 2521 sbi->s_def_hash_version = es->s_def_hash_version;
5133 2521 if (ext4_has_feature_dir_index(sb)) {
/* i is reused here to hold the superblock flags word. */
5134 2521 i = le32_to_cpu(es->s_flags);
5135 2521 if (i & EXT2_FLAGS_UNSIGNED_HASH)
5136 0 : sbi->s_hash_unsigned = 3;
5137 2521 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
/* Neither flag is set yet: pick by the compiler's char signedness. */
5138 : #ifdef __CHAR_UNSIGNED__
5139 0 : if (!sb_rdonly(sb))
5140 0 : es->s_flags |=
5141 : cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
5142 0 : sbi->s_hash_unsigned = 3;
5143 : #else
5144 : if (!sb_rdonly(sb))
5145 : es->s_flags |=
5146 : cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
5147 : #endif
5148 : }
5149 : }
5150 2521 }
5151 :
/*
 * ext4_block_group_meta_init() - compute per-group and per-block layout
 * values from the on-disk superblock (sbi->s_es).
 * @sb: superblock being mounted; s_maxbytes is set here.
 * @silent: non-zero suppresses the "Can't find ext4 filesystem" message.
 *
 * Fills in the size limits, the descriptor size, blocks and inodes per
 * group, inodes and descriptors per block, inode table blocks per group,
 * the mount state (with EXT4_FC_REPLAY masked off) and the log2 values of
 * addresses and descriptors per block.
 *
 * Return: 0 on success, -EINVAL for an unsupported descriptor size, a zero
 * inodes-per-block or blocks-per-group value, or an inodes-per-group value
 * outside [inodes per block, 8 * block size].
 */
5152 2521 static int ext4_block_group_meta_init(struct super_block *sb, int silent)
5153 : {
5154 2521 struct ext4_sb_info *sbi = EXT4_SB(sb);
5155 2521 struct ext4_super_block *es = sbi->s_es;
5156 2521 int has_huge_files;
5157 :
5158 2521 has_huge_files = ext4_has_feature_huge_file(sb);
5159 2521 sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
5160 : has_huge_files);
5161 2521 sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
5162 :
/*
 * The on-disk descriptor size is validated only with the 64bit
 * feature; otherwise the minimum size is used regardless of the
 * on-disk value.
 */
5163 2521 sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
5164 2521 if (ext4_has_feature_64bit(sb)) {
5165 2328 if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
5166 2328 sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
5167 : !is_power_of_2(sbi->s_desc_size)) {
5168 0 : ext4_msg(sb, KERN_ERR,
5169 : "unsupported descriptor size %lu",
5170 : sbi->s_desc_size);
5171 0 : return -EINVAL;
5172 : }
5173 : } else
5174 193 : sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
5175 :
5176 2521 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
5177 2521 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
5178 :
5179 2521 sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
/* Zero here would make the division below and the group math invalid. */
5180 2521 if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
5181 0 : if (!silent)
5182 0 : ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5183 0 : return -EINVAL;
5184 : }
5185 2521 if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
5186 2521 sbi->s_inodes_per_group > sb->s_blocksize * 8) {
/* No trailing newline, matching the other ext4_msg() calls here. */
5187 0 : ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu",
5188 : sbi->s_inodes_per_group);
5189 0 : return -EINVAL;
5190 : }
5191 2521 sbi->s_itb_per_group = sbi->s_inodes_per_group /
5192 : sbi->s_inodes_per_block;
5193 2521 sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
5194 2521 sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
5195 2521 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
5196 2521 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
5197 :
5198 2521 return 0;
5199 : }
5200 :
5201 2569 : static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
5202 : {
5203 2569 : struct ext4_super_block *es = NULL;
5204 2569 : struct ext4_sb_info *sbi = EXT4_SB(sb);
5205 2569 : ext4_fsblk_t logical_sb_block;
5206 2569 : struct inode *root;
5207 2569 : int needs_recovery;
5208 2569 : int err;
5209 2569 : ext4_group_t first_not_zeroed;
5210 2569 : struct ext4_fs_context *ctx = fc->fs_private;
5211 2569 : int silent = fc->sb_flags & SB_SILENT;
5212 :
5213 : /* Set defaults for the variables that will be set during parsing */
5214 2569 : if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
5215 2567 : ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5216 :
5217 2569 : sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
5218 5138 : sbi->s_sectors_written_start =
5219 12845 : part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
5220 :
5221 2569 : err = ext4_load_super(sb, &logical_sb_block, silent);
5222 2569 : if (err)
5223 18 : goto out_fail;
5224 :
5225 2551 : es = sbi->s_es;
5226 2551 : sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
5227 :
5228 2551 : err = ext4_init_metadata_csum(sb, es);
5229 2551 : if (err)
5230 0 : goto failed_mount;
5231 :
5232 2551 : ext4_set_def_opts(sb, es);
5233 :
5234 2551 : sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
5235 2551 : sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
5236 2551 : sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
5237 2551 : sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
5238 2551 : sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
5239 :
5240 : /*
5241 : * set default s_li_wait_mult for lazyinit, for the case there is
5242 : * no mount option specified.
5243 : */
5244 2551 : sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
5245 :
5246 2551 : err = ext4_inode_info_init(sb, es);
5247 2551 : if (err)
5248 0 : goto failed_mount;
5249 :
5250 2551 : err = parse_apply_sb_mount_options(sb, ctx);
5251 2551 : if (err < 0)
5252 0 : goto failed_mount;
5253 :
5254 2551 : sbi->s_def_mount_opt = sbi->s_mount_opt;
5255 2551 : sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
5256 :
5257 2551 : err = ext4_check_opt_consistency(fc, sb);
5258 2551 : if (err < 0)
5259 18 : goto failed_mount;
5260 :
5261 2533 : ext4_apply_options(fc, sb);
5262 :
5263 2533 : err = ext4_encoding_init(sb, es);
5264 2533 : if (err)
5265 : goto failed_mount;
5266 :
5267 2533 : err = ext4_check_journal_data_mode(sb);
5268 2533 : if (err)
5269 0 : goto failed_mount;
5270 :
5271 2533 : sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5272 2533 : (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5273 :
5274 : /* i_version is always enabled now */
5275 2533 : sb->s_flags |= SB_I_VERSION;
5276 :
5277 2533 : err = ext4_check_feature_compatibility(sb, es, silent);
5278 2533 : if (err)
5279 12 : goto failed_mount;
5280 :
5281 2521 : err = ext4_block_group_meta_init(sb, silent);
5282 2521 : if (err)
5283 0 : goto failed_mount;
5284 :
5285 2521 : ext4_hash_info_init(sb);
5286 :
5287 2521 : err = ext4_handle_clustersize(sb);
5288 2521 : if (err)
5289 0 : goto failed_mount;
5290 :
5291 2521 : err = ext4_check_geometry(sb, es);
5292 2521 : if (err)
5293 0 : goto failed_mount;
5294 :
5295 2521 : timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
5296 2521 : spin_lock_init(&sbi->s_error_lock);
5297 2521 : INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
5298 :
5299 2521 : err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
5300 2521 : if (err)
5301 2 : goto failed_mount3;
5302 :
5303 2519 : err = ext4_es_register_shrinker(sbi);
5304 2519 : if (err)
5305 0 : goto failed_mount3;
5306 :
5307 2519 : sbi->s_stripe = ext4_get_stripe_size(sbi);
5308 : /*
5309 : * It's hard to get stripe-aligned blocks if the stripe is not aligned
5310 : * with the cluster size, so just disable the stripe and alert the user;
5311 : * this simplifies the code and avoids stripe-aligned allocation, which will rarely succeed.
5312 : */
5313 2519 : if (sbi->s_stripe > 0 && sbi->s_cluster_ratio > 1 &&
5314 4 : sbi->s_stripe % sbi->s_cluster_ratio != 0) {
5315 0 : ext4_msg(sb, KERN_WARNING,
5316 : "stripe (%lu) is not aligned with cluster size (%u), "
5317 : "stripe is disabled",
5318 : sbi->s_stripe, sbi->s_cluster_ratio);
5319 0 : sbi->s_stripe = 0;
5320 : }
5321 2519 : sbi->s_extent_max_zeroout_kb = 32;
5322 :
5323 : /*
5324 : * set up enough so that it can read an inode
5325 : */
5326 2519 : sb->s_op = &ext4_sops;
5327 2519 : sb->s_export_op = &ext4_export_ops;
5328 2519 : sb->s_xattr = ext4_xattr_handlers;
5329 : #ifdef CONFIG_FS_ENCRYPTION
5330 : sb->s_cop = &ext4_cryptops;
5331 : #endif
5332 : #ifdef CONFIG_FS_VERITY
5333 : sb->s_vop = &ext4_verityops;
5334 : #endif
5335 : #ifdef CONFIG_QUOTA
5336 2519 : sb->dq_op = &ext4_quota_operations;
5337 2519 : if (ext4_has_feature_quota(sb))
5338 64 : sb->s_qcop = &dquot_quotactl_sysfile_ops;
5339 : else
5340 2455 : sb->s_qcop = &ext4_qctl_operations;
5341 2519 : sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
5342 : #endif
5343 5038 : memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
5344 :
5345 2519 : INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5346 2519 : mutex_init(&sbi->s_orphan_lock);
5347 :
5348 2519 : ext4_fast_commit_init(sb);
5349 :
5350 2519 : sb->s_root = NULL;
5351 :
5352 2504 : needs_recovery = (es->s_last_orphan != 0 ||
5353 5023 : ext4_has_feature_orphan_present(sb) ||
5354 : ext4_has_feature_journal_needs_recovery(sb));
5355 :
5356 2519 : if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
5357 4 : err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
5358 4 : if (err)
5359 1 : goto failed_mount3a;
5360 : }
5361 :
5362 2518 : err = -EINVAL;
5363 : /*
5364 : * The first inode we look at is the journal inode. Don't try
5365 : * root first: it may be modified in the journal!
5366 : */
5367 2518 : if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
5368 2505 : err = ext4_load_and_init_journal(sb, es, ctx);
5369 2505 : if (err)
5370 4 : goto failed_mount3a;
5371 13 : } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
5372 : ext4_has_feature_journal_needs_recovery(sb)) {
5373 1 : ext4_msg(sb, KERN_ERR, "required journal recovery "
5374 : "suppressed and not mounted read-only");
5375 1 : goto failed_mount3a;
5376 : } else {
5377 : /* Nojournal mode, all journal mount options are illegal */
5378 12 : if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5379 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
5380 : "journal_async_commit, fs mounted w/o journal");
5381 0 : goto failed_mount3a;
5382 : }
5383 :
5384 12 : if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
5385 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
5386 : "journal_checksum, fs mounted w/o journal");
5387 0 : goto failed_mount3a;
5388 : }
5389 12 : if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
5390 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
5391 : "commit=%lu, fs mounted w/o journal",
5392 : sbi->s_commit_interval / HZ);
5393 0 : goto failed_mount3a;
5394 : }
5395 12 : if (EXT4_MOUNT_DATA_FLAGS &
5396 12 : (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
5397 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
5398 : "data=, fs mounted w/o journal");
5399 0 : goto failed_mount3a;
5400 : }
5401 12 : sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
5402 12 : clear_opt(sb, JOURNAL_CHECKSUM);
5403 12 : clear_opt(sb, DATA_FLAGS);
5404 12 : clear_opt2(sb, JOURNAL_FAST_COMMIT);
5405 12 : sbi->s_journal = NULL;
5406 12 : needs_recovery = 0;
5407 : }
5408 :
5409 2513 : if (!test_opt(sb, NO_MBCACHE)) {
5410 2509 : sbi->s_ea_block_cache = ext4_xattr_create_cache();
5411 2509 : if (!sbi->s_ea_block_cache) {
5412 0 : ext4_msg(sb, KERN_ERR,
5413 : "Failed to create ea_block_cache");
5414 0 : err = -EINVAL;
5415 0 : goto failed_mount_wq;
5416 : }
5417 :
5418 2509 : if (ext4_has_feature_ea_inode(sb)) {
5419 3 : sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5420 3 : if (!sbi->s_ea_inode_cache) {
5421 0 : ext4_msg(sb, KERN_ERR,
5422 : "Failed to create ea_inode_cache");
5423 0 : err = -EINVAL;
5424 0 : goto failed_mount_wq;
5425 : }
5426 : }
5427 : }
5428 :
5429 : /*
5430 : * Get the # of file system overhead blocks from the
5431 : * superblock if present.
5432 : */
5433 2513 : sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5434 : /* ignore the precalculated value if it is ridiculous */
5435 4834 : if (sbi->s_overhead > ext4_blocks_count(es))
5436 0 : sbi->s_overhead = 0;
5437 : /*
5438 : * If the bigalloc feature is not enabled recalculating the
5439 : * overhead doesn't take long, so we might as well just redo
5440 : * it to make sure we are using the correct value.
5441 : */
5442 2513 : if (!ext4_has_feature_bigalloc(sb))
5443 2481 : sbi->s_overhead = 0;
5444 2513 : if (sbi->s_overhead == 0) {
5445 2481 : err = ext4_calculate_overhead(sb);
5446 2481 : if (err)
5447 0 : goto failed_mount_wq;
5448 : }
5449 :
5450 : /*
5451 : * The maximum number of concurrent works can be high and
5452 : * concurrency isn't really necessary. Limit it to 1.
5453 : */
5454 2513 : EXT4_SB(sb)->rsv_conversion_wq =
5455 2513 : alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5456 2513 : if (!EXT4_SB(sb)->rsv_conversion_wq) {
5457 0 : printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
5458 0 : err = -ENOMEM;
5459 0 : goto failed_mount4;
5460 : }
5461 :
5462 : /*
5463 : * The jbd2_journal_load will have done any necessary log recovery,
5464 : * so we can safely mount the rest of the filesystem now.
5465 : */
5466 :
5467 2513 : root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
5468 2513 : if (IS_ERR(root)) {
5469 0 : ext4_msg(sb, KERN_ERR, "get root inode failed");
5470 0 : err = PTR_ERR(root);
5471 0 : root = NULL;
5472 0 : goto failed_mount4;
5473 : }
5474 2513 : if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
5475 0 : ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
5476 0 : iput(root);
5477 0 : err = -EFSCORRUPTED;
5478 0 : goto failed_mount4;
5479 : }
5480 :
5481 2513 : sb->s_root = d_make_root(root);
5482 2513 : if (!sb->s_root) {
5483 0 : ext4_msg(sb, KERN_ERR, "get root dentry failed");
5484 0 : err = -ENOMEM;
5485 0 : goto failed_mount4;
5486 : }
5487 :
5488 2513 : err = ext4_setup_super(sb, es, sb_rdonly(sb));
5489 2513 : if (err == -EROFS) {
5490 0 : sb->s_flags |= SB_RDONLY;
5491 2513 : } else if (err)
5492 0 : goto failed_mount4a;
5493 :
5494 2513 : ext4_set_resv_clusters(sb);
5495 :
5496 2513 : if (test_opt(sb, BLOCK_VALIDITY)) {
5497 2511 : err = ext4_setup_system_zone(sb);
5498 2511 : if (err) {
5499 0 : ext4_msg(sb, KERN_ERR, "failed to initialize system "
5500 : "zone (%d)", err);
5501 0 : goto failed_mount4a;
5502 : }
5503 : }
5504 2513 : ext4_fc_replay_cleanup(sb);
5505 :
5506 2513 : ext4_ext_init(sb);
5507 :
5508 : /*
5509 : * Enable optimize_scan if number of groups is > threshold. This can be
5510 : * turned off by passing "mb_optimize_scan=0". This can also be
5511 : * turned on forcefully by passing "mb_optimize_scan=1".
5512 : */
5513 2513 : if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
5514 2513 : if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5515 1844 : set_opt2(sb, MB_OPTIMIZE_SCAN);
5516 : else
5517 669 : clear_opt2(sb, MB_OPTIMIZE_SCAN);
5518 : }
5519 :
5520 2513 : err = ext4_mb_init(sb);
5521 2513 : if (err) {
5522 0 : ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5523 : err);
5524 0 : goto failed_mount5;
5525 : }
5526 :
5527 : /*
5528 : * We can only set up the journal commit callback once
5529 : * mballoc is initialized
5530 : */
5531 2513 : if (sbi->s_journal)
5532 2501 : sbi->s_journal->j_commit_callback =
5533 : ext4_journal_commit_callback;
5534 :
5535 2513 : err = ext4_percpu_param_init(sbi);
5536 2513 : if (err)
5537 0 : goto failed_mount6;
5538 :
5539 2513 : if (ext4_has_feature_flex_bg(sb))
5540 2322 : if (!ext4_fill_flex_info(sb)) {
5541 0 : ext4_msg(sb, KERN_ERR,
5542 : "unable to initialize "
5543 : "flex_bg meta info!");
5544 0 : err = -ENOMEM;
5545 0 : goto failed_mount6;
5546 : }
5547 :
5548 2513 : err = ext4_register_li_request(sb, first_not_zeroed);
5549 2513 : if (err)
5550 0 : goto failed_mount6;
5551 :
5552 2513 : err = ext4_register_sysfs(sb);
5553 2513 : if (err)
5554 0 : goto failed_mount7;
5555 :
5556 2513 : err = ext4_init_orphan_info(sb);
5557 2513 : if (err)
5558 0 : goto failed_mount8;
5559 : #ifdef CONFIG_QUOTA
5560 : /* Enable quota usage during mount. */
5561 2513 : if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
5562 64 : err = ext4_enable_quotas(sb);
5563 64 : if (err)
5564 0 : goto failed_mount9;
5565 : }
5566 : #endif /* CONFIG_QUOTA */
5567 :
5568 : /*
5569 : * Save the original bdev mapping's wb_err value which could be
5570 : * used to detect the metadata async write error.
5571 : */
5572 2513 : spin_lock_init(&sbi->s_bdev_wb_lock);
5573 2513 : errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
5574 : &sbi->s_bdev_wb_err);
5575 2513 : sb->s_bdev->bd_super = sb;
5576 2513 : EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5577 2513 : ext4_orphan_cleanup(sb, es);
5578 2513 : EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
5579 : /*
5580 : * Update the checksum after updating free space/inode counters and
5581 : * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
5582 : * checksum in the buffer cache until it is written out and
5583 : * e2fsprogs programs trying to open a file system immediately
5584 : * after it is mounted can fail.
5585 : */
5586 2513 : ext4_superblock_csum_set(sb);
5587 2513 : if (needs_recovery) {
5588 261 : ext4_msg(sb, KERN_INFO, "recovery complete");
5589 261 : err = ext4_mark_recovery_complete(sb, es);
5590 261 : if (err)
5591 0 : goto failed_mount10;
5592 : }
5593 :
5594 2513 : if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
5595 0 : ext4_msg(sb, KERN_WARNING,
5596 : "mounting with \"discard\" option, but the device does not support discard");
5597 :
5598 2513 : if (es->s_error_count)
5599 9 : mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
5600 :
5601 : /* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5602 2513 : ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5603 2513 : ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5604 2513 : ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5605 2513 : atomic_set(&sbi->s_warning_count, 0);
5606 2513 : atomic_set(&sbi->s_msg_count, 0);
5607 :
5608 2513 : return 0;
5609 :
5610 : failed_mount10:
5611 0 : ext4_quotas_off(sb, EXT4_MAXQUOTAS);
5612 0 : failed_mount9: __maybe_unused
5613 0 : ext4_release_orphan_info(sb);
5614 0 : failed_mount8:
5615 0 : ext4_unregister_sysfs(sb);
5616 0 : kobject_put(&sbi->s_kobj);
5617 0 : failed_mount7:
5618 0 : ext4_unregister_li_request(sb);
5619 0 : failed_mount6:
5620 0 : ext4_mb_release(sb);
5621 0 : ext4_flex_groups_free(sbi);
5622 0 : ext4_percpu_param_destroy(sbi);
5623 0 : failed_mount5:
5624 0 : ext4_ext_release(sb);
5625 0 : ext4_release_system_zone(sb);
5626 0 : failed_mount4a:
5627 0 : dput(sb->s_root);
5628 0 : sb->s_root = NULL;
5629 0 : failed_mount4:
5630 0 : ext4_msg(sb, KERN_ERR, "mount failed");
5631 0 : if (EXT4_SB(sb)->rsv_conversion_wq)
5632 0 : destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5633 0 : failed_mount_wq:
5634 0 : ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5635 0 : sbi->s_ea_inode_cache = NULL;
5636 :
5637 0 : ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5638 0 : sbi->s_ea_block_cache = NULL;
5639 :
5640 0 : if (sbi->s_journal) {
5641 : /* flush s_error_work before journal destroy. */
5642 0 : flush_work(&sbi->s_error_work);
5643 0 : jbd2_journal_destroy(sbi->s_journal);
5644 0 : sbi->s_journal = NULL;
5645 : }
5646 0 : failed_mount3a:
5647 6 : ext4_es_unregister_shrinker(sbi);
5648 8 : failed_mount3:
5649 : /* flush s_error_work before sbi destroy */
5650 8 : flush_work(&sbi->s_error_work);
5651 8 : del_timer_sync(&sbi->s_err_report);
5652 8 : ext4_stop_mmpd(sbi);
5653 8 : ext4_group_desc_free(sbi);
5654 38 : failed_mount:
5655 38 : if (sbi->s_chksum_driver)
5656 38 : crypto_free_shash(sbi->s_chksum_driver);
5657 :
5658 : #if IS_ENABLED(CONFIG_UNICODE)
5659 : utf8_unload(sb->s_encoding);
5660 : #endif
5661 :
5662 : #ifdef CONFIG_QUOTA
5663 152 : for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
5664 114 : kfree(get_qf_name(sb, sbi, i));
5665 : #endif
5666 38 : fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5667 : /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
5668 38 : brelse(sbi->s_sbh);
5669 38 : ext4_blkdev_remove(sbi);
5670 56 : out_fail:
5671 56 : invalidate_bdev(sb->s_bdev);
5672 56 : sb->s_fs_info = NULL;
5673 56 : return err;
5674 : }
5675 :
5676 2569 : static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5677 : {
5678 2569 : struct ext4_fs_context *ctx = fc->fs_private;
5679 2569 : struct ext4_sb_info *sbi;
5680 2569 : const char *descr;
5681 2569 : int ret;
5682 :
5683 2569 : sbi = ext4_alloc_sbi(sb);
5684 2569 : if (!sbi)
5685 : return -ENOMEM;
5686 :
5687 2569 : fc->s_fs_info = sbi;
5688 :
5689 : /* Cleanup superblock name */
5690 2569 : strreplace(sb->s_id, '/', '!');
5691 :
5692 2569 : sbi->s_sb_block = 1; /* Default super block location */
5693 2569 : if (ctx->spec & EXT4_SPEC_s_sb_block)
5694 2 : sbi->s_sb_block = ctx->s_sb_block;
5695 :
5696 2569 : ret = __ext4_fill_super(fc, sb);
5697 2569 : if (ret < 0)
5698 56 : goto free_sbi;
5699 :
5700 2513 : if (sbi->s_journal) {
5701 2501 : if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5702 : descr = " journalled data mode";
5703 2481 : else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5704 : descr = " ordered data mode";
5705 : else
5706 27 : descr = " writeback data mode";
5707 : } else
5708 : descr = "out journal";
5709 :
5710 2513 : if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5711 4096 : ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
5712 : "Quota mode: %s.", &sb->s_uuid,
5713 : sb_rdonly(sb) ? "ro" : "r/w", descr,
5714 : ext4_quota_mode(sb));
5715 :
5716 : /* Update the s_overhead_clusters if necessary */
5717 2513 : ext4_update_overhead(sb, false);
5718 2513 : return 0;
5719 :
5720 : free_sbi:
5721 56 : ext4_free_sbi(sbi);
5722 56 : fc->s_fs_info = NULL;
5723 56 : return ret;
5724 : }
5725 :
5726 2863 : static int ext4_get_tree(struct fs_context *fc)
5727 : {
5728 2863 : return get_tree_bdev(fc, ext4_fill_super);
5729 : }
5730 :
5731 : /*
5732 : * Setup any per-fs journal parameters now. We'll do this both on
5733 : * initial mount, once the journal has been initialised but before we've
5734 : * done any recovery; and again on any subsequent remount.
5735 : */
5736 3116 : static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5737 : {
5738 3116 : struct ext4_sb_info *sbi = EXT4_SB(sb);
5739 :
5740 3116 : journal->j_commit_interval = sbi->s_commit_interval;
5741 3116 : journal->j_min_batch_time = sbi->s_min_batch_time;
5742 3116 : journal->j_max_batch_time = sbi->s_max_batch_time;
5743 3116 : ext4_fc_init(sb, journal);
5744 :
5745 3116 : write_lock(&journal->j_state_lock);
5746 3116 : if (test_opt(sb, BARRIER))
5747 3086 : journal->j_flags |= JBD2_BARRIER;
5748 : else
5749 30 : journal->j_flags &= ~JBD2_BARRIER;
5750 3116 : if (test_opt(sb, DATA_ERR_ABORT))
5751 18 : journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5752 : else
5753 3098 : journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
5754 : /*
5755 : * Always enable journal cycle record option, letting the journal
5756 : * records log transactions continuously between each mount.
5757 : */
5758 3116 : journal->j_flags |= JBD2_CYCLE_RECORD;
5759 3116 : write_unlock(&journal->j_state_lock);
5760 3116 : }
5761 :
5762 2501 : static struct inode *ext4_get_journal_inode(struct super_block *sb,
5763 : unsigned int journal_inum)
5764 : {
5765 2501 : struct inode *journal_inode;
5766 :
5767 : /*
5768 : * Test for the existence of a valid inode on disk. Bad things
5769 : * happen if we iget() an unused inode, as the subsequent iput()
5770 : * will try to delete it.
5771 : */
5772 2501 : journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5773 2501 : if (IS_ERR(journal_inode)) {
5774 0 : ext4_msg(sb, KERN_ERR, "no journal found");
5775 0 : return NULL;
5776 : }
5777 2501 : if (!journal_inode->i_nlink) {
5778 0 : make_bad_inode(journal_inode);
5779 0 : iput(journal_inode);
5780 0 : ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5781 0 : return NULL;
5782 : }
5783 :
5784 2501 : ext4_debug("Journal inode found at %p: %lld bytes\n",
5785 : journal_inode, journal_inode->i_size);
5786 2501 : if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5787 0 : ext4_msg(sb, KERN_ERR, "invalid journal inode");
5788 0 : iput(journal_inode);
5789 0 : return NULL;
5790 : }
5791 : return journal_inode;
5792 : }
5793 :
5794 3691533 : static int ext4_journal_bmap(journal_t *journal, sector_t *block)
5795 : {
5796 3691533 : struct ext4_map_blocks map;
5797 3691533 : int ret;
5798 :
5799 3691533 : if (journal->j_inode == NULL)
5800 : return 0;
5801 :
5802 3691533 : map.m_lblk = *block;
5803 3691533 : map.m_len = 1;
5804 3691533 : ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
5805 3691533 : if (ret <= 0) {
5806 0 : ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
5807 : "journal bmap failed: block %llu ret %d\n",
5808 : *block, ret);
5809 0 : jbd2_journal_abort(journal, ret ? ret : -EIO);
5810 0 : return ret;
5811 : }
5812 3691533 : *block = map.m_pblk;
5813 3691533 : return 0;
5814 : }
5815 :
5816 2491 : static journal_t *ext4_get_journal(struct super_block *sb,
5817 : unsigned int journal_inum)
5818 : {
5819 2491 : struct inode *journal_inode;
5820 2491 : journal_t *journal;
5821 :
5822 2491 : if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5823 : return NULL;
5824 :
5825 2491 : journal_inode = ext4_get_journal_inode(sb, journal_inum);
5826 2491 : if (!journal_inode)
5827 : return NULL;
5828 :
5829 2491 : journal = jbd2_journal_init_inode(journal_inode);
5830 2491 : if (!journal) {
5831 0 : ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5832 0 : iput(journal_inode);
5833 0 : return NULL;
5834 : }
5835 2491 : journal->j_private = sb;
5836 2491 : journal->j_bmap = ext4_journal_bmap;
5837 2491 : ext4_init_journal_params(sb, journal);
5838 2491 : return journal;
5839 : }
5840 :
5841 14 : static journal_t *ext4_get_dev_journal(struct super_block *sb,
5842 : dev_t j_dev)
5843 : {
5844 14 : struct buffer_head *bh;
5845 14 : journal_t *journal;
5846 14 : ext4_fsblk_t start;
5847 14 : ext4_fsblk_t len;
5848 14 : int hblock, blocksize;
5849 14 : ext4_fsblk_t sb_block;
5850 14 : unsigned long offset;
5851 14 : struct ext4_super_block *es;
5852 14 : struct block_device *bdev;
5853 :
5854 14 : if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5855 : return NULL;
5856 :
5857 14 : bdev = ext4_blkdev_get(j_dev, sb);
5858 14 : if (bdev == NULL)
5859 : return NULL;
5860 :
5861 12 : blocksize = sb->s_blocksize;
5862 12 : hblock = bdev_logical_block_size(bdev);
5863 12 : if (blocksize < hblock) {
5864 0 : ext4_msg(sb, KERN_ERR,
5865 : "blocksize too small for journal device");
5866 0 : goto out_bdev;
5867 : }
5868 :
5869 12 : sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5870 12 : offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5871 12 : set_blocksize(bdev, blocksize);
5872 12 : if (!(bh = __bread(bdev, sb_block, blocksize))) {
5873 0 : ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5874 : "external journal");
5875 0 : goto out_bdev;
5876 : }
5877 :
5878 12 : es = (struct ext4_super_block *) (bh->b_data + offset);
5879 12 : if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5880 12 : !(le32_to_cpu(es->s_feature_incompat) &
5881 : EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5882 0 : ext4_msg(sb, KERN_ERR, "external journal has "
5883 : "bad superblock");
5884 0 : brelse(bh);
5885 0 : goto out_bdev;
5886 : }
5887 :
5888 12 : if ((le32_to_cpu(es->s_feature_ro_compat) &
5889 6 : EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5890 6 : es->s_checksum != ext4_superblock_csum(sb, es)) {
5891 0 : ext4_msg(sb, KERN_ERR, "external journal has "
5892 : "corrupt superblock");
5893 0 : brelse(bh);
5894 0 : goto out_bdev;
5895 : }
5896 :
5897 24 : if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5898 0 : ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5899 0 : brelse(bh);
5900 0 : goto out_bdev;
5901 : }
5902 :
5903 12 : len = ext4_blocks_count(es);
5904 12 : start = sb_block + 1;
5905 12 : brelse(bh); /* we're done with the superblock */
5906 :
5907 12 : journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
5908 : start, len, blocksize);
5909 12 : if (!journal) {
5910 0 : ext4_msg(sb, KERN_ERR, "failed to create device journal");
5911 0 : goto out_bdev;
5912 : }
5913 12 : journal->j_private = sb;
5914 12 : if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
5915 0 : ext4_msg(sb, KERN_ERR, "I/O error on journal device");
5916 0 : goto out_journal;
5917 : }
5918 12 : if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5919 0 : ext4_msg(sb, KERN_ERR, "External journal has more than one "
5920 : "user (unsupported) - %d",
5921 : be32_to_cpu(journal->j_superblock->s_nr_users));
5922 0 : goto out_journal;
5923 : }
5924 12 : EXT4_SB(sb)->s_journal_bdev = bdev;
5925 12 : ext4_init_journal_params(sb, journal);
5926 12 : return journal;
5927 :
5928 0 : out_journal:
5929 0 : jbd2_journal_destroy(journal);
5930 0 : out_bdev:
5931 0 : blkdev_put(bdev, sb);
5932 0 : return NULL;
5933 : }
5934 :
5935 2505 : static int ext4_load_journal(struct super_block *sb,
5936 : struct ext4_super_block *es,
5937 : unsigned long journal_devnum)
5938 : {
5939 2505 : journal_t *journal;
5940 2505 : unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5941 2505 : dev_t journal_dev;
5942 2505 : int err = 0;
5943 2505 : int really_read_only;
5944 2505 : int journal_dev_ro;
5945 :
5946 2505 : if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5947 : return -EFSCORRUPTED;
5948 :
5949 2505 : if (journal_devnum &&
5950 6 : journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5951 2 : ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5952 : "numbers have changed");
5953 2 : journal_dev = new_decode_dev(journal_devnum);
5954 : } else
5955 2503 : journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5956 :
5957 2505 : if (journal_inum && journal_dev) {
5958 0 : ext4_msg(sb, KERN_ERR,
5959 : "filesystem has both journal inode and journal device!");
5960 0 : return -EINVAL;
5961 : }
5962 :
5963 2505 : if (journal_inum) {
5964 2491 : journal = ext4_get_journal(sb, journal_inum);
5965 2491 : if (!journal)
5966 : return -EINVAL;
5967 : } else {
5968 14 : journal = ext4_get_dev_journal(sb, journal_dev);
5969 14 : if (!journal)
5970 : return -EINVAL;
5971 : }
5972 :
5973 2503 : journal_dev_ro = bdev_read_only(journal->j_dev);
5974 2503 : really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
5975 :
5976 2503 : if (journal_dev_ro && !sb_rdonly(sb)) {
5977 0 : ext4_msg(sb, KERN_ERR,
5978 : "journal device read-only, try mounting with '-o ro'");
5979 0 : err = -EROFS;
5980 0 : goto err_out;
5981 : }
5982 :
5983 : /*
5984 : * Are we loading a blank journal or performing recovery after a
5985 : * crash? For recovery, we need to check in advance whether we
5986 : * can get read-write access to the device.
5987 : */
5988 2503 : if (ext4_has_feature_journal_needs_recovery(sb)) {
5989 251 : if (sb_rdonly(sb)) {
5990 4 : ext4_msg(sb, KERN_INFO, "INFO: recovery "
5991 : "required on readonly filesystem");
5992 4 : if (really_read_only) {
5993 1 : ext4_msg(sb, KERN_ERR, "write access "
5994 : "unavailable, cannot proceed "
5995 : "(try mounting with noload)");
5996 1 : err = -EROFS;
5997 1 : goto err_out;
5998 : }
5999 3 : ext4_msg(sb, KERN_INFO, "write access will "
6000 : "be enabled during recovery");
6001 : }
6002 : }
6003 :
6004 2502 : if (!(journal->j_flags & JBD2_BARRIER))
6005 10 : ext4_msg(sb, KERN_INFO, "barriers disabled");
6006 :
6007 2502 : if (!ext4_has_feature_journal_needs_recovery(sb))
6008 2252 : err = jbd2_journal_wipe(journal, !really_read_only);
6009 2252 : if (!err) {
6010 2501 : char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
6011 2501 : __le16 orig_state;
6012 2501 : bool changed = false;
6013 :
6014 2501 : if (save)
6015 5002 : memcpy(save, ((char *) es) +
6016 : EXT4_S_ERR_START, EXT4_S_ERR_LEN);
6017 2501 : err = jbd2_journal_load(journal);
6018 5002 : if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
6019 : save, EXT4_S_ERR_LEN)) {
6020 0 : memcpy(((char *) es) + EXT4_S_ERR_START,
6021 : save, EXT4_S_ERR_LEN);
6022 0 : changed = true;
6023 : }
6024 2501 : kfree(save);
6025 2501 : orig_state = es->s_state;
6026 2501 : es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
6027 : EXT4_ERROR_FS);
6028 2501 : if (orig_state != es->s_state)
6029 0 : changed = true;
6030 : /* Write out restored error information to the superblock */
6031 2501 : if (changed && !really_read_only) {
6032 0 : int err2;
6033 0 : err2 = ext4_commit_super(sb);
6034 0 : err = err ? : err2;
6035 : }
6036 : }
6037 :
6038 2502 : if (err) {
6039 1 : ext4_msg(sb, KERN_ERR, "error loading journal");
6040 1 : goto err_out;
6041 : }
6042 :
6043 2501 : EXT4_SB(sb)->s_journal = journal;
6044 2501 : err = ext4_clear_journal_err(sb, es);
6045 2501 : if (err) {
6046 0 : EXT4_SB(sb)->s_journal = NULL;
6047 0 : jbd2_journal_destroy(journal);
6048 0 : return err;
6049 : }
6050 :
6051 2501 : if (!really_read_only && journal_devnum &&
6052 4 : journal_devnum != le32_to_cpu(es->s_journal_dev)) {
6053 0 : es->s_journal_dev = cpu_to_le32(journal_devnum);
6054 0 : ext4_commit_super(sb);
6055 : }
6056 2501 : if (!really_read_only && journal_inum &&
6057 2487 : journal_inum != le32_to_cpu(es->s_journal_inum)) {
6058 0 : es->s_journal_inum = cpu_to_le32(journal_inum);
6059 0 : ext4_commit_super(sb);
6060 : }
6061 :
6062 : return 0;
6063 :
6064 2 : err_out:
6065 2 : jbd2_journal_destroy(journal);
6066 2 : return err;
6067 : }
6068 :
6069 : /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
6070 5747 : static void ext4_update_super(struct super_block *sb)
6071 : {
6072 5747 : struct ext4_sb_info *sbi = EXT4_SB(sb);
6073 5747 : struct ext4_super_block *es = sbi->s_es;
6074 5747 : struct buffer_head *sbh = sbi->s_sbh;
6075 :
6076 5747 : lock_buffer(sbh);
6077 : /*
6078 : * If the file system is mounted read-only, don't update the
6079 : * superblock write time. This avoids updating the superblock
6080 : * write time when we are mounting the root file system
6081 : * read/only but we need to replay the journal; at that point,
6082 : * for people who are east of GMT and who make their clock
6083 : * tick in localtime for Windows bug-for-bug compatibility,
6084 : * the clock is set in the future, and this will cause e2fsck
6085 : * to complain and force a full file system check.
6086 : */
6087 5747 : if (!(sb->s_flags & SB_RDONLY))
6088 5656 : ext4_update_tstamp(es, s_wtime);
6089 11494 : es->s_kbytes_written =
6090 28735 : cpu_to_le64(sbi->s_kbytes_written +
6091 : ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
6092 : sbi->s_sectors_written_start) >> 1));
6093 5747 : if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
6094 3246 : ext4_free_blocks_count_set(es,
6095 6492 : EXT4_C2B(sbi, percpu_counter_sum_positive(
6096 : &sbi->s_freeclusters_counter)));
6097 5747 : if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
6098 6492 : es->s_free_inodes_count =
6099 3246 : cpu_to_le32(percpu_counter_sum_positive(
6100 : &sbi->s_freeinodes_counter));
6101 : /* Copy error information to the on-disk superblock */
6102 5747 : spin_lock(&sbi->s_error_lock);
6103 5747 : if (sbi->s_add_error_count > 0) {
6104 427 : es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6105 427 : if (!es->s_first_error_time && !es->s_first_error_time_hi) {
6106 25 : __ext4_update_tstamp(&es->s_first_error_time,
6107 : &es->s_first_error_time_hi,
6108 : sbi->s_first_error_time);
6109 25 : strncpy(es->s_first_error_func, sbi->s_first_error_func,
6110 : sizeof(es->s_first_error_func));
6111 25 : es->s_first_error_line =
6112 25 : cpu_to_le32(sbi->s_first_error_line);
6113 25 : es->s_first_error_ino =
6114 25 : cpu_to_le32(sbi->s_first_error_ino);
6115 25 : es->s_first_error_block =
6116 25 : cpu_to_le64(sbi->s_first_error_block);
6117 25 : es->s_first_error_errcode =
6118 25 : ext4_errno_to_code(sbi->s_first_error_code);
6119 : }
6120 427 : __ext4_update_tstamp(&es->s_last_error_time,
6121 : &es->s_last_error_time_hi,
6122 : sbi->s_last_error_time);
6123 427 : strncpy(es->s_last_error_func, sbi->s_last_error_func,
6124 : sizeof(es->s_last_error_func));
6125 427 : es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
6126 427 : es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
6127 427 : es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
6128 854 : es->s_last_error_errcode =
6129 427 : ext4_errno_to_code(sbi->s_last_error_code);
6130 : /*
6131 : * Start the daily error reporting function if it hasn't been
6132 : * started already
6133 : */
6134 427 : if (!es->s_error_count)
6135 25 : mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
6136 427 : le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
6137 427 : sbi->s_add_error_count = 0;
6138 : }
6139 5747 : spin_unlock(&sbi->s_error_lock);
6140 :
6141 5747 : ext4_superblock_csum_set(sb);
6142 5747 : unlock_buffer(sbh);
6143 5747 : }
6144 :
6145 5325 : static int ext4_commit_super(struct super_block *sb)
6146 : {
6147 5325 : struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
6148 :
6149 5325 : if (!sbh)
6150 : return -EINVAL;
6151 5325 : if (block_device_ejected(sb))
6152 : return -ENODEV;
6153 :
6154 5325 : ext4_update_super(sb);
6155 :
6156 5325 : lock_buffer(sbh);
6157 : /* Buffer got discarded which means block device got invalidated */
6158 10650 : if (!buffer_mapped(sbh)) {
6159 0 : unlock_buffer(sbh);
6160 0 : return -EIO;
6161 : }
6162 :
6163 15974 : if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
6164 : /*
6165 : * Oh, dear. A previous attempt to write the
6166 : * superblock failed. This could happen because the
6167 : * USB device was yanked out. Or it could happen to
6168 : * be a transient write error and maybe the block will
6169 : * be remapped. Nothing we can do but to retry the
6170 : * write and hope for the best.
6171 : */
6172 1 : ext4_msg(sb, KERN_ERR, "previous I/O error to "
6173 : "superblock detected");
6174 1 : clear_buffer_write_io_error(sbh);
6175 1 : set_buffer_uptodate(sbh);
6176 : }
6177 5325 : get_bh(sbh);
6178 : /* Clear potential dirty bit if it was journalled update */
6179 5325 : clear_buffer_dirty(sbh);
6180 5325 : sbh->b_end_io = end_buffer_write_sync;
6181 5325 : submit_bh(REQ_OP_WRITE | REQ_SYNC |
6182 5325 : (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
6183 5325 : wait_on_buffer(sbh);
6184 10650 : if (buffer_write_io_error(sbh)) {
6185 5 : ext4_msg(sb, KERN_ERR, "I/O error while writing "
6186 : "superblock");
6187 5 : clear_buffer_write_io_error(sbh);
6188 5 : set_buffer_uptodate(sbh);
6189 5 : return -EIO;
6190 : }
6191 : return 0;
6192 : }
6193 :
6194 : /*
6195 : * Have we just finished recovery? If so, and if we are mounting (or
6196 : * remounting) the filesystem readonly, then we will end up with a
6197 : * consistent fs on disk. Record that fact.
6198 : */
6199 306 : static int ext4_mark_recovery_complete(struct super_block *sb,
6200 : struct ext4_super_block *es)
6201 : {
6202 306 : int err;
6203 306 : journal_t *journal = EXT4_SB(sb)->s_journal;
6204 :
6205 306 : if (!ext4_has_feature_journal(sb)) {
6206 0 : if (journal != NULL) {
6207 0 : ext4_error(sb, "Journal got removed while the fs was "
6208 : "mounted!");
6209 0 : return -EFSCORRUPTED;
6210 : }
6211 : return 0;
6212 : }
6213 306 : jbd2_journal_lock_updates(journal);
6214 306 : err = jbd2_journal_flush(journal, 0);
6215 306 : if (err < 0)
6216 0 : goto out;
6217 :
6218 306 : if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6219 : ext4_has_feature_orphan_present(sb))) {
6220 48 : if (!ext4_orphan_file_empty(sb)) {
6221 0 : ext4_error(sb, "Orphan file not empty on read-only fs.");
6222 0 : err = -EFSCORRUPTED;
6223 0 : goto out;
6224 : }
6225 48 : ext4_clear_feature_journal_needs_recovery(sb);
6226 48 : ext4_clear_feature_orphan_present(sb);
6227 48 : ext4_commit_super(sb);
6228 : }
6229 258 : out:
6230 306 : jbd2_journal_unlock_updates(journal);
6231 306 : return err;
6232 : }
6233 :
6234 : /*
6235 : * If we are mounting (or read-write remounting) a filesystem whose journal
6236 : * has recorded an error from a previous lifetime, move that error to the
6237 : * main filesystem now.
6238 : */
6239 2544 : static int ext4_clear_journal_err(struct super_block *sb,
6240 : struct ext4_super_block *es)
6241 : {
6242 2544 : journal_t *journal;
6243 2544 : int j_errno;
6244 2544 : const char *errstr;
6245 :
6246 2544 : if (!ext4_has_feature_journal(sb)) {
6247 0 : ext4_error(sb, "Journal got removed while the fs was mounted!");
6248 0 : return -EFSCORRUPTED;
6249 : }
6250 :
6251 2544 : journal = EXT4_SB(sb)->s_journal;
6252 :
6253 : /*
6254 : * Now check for any error status which may have been recorded in the
6255 : * journal by a prior ext4_error() or ext4_abort()
6256 : */
6257 :
6258 2544 : j_errno = jbd2_journal_errno(journal);
6259 2544 : if (j_errno) {
6260 0 : char nbuf[16];
6261 :
6262 0 : errstr = ext4_decode_error(sb, j_errno, nbuf);
6263 0 : ext4_warning(sb, "Filesystem error recorded "
6264 : "from previous mount: %s", errstr);
6265 :
6266 0 : EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6267 0 : es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6268 0 : j_errno = ext4_commit_super(sb);
6269 0 : if (j_errno)
6270 0 : return j_errno;
6271 0 : ext4_warning(sb, "Marked fs in need of filesystem check.");
6272 :
6273 0 : jbd2_journal_clear_err(journal);
6274 0 : jbd2_journal_update_sb_errno(journal);
6275 : }
6276 : return 0;
6277 : }
6278 :
6279 : /*
6280 : * Force the running and committing transactions to commit,
6281 : * and wait on the commit.
6282 : */
6283 3352 : int ext4_force_commit(struct super_block *sb)
6284 : {
6285 3352 : journal_t *journal;
6286 :
6287 3352 : if (sb_rdonly(sb))
6288 : return 0;
6289 :
6290 3352 : journal = EXT4_SB(sb)->s_journal;
6291 3352 : return ext4_journal_force_commit(journal);
6292 : }
6293 :
6294 71841 : static int ext4_sync_fs(struct super_block *sb, int wait)
6295 : {
6296 71841 : int ret = 0;
6297 71841 : tid_t target;
6298 71841 : bool needs_barrier = false;
6299 71841 : struct ext4_sb_info *sbi = EXT4_SB(sb);
6300 :
6301 143682 : if (unlikely(ext4_forced_shutdown(sbi)))
6302 : return 0;
6303 :
6304 70683 : trace_ext4_sync_fs(sb, wait);
6305 70676 : flush_workqueue(sbi->rsv_conversion_wq);
6306 : /*
6307 : * Writeback quota in non-journalled quota case - journalled quota has
6308 : * no dirty dquots
6309 : */
6310 70686 : dquot_writeback_dquots(sb, -1);
6311 : /*
6312 : * Data writeback is possible w/o journal transaction, so barrier must
6313 : * being sent at the end of the function. But we can skip it if
6314 : * transaction_commit will do it for us.
6315 : */
6316 70682 : if (sbi->s_journal) {
6317 70628 : target = jbd2_get_latest_transaction(sbi->s_journal);
6318 105991 : if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6319 35370 : !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6320 19336 : needs_barrier = true;
6321 :
6322 70621 : if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6323 32138 : if (wait)
6324 16069 : ret = jbd2_log_wait_commit(sbi->s_journal,
6325 : target);
6326 : }
6327 54 : } else if (wait && test_opt(sb, BARRIER))
6328 : needs_barrier = true;
6329 70626 : if (needs_barrier) {
6330 19369 : int err;
6331 19369 : err = blkdev_issue_flush(sb->s_bdev);
6332 19344 : if (!ret)
6333 19344 : ret = err;
6334 : }
6335 :
6336 : return ret;
6337 : }
6338 :
6339 : /*
6340 : * LVM calls this function before a (read-only) snapshot is created. This
6341 : * gives us a chance to flush the journal completely and mark the fs clean.
6342 : *
6343 : * Note that only this function cannot bring a filesystem to be in a clean
6344 : * state independently. It relies on upper layer to stop all data & metadata
6345 : * modifications.
6346 : */
6347 109 : static int ext4_freeze(struct super_block *sb)
6348 : {
6349 109 : int error = 0;
6350 109 : journal_t *journal;
6351 :
6352 109 : if (sb_rdonly(sb))
6353 : return 0;
6354 :
6355 109 : journal = EXT4_SB(sb)->s_journal;
6356 :
6357 109 : if (journal) {
6358 : /* Now we set up the journal barrier. */
6359 109 : jbd2_journal_lock_updates(journal);
6360 :
6361 : /*
6362 : * Don't clear the needs_recovery flag if we failed to
6363 : * flush the journal.
6364 : */
6365 109 : error = jbd2_journal_flush(journal, 0);
6366 109 : if (error < 0)
6367 0 : goto out;
6368 :
6369 : /* Journal blocked and flushed, clear needs_recovery flag. */
6370 109 : ext4_clear_feature_journal_needs_recovery(sb);
6371 109 : if (ext4_orphan_file_empty(sb))
6372 109 : ext4_clear_feature_orphan_present(sb);
6373 : }
6374 :
6375 109 : error = ext4_commit_super(sb);
6376 109 : out:
6377 109 : if (journal)
6378 : /* we rely on upper layer to stop further updates */
6379 109 : jbd2_journal_unlock_updates(journal);
6380 : return error;
6381 : }
6382 :
6383 : /*
6384 : * Called by LVM after the snapshot is done. We need to reset the RECOVER
6385 : * flag here, even though the filesystem is not technically dirty yet.
6386 : */
6387 109 : static int ext4_unfreeze(struct super_block *sb)
6388 : {
6389 218 : if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
6390 : return 0;
6391 :
6392 109 : if (EXT4_SB(sb)->s_journal) {
6393 : /* Reset the needs_recovery flag before the fs is unlocked. */
6394 109 : ext4_set_feature_journal_needs_recovery(sb);
6395 109 : if (ext4_has_feature_orphan_file(sb))
6396 0 : ext4_set_feature_orphan_present(sb);
6397 : }
6398 :
6399 109 : ext4_commit_super(sb);
6400 109 : return 0;
6401 : }
6402 :
6403 : /*
6404 : * Structure to save mount options for ext4_remount's benefit
6405 : */
6406 : struct ext4_mount_options {
6407 : unsigned long s_mount_opt;
6408 : unsigned long s_mount_opt2;
6409 : kuid_t s_resuid;
6410 : kgid_t s_resgid;
6411 : unsigned long s_commit_interval;
6412 : u32 s_min_batch_time, s_max_batch_time;
6413 : #ifdef CONFIG_QUOTA
6414 : int s_jquota_fmt;
6415 : char *s_qf_names[EXT4_MAXQUOTAS];
6416 : #endif
6417 : };
6418 :
/*
 * Apply the mount options carried by @fc to the already mounted
 * superblock @sb.
 *
 * The options currently in effect are saved in a local
 * struct ext4_mount_options first.  The new options are then applied and
 * validated, and a read-only <-> read-write transition is carried out when
 * the requested SB_RDONLY state differs from the current one.  If any step
 * fails, control goes to restore_opts, which puts the saved options and
 * the old sb->s_flags back.
 *
 * Returns 0 on success or a negative errno.  -ENOMEM from duplicating the
 * quota file names is returned before anything has been modified.
 */
6419 629 : static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6420 : {
6421 629 : struct ext4_fs_context *ctx = fc->fs_private;
6422 629 : struct ext4_super_block *es;
6423 629 : struct ext4_sb_info *sbi = EXT4_SB(sb);
6424 629 : unsigned long old_sb_flags;
6425 629 : struct ext4_mount_options old_opts;
6426 629 : ext4_group_t g;
6427 629 : int err = 0;
6428 : #ifdef CONFIG_QUOTA
6429 629 : int enable_quota = 0;
6430 629 : int i, j;
6431 629 : char *to_free[EXT4_MAXQUOTAS];
6432 : #endif
6433 :
6434 :
6435 : /* Store the original options */
6436 629 : old_sb_flags = sb->s_flags;
6437 629 : old_opts.s_mount_opt = sbi->s_mount_opt;
6438 629 : old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6439 629 : old_opts.s_resuid = sbi->s_resuid;
6440 629 : old_opts.s_resgid = sbi->s_resgid;
6441 629 : old_opts.s_commit_interval = sbi->s_commit_interval;
6442 629 : old_opts.s_min_batch_time = sbi->s_min_batch_time;
6443 629 : old_opts.s_max_batch_time = sbi->s_max_batch_time;
6444 : #ifdef CONFIG_QUOTA
6445 629 : old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
/*
 * Keep private copies of the quota file names; on allocation failure the
 * copies made so far are freed and the remount is refused.
 */
6446 2516 : for (i = 0; i < EXT4_MAXQUOTAS; i++)
6447 1887 : if (sbi->s_qf_names[i]) {
6448 32 : char *qf_name = get_qf_name(sb, sbi, i);
6449 :
6450 32 : old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6451 32 : if (!old_opts.s_qf_names[i]) {
6452 0 : for (j = 0; j < i; j++)
6453 0 : kfree(old_opts.s_qf_names[j]);
6454 : return -ENOMEM;
6455 : }
6456 : } else
6457 1855 : old_opts.s_qf_names[i] = NULL;
6458 : #endif
/*
 * No journal_ioprio in this request: carry over the journal thread's
 * current I/O priority if it has an io_context, else use the default.
 */
6459 629 : if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6460 627 : if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6461 611 : ctx->journal_ioprio =
6462 611 : sbi->s_journal->j_task->io_context->ioprio;
6463 : else
6464 16 : ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6465 :
6466 : }
6467 :
6468 629 : ext4_apply_options(fc, sb);
6469 :
/*
 * A changed journal_checksum setting is not an error: it is logged and
 * the bit is flipped back to its previous value.
 */
6470 629 : if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
6471 629 : test_opt(sb, JOURNAL_CHECKSUM)) {
6472 0 : ext4_msg(sb, KERN_ERR, "changing journal_checksum "
6473 : "during remount not supported; ignoring");
6474 0 : sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6475 : }
6476 :
/* Reject option combinations that conflict with the data journalling mode. */
6477 629 : if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6478 6 : if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6479 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
6480 : "both data=journal and delalloc");
6481 0 : err = -EINVAL;
6482 0 : goto restore_opts;
6483 : }
6484 6 : if (test_opt(sb, DIOREAD_NOLOCK)) {
6485 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
6486 : "both data=journal and dioread_nolock");
6487 0 : err = -EINVAL;
6488 0 : goto restore_opts;
6489 : }
6490 623 : } else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6491 593 : if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6492 0 : ext4_msg(sb, KERN_ERR, "can't mount with "
6493 : "journal_async_commit in data=ordered mode");
6494 0 : err = -EINVAL;
6495 0 : goto restore_opts;
6496 : }
6497 : }
6498 :
/* The check is an XOR, so any change of NO_MBCACHE (either way) is refused. */
6499 629 : if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6500 0 : ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6501 0 : err = -EINVAL;
6502 0 : goto restore_opts;
6503 : }
6504 :
6505 629 : if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
6506 0 : ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
6507 :
/* Make SB_POSIXACL follow the (possibly changed) POSIX_ACL mount option. */
6508 629 : sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6509 629 : (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
6510 :
6511 629 : es = sbi->s_es;
6512 :
6513 629 : if (sbi->s_journal) {
6514 613 : ext4_init_journal_params(sb, sbi->s_journal);
6515 613 : set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
6516 : }
6517 :
6518 : /* Flush outstanding errors before changing fs state */
6519 629 : flush_work(&sbi->s_error_work);
6520 :
/* Transition work is only needed when the requested ro/rw state differs. */
6521 629 : if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
6522 89 : if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
6523 0 : err = -EROFS;
6524 0 : goto restore_opts;
6525 : }
6526 :
6527 89 : if (fc->sb_flags & SB_RDONLY) {
6528 46 : err = sync_filesystem(sb);
6529 46 : if (err < 0)
6530 1 : goto restore_opts;
6531 45 : err = dquot_suspend(sb, -1);
6532 45 : if (err < 0)
6533 0 : goto restore_opts;
6534 :
6535 : /*
6536 : * First of all, the unconditional stuff we have to do
6537 : * to disable replay of the journal when we next remount
6538 : */
6539 45 : sb->s_flags |= SB_RDONLY;
6540 :
6541 : /*
6542 : * OK, test if we are remounting a valid rw partition
6543 : * readonly, and if so set the rdonly flag and then
6544 : * mark the partition as valid again.
6545 : */
6546 45 : if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6547 0 : (sbi->s_mount_state & EXT4_VALID_FS))
6548 0 : es->s_state = cpu_to_le16(sbi->s_mount_state);
6549 :
6550 45 : if (sbi->s_journal) {
6551 : /*
6552 : * We let remount-ro finish even if marking fs
6553 : * as clean failed...
6554 : */
6555 45 : ext4_mark_recovery_complete(sb, es);
6556 : }
6557 : } else {
6558 : /* Make sure we can mount this feature set readwrite */
6559 86 : if (ext4_has_feature_readonly(sb) ||
6560 43 : !ext4_feature_set_ok(sb, 0)) {
6561 0 : err = -EROFS;
6562 0 : goto restore_opts;
6563 : }
6564 : /*
6565 : * Make sure the group descriptor checksums
6566 : * are sane. If they aren't, refuse to remount r/w.
6567 : */
/*
 * NOTE(review): gdp is handed on without a NULL check - confirm that
 * ext4_get_group_desc() cannot fail for g < s_groups_count here.
 */
6568 10480 : for (g = 0; g < sbi->s_groups_count; g++) {
6569 10437 : struct ext4_group_desc *gdp =
6570 10437 : ext4_get_group_desc(sb, g, NULL);
6571 :
6572 10437 : if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
6573 0 : ext4_msg(sb, KERN_ERR,
6574 : "ext4_remount: Checksum for group %u failed (%u!=%u)",
6575 : g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
6576 : le16_to_cpu(gdp->bg_checksum));
6577 0 : err = -EFSBADCRC;
6578 0 : goto restore_opts;
6579 : }
6580 : }
6581 :
6582 : /*
6583 : * If we have an unprocessed orphan list hanging
6584 : * around from a previously readonly bdev mount,
6585 : * require a full umount/remount for now.
6586 : */
6587 43 : if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
6588 0 : ext4_msg(sb, KERN_WARNING, "Couldn't "
6589 : "remount RDWR because of unprocessed "
6590 : "orphan inode list. Please "
6591 : "umount/remount instead");
6592 0 : err = -EINVAL;
6593 0 : goto restore_opts;
6594 : }
6595 :
6596 : /*
6597 : * Mounting a RDONLY partition read-write, so reread
6598 : * and store the current valid flag. (It may have
6599 : * been changed by e2fsck since we originally mounted
6600 : * the partition.)
6601 : */
6602 43 : if (sbi->s_journal) {
6603 43 : err = ext4_clear_journal_err(sb, es);
6604 43 : if (err)
6605 0 : goto restore_opts;
6606 : }
6607 43 : sbi->s_mount_state = (le16_to_cpu(es->s_state) &
6608 : ~EXT4_FC_REPLAY);
6609 :
6610 43 : err = ext4_setup_super(sb, es, 0);
6611 43 : if (err)
6612 0 : goto restore_opts;
6613 :
/*
 * SB_RDONLY is dropped before multi-mount protection is started; a failure
 * there is undone by restore_opts putting old_sb_flags back.
 */
6614 43 : sb->s_flags &= ~SB_RDONLY;
6615 43 : if (ext4_has_feature_mmp(sb)) {
6616 0 : err = ext4_multi_mount_protect(sb,
6617 0 : le64_to_cpu(es->s_mmp_block));
6618 0 : if (err)
6619 0 : goto restore_opts;
6620 : }
6621 : #ifdef CONFIG_QUOTA
6622 : enable_quota = 1;
6623 : #endif
6624 : }
6625 : }
6626 :
6627 : /*
6628 : * Handle creation of system zone data early because it can fail.
6629 : * Releasing of existing data is done when we are sure remount will
6630 : * succeed.
6631 : */
6632 628 : if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6633 0 : err = ext4_setup_system_zone(sb);
6634 0 : if (err)
6635 0 : goto restore_opts;
6636 : }
6637 :
/* Without a journal, write the superblock directly if the fs was read-write. */
6638 628 : if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6639 16 : err = ext4_commit_super(sb);
6640 16 : if (err)
6641 0 : goto restore_opts;
6642 : }
6643 :
6644 : #ifdef CONFIG_QUOTA
/* enable_quota is only set by the read-only -> read-write branch above. */
6645 628 : if (enable_quota) {
6646 43 : if (sb_any_quota_suspended(sb))
6647 1 : dquot_resume(sb, -1);
6648 42 : else if (ext4_has_feature_quota(sb)) {
6649 0 : err = ext4_enable_quotas(sb);
6650 0 : if (err)
6651 0 : goto restore_opts;
6652 : }
6653 : }
6654 : /* Release old quota file names */
6655 2512 : for (i = 0; i < EXT4_MAXQUOTAS; i++)
6656 1884 : kfree(old_opts.s_qf_names[i]);
6657 : #endif
6658 628 : if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6659 0 : ext4_release_system_zone(sb);
6660 :
6661 : /*
6662 : * Reinitialize lazy itable initialization thread based on
6663 : * current settings
6664 : */
6665 628 : if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
6666 51 : ext4_unregister_li_request(sb);
6667 : else {
6668 577 : ext4_group_t first_not_zeroed;
6669 577 : first_not_zeroed = ext4_has_uninit_itable(sb);
6670 577 : ext4_register_li_request(sb, first_not_zeroed);
6671 : }
6672 :
6673 628 : if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6674 628 : ext4_stop_mmpd(sbi);
6675 :
6676 : return 0;
6677 :
6678 1 : restore_opts:
6679 : /*
6680 : * If there was a failing r/w to ro transition, we may need to
6681 : * re-enable quota
6682 : */
6683 1 : if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
6684 : sb_any_quota_suspended(sb))
6685 0 : dquot_resume(sb, -1);
6686 1 : sb->s_flags = old_sb_flags;
6687 1 : sbi->s_mount_opt = old_opts.s_mount_opt;
6688 1 : sbi->s_mount_opt2 = old_opts.s_mount_opt2;
6689 1 : sbi->s_resuid = old_opts.s_resuid;
6690 1 : sbi->s_resgid = old_opts.s_resgid;
6691 1 : sbi->s_commit_interval = old_opts.s_commit_interval;
6692 1 : sbi->s_min_batch_time = old_opts.s_min_batch_time;
6693 1 : sbi->s_max_batch_time = old_opts.s_max_batch_time;
6694 1 : if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6695 0 : ext4_release_system_zone(sb);
6696 : #ifdef CONFIG_QUOTA
6697 1 : sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
/*
 * Swap the saved names back in, and free the replaced ones only after
 * synchronize_rcu() has returned.
 */
6698 4 : for (i = 0; i < EXT4_MAXQUOTAS; i++) {
6699 3 : to_free[i] = get_qf_name(sb, sbi, i);
6700 3 : rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
6701 : }
6702 1 : synchronize_rcu();
6703 5 : for (i = 0; i < EXT4_MAXQUOTAS; i++)
6704 3 : kfree(to_free[i]);
6705 : #endif
6706 1 : if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6707 1 : ext4_stop_mmpd(sbi);
6708 : return err;
6709 : }
6710 :
6711 761 : static int ext4_reconfigure(struct fs_context *fc)
6712 : {
6713 761 : struct super_block *sb = fc->root->d_sb;
6714 761 : int ret;
6715 :
6716 761 : fc->s_fs_info = EXT4_SB(sb);
6717 :
6718 761 : ret = ext4_check_opt_consistency(fc, sb);
6719 761 : if (ret < 0)
6720 : return ret;
6721 :
6722 629 : ret = __ext4_remount(fc, sb);
6723 629 : if (ret < 0)
6724 : return ret;
6725 :
6726 628 : ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
6727 : &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
6728 : ext4_quota_mode(sb));
6729 :
6730 628 : return 0;
6731 : }
6732 :
6733 : #ifdef CONFIG_QUOTA
6734 137 : static int ext4_statfs_project(struct super_block *sb,
6735 : kprojid_t projid, struct kstatfs *buf)
6736 : {
6737 137 : struct kqid qid;
6738 137 : struct dquot *dquot;
6739 137 : u64 limit;
6740 137 : u64 curblock;
6741 :
6742 137 : qid = make_kqid_projid(projid);
6743 137 : dquot = dqget(sb, qid);
6744 137 : if (IS_ERR(dquot))
6745 0 : return PTR_ERR(dquot);
6746 137 : spin_lock(&dquot->dq_dqb_lock);
6747 :
6748 137 : limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6749 : dquot->dq_dqb.dqb_bhardlimit);
6750 137 : limit >>= sb->s_blocksize_bits;
6751 :
6752 137 : if (limit && buf->f_blocks > limit) {
6753 250 : curblock = (dquot->dq_dqb.dqb_curspace +
6754 125 : dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
6755 125 : buf->f_blocks = limit;
6756 125 : buf->f_bfree = buf->f_bavail =
6757 : (buf->f_blocks > curblock) ?
6758 125 : (buf->f_blocks - curblock) : 0;
6759 : }
6760 :
6761 137 : limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6762 : dquot->dq_dqb.dqb_ihardlimit);
6763 137 : if (limit && buf->f_files > limit) {
6764 5 : buf->f_files = limit;
6765 5 : buf->f_ffree =
6766 5 : (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
6767 5 : (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
6768 : }
6769 :
6770 137 : spin_unlock(&dquot->dq_dqb_lock);
6771 137 : dqput(dquot);
6772 137 : return 0;
6773 : }
6774 : #endif
6775 :
6776 618462 : static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6777 : {
6778 618462 : struct super_block *sb = dentry->d_sb;
6779 618462 : struct ext4_sb_info *sbi = EXT4_SB(sb);
6780 618462 : struct ext4_super_block *es = sbi->s_es;
6781 618462 : ext4_fsblk_t overhead = 0, resv_blocks;
6782 618462 : s64 bfree;
6783 618462 : resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6784 :
6785 618462 : if (!test_opt(sb, MINIX_DF))
6786 618342 : overhead = sbi->s_overhead;
6787 :
6788 618462 : buf->f_type = EXT4_SUPER_MAGIC;
6789 618462 : buf->f_bsize = sb->s_blocksize;
6790 618462 : buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6791 618462 : bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6792 619624 : percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6793 : /* prevent underflow in case that few free space is available */
6794 619622 : buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6795 619622 : buf->f_bavail = buf->f_bfree -
6796 619622 : (ext4_r_blocks_count(es) + resv_blocks);
6797 1239037 : if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6798 157131 : buf->f_bavail = 0;
6799 619622 : buf->f_files = le32_to_cpu(es->s_inodes_count);
6800 619622 : buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6801 619620 : buf->f_namelen = EXT4_NAME_LEN;
6802 619620 : buf->f_fsid = uuid_to_fsid(es->s_uuid);
6803 :
6804 : #ifdef CONFIG_QUOTA
6805 619620 : if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6806 : sb_has_quota_limits_enabled(sb, PRJQUOTA))
6807 137 : ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6808 : #endif
6809 619620 : return 0;
6810 : }
6811 :
6812 :
6813 : #ifdef CONFIG_QUOTA
6814 :
6815 : /*
6816 : * Helper functions so that transaction is started before we acquire dqio_sem
6817 : * to keep correct lock ordering of transaction > dqio_sem
6818 : */
6819 : static inline struct inode *dquot_to_inode(struct dquot *dquot)
6820 : {
6821 54850 : return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
6822 : }
6823 :
6824 20190 : static int ext4_write_dquot(struct dquot *dquot)
6825 : {
6826 20190 : int ret, err;
6827 20190 : handle_t *handle;
6828 20190 : struct inode *inode;
6829 :
6830 20190 : inode = dquot_to_inode(dquot);
6831 20187 : handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
6832 : EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
6833 20194 : if (IS_ERR(handle))
6834 0 : return PTR_ERR(handle);
6835 20194 : ret = dquot_commit(dquot);
6836 20194 : err = ext4_journal_stop(handle);
6837 20195 : if (!ret)
6838 20195 : ret = err;
6839 : return ret;
6840 : }
6841 :
6842 27425 : static int ext4_acquire_dquot(struct dquot *dquot)
6843 : {
6844 27425 : int ret, err;
6845 27425 : handle_t *handle;
6846 :
6847 27425 : handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6848 : EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
6849 27380 : if (IS_ERR(handle))
6850 0 : return PTR_ERR(handle);
6851 27380 : ret = dquot_acquire(dquot);
6852 27429 : err = ext4_journal_stop(handle);
6853 27428 : if (!ret)
6854 27126 : ret = err;
6855 : return ret;
6856 : }
6857 :
6858 27425 : static int ext4_release_dquot(struct dquot *dquot)
6859 : {
6860 27425 : int ret, err;
6861 27425 : handle_t *handle;
6862 :
6863 27425 : handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6864 : EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
6865 27422 : if (IS_ERR(handle)) {
6866 : /* Release dquot anyway to avoid endless cycle in dqput() */
6867 8 : dquot_release(dquot);
6868 8 : return PTR_ERR(handle);
6869 : }
6870 27414 : ret = dquot_release(dquot);
6871 27417 : err = ext4_journal_stop(handle);
6872 27418 : if (!ret)
6873 27418 : ret = err;
6874 : return ret;
6875 : }
6876 :
6877 399101 : static int ext4_mark_dquot_dirty(struct dquot *dquot)
6878 : {
6879 399101 : struct super_block *sb = dquot->dq_sb;
6880 :
6881 399101 : if (ext4_is_quota_journalled(sb)) {
6882 960 : dquot_mark_dquot_dirty(dquot);
6883 960 : return ext4_write_dquot(dquot);
6884 : } else {
6885 398141 : return dquot_mark_dquot_dirty(dquot);
6886 : }
6887 : }
6888 :
6889 903 : static int ext4_write_info(struct super_block *sb, int type)
6890 : {
6891 903 : int ret, err;
6892 903 : handle_t *handle;
6893 :
6894 : /* Data block + inode block */
6895 903 : handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
6896 903 : if (IS_ERR(handle))
6897 0 : return PTR_ERR(handle);
6898 903 : ret = dquot_commit_info(sb, type);
6899 903 : err = ext4_journal_stop(handle);
6900 903 : if (!ret)
6901 903 : ret = err;
6902 : return ret;
6903 : }
6904 :
/*
 * Set the lockdep subclass of @inode's i_data_sem to @subclass.
 *
 * Callers in this file pass I_DATA_SEM_QUOTA when an inode is about to be
 * used as a quota file and I_DATA_SEM_NORMAL when that use ends or the
 * attempt fails.
 */
6905 : static void lockdep_set_quota_inode(struct inode *inode, int subclass)
6906 : {
6907 : struct ext4_inode_info *ei = EXT4_I(inode);
6908 :
6909 : /* The first argument of lockdep_set_subclass has to be
6910 : * *exactly* the same as the argument to init_rwsem() --- in
6911 : * this case, in init_once() --- or lockdep gets unhappy
6912 : * because the name of the lock is set using the
6913 : * stringification of the argument to init_rwsem().
6914 : */
/* Consequently the local must keep the name "ei"; do not rename it. */
6915 : (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
6916 : lockdep_set_subclass(&ei->i_data_sem, subclass);
6917 : }
6918 :
6919 : /*
6920 : * Standard function to be called on quota_on
6921 : */
6922 78 : static int ext4_quota_on(struct super_block *sb, int type, int format_id,
6923 : const struct path *path)
6924 : {
6925 78 : int err;
6926 :
6927 78 : if (!test_opt(sb, QUOTA))
6928 : return -EINVAL;
6929 :
6930 : /* Quotafile not on the same filesystem? */
6931 78 : if (path->dentry->d_sb != sb)
6932 : return -EXDEV;
6933 :
6934 : /* Quota already enabled for this file? */
6935 78 : if (IS_NOQUOTA(d_inode(path->dentry)))
6936 : return -EBUSY;
6937 :
6938 : /* Journaling quota? */
6939 74 : if (EXT4_SB(sb)->s_qf_names[type]) {
6940 : /* Quotafile not in fs root? */
6941 4 : if (path->dentry->d_parent != sb->s_root)
6942 0 : ext4_msg(sb, KERN_WARNING,
6943 : "Quota file not on filesystem root. "
6944 : "Journaled quota will not work");
6945 4 : sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
6946 : } else {
6947 : /*
6948 : * Clear the flag just in case mount options changed since
6949 : * last time.
6950 : */
6951 70 : sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
6952 : }
6953 :
6954 74 : lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
6955 74 : err = dquot_quota_on(sb, type, format_id, path);
6956 74 : if (!err) {
6957 74 : struct inode *inode = d_inode(path->dentry);
6958 74 : handle_t *handle;
6959 :
6960 : /*
6961 : * Set inode flags to prevent userspace from messing with quota
6962 : * files. If this fails, we return success anyway since quotas
6963 : * are already enabled and this is not a hard failure.
6964 : */
6965 74 : inode_lock(inode);
6966 74 : handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6967 74 : if (IS_ERR(handle))
6968 0 : goto unlock_inode;
6969 74 : EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
6970 74 : inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
6971 : S_NOATIME | S_IMMUTABLE);
6972 74 : err = ext4_mark_inode_dirty(handle, inode);
6973 74 : ext4_journal_stop(handle);
6974 74 : unlock_inode:
6975 74 : inode_unlock(inode);
6976 74 : if (err)
6977 0 : dquot_quota_off(sb, type);
6978 : }
6979 74 : if (err)
6980 : lockdep_set_quota_inode(path->dentry->d_inode,
6981 : I_DATA_SEM_NORMAL);
6982 : return err;
6983 : }
6984 :
6985 125 : static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
6986 : {
6987 125 : switch (type) {
6988 32 : case USRQUOTA:
6989 32 : return qf_inum == EXT4_USR_QUOTA_INO;
6990 32 : case GRPQUOTA:
6991 32 : return qf_inum == EXT4_GRP_QUOTA_INO;
6992 61 : case PRJQUOTA:
6993 61 : return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
6994 0 : default:
6995 0 : BUG();
6996 : }
6997 : }
6998 :
6999 125 : static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
7000 : unsigned int flags)
7001 : {
7002 125 : int err;
7003 125 : struct inode *qf_inode;
7004 125 : unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7005 125 : le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7006 125 : le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7007 125 : le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7008 : };
7009 :
7010 125 : BUG_ON(!ext4_has_feature_quota(sb));
7011 :
7012 125 : if (!qf_inums[type])
7013 : return -EPERM;
7014 :
7015 125 : if (!ext4_check_quota_inum(type, qf_inums[type])) {
7016 0 : ext4_error(sb, "Bad quota inum: %lu, type: %d",
7017 : qf_inums[type], type);
7018 0 : return -EUCLEAN;
7019 : }
7020 :
7021 125 : qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
7022 125 : if (IS_ERR(qf_inode)) {
7023 0 : ext4_error(sb, "Bad quota inode: %lu, type: %d",
7024 : qf_inums[type], type);
7025 0 : return PTR_ERR(qf_inode);
7026 : }
7027 :
7028 : /* Don't account quota for quota files to avoid recursion */
7029 125 : qf_inode->i_flags |= S_NOQUOTA;
7030 125 : lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
7031 125 : err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
7032 125 : if (err)
7033 : lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
7034 125 : iput(qf_inode);
7035 :
7036 125 : return err;
7037 : }
7038 :
7039 : /* Enable usage tracking for all quota types. */
7040 64 : int ext4_enable_quotas(struct super_block *sb)
7041 : {
7042 64 : int type, err = 0;
7043 64 : unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7044 64 : le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7045 64 : le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7046 64 : le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7047 : };
7048 64 : bool quota_mopt[EXT4_MAXQUOTAS] = {
7049 64 : test_opt(sb, USRQUOTA),
7050 64 : test_opt(sb, GRPQUOTA),
7051 64 : test_opt(sb, PRJQUOTA),
7052 : };
7053 :
7054 64 : sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
7055 256 : for (type = 0; type < EXT4_MAXQUOTAS; type++) {
7056 192 : if (qf_inums[type]) {
7057 125 : err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
7058 : DQUOT_USAGE_ENABLED |
7059 125 : (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
7060 125 : if (err) {
7061 0 : ext4_warning(sb,
7062 : "Failed to enable quota tracking "
7063 : "(type=%d, err=%d, ino=%lu). "
7064 : "Please run e2fsck to fix.", type,
7065 : err, qf_inums[type]);
7066 :
7067 0 : ext4_quotas_off(sb, type);
7068 0 : return err;
7069 : }
7070 : }
7071 : }
7072 : return 0;
7073 : }
7074 :
7075 7555 : static int ext4_quota_off(struct super_block *sb, int type)
7076 : {
7077 7555 : struct inode *inode = sb_dqopt(sb)->files[type];
7078 7555 : handle_t *handle;
7079 7555 : int err;
7080 :
7081 : /* Force all delayed allocation blocks to be allocated.
7082 : * Caller already holds s_umount sem */
7083 7555 : if (test_opt(sb, DELALLOC))
7084 6989 : sync_filesystem(sb);
7085 :
7086 7555 : if (!inode || !igrab(inode))
7087 7356 : goto out;
7088 :
7089 199 : err = dquot_quota_off(sb, type);
7090 199 : if (err || ext4_has_feature_quota(sb))
7091 125 : goto out_put;
7092 :
7093 74 : inode_lock(inode);
7094 : /*
7095 : * Update modification times of quota files when userspace can
7096 : * start looking at them. If we fail, we return success anyway since
7097 : * this is not a hard failure and quotas are already disabled.
7098 : */
7099 74 : handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
7100 74 : if (IS_ERR(handle)) {
7101 2 : err = PTR_ERR(handle);
7102 2 : goto out_unlock;
7103 : }
7104 72 : EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
7105 72 : inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
7106 72 : inode->i_mtime = inode->i_ctime = current_time(inode);
7107 72 : err = ext4_mark_inode_dirty(handle, inode);
7108 72 : ext4_journal_stop(handle);
7109 74 : out_unlock:
7110 74 : inode_unlock(inode);
7111 199 : out_put:
7112 199 : lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
7113 199 : iput(inode);
7114 199 : return err;
7115 : out:
7116 7356 : return dquot_quota_off(sb, type);
7117 : }
7118 :
7119 : /* Read data from quotafile - avoid pagecache and such because we cannot afford
7120 : * acquiring the locks... As quota files are never truncated and quota code
7121 : * itself serializes the operations (and no one else should touch the files)
7122 : * we don't have to be afraid of races */
7123 233436 : static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
7124 : size_t len, loff_t off)
7125 : {
7126 233436 : struct inode *inode = sb_dqopt(sb)->files[type];
7127 233439 : ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7128 233439 : int offset = off & (sb->s_blocksize - 1);
7129 233439 : int tocopy;
7130 233439 : size_t toread;
7131 233439 : struct buffer_head *bh;
7132 233439 : loff_t i_size = i_size_read(inode);
7133 :
7134 233439 : if (off > i_size)
7135 : return 0;
7136 233439 : if (off+len > i_size)
7137 0 : len = i_size-off;
7138 233439 : toread = len;
7139 466885 : while (toread > 0) {
7140 233437 : tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
7141 233437 : bh = ext4_bread(NULL, inode, blk, 0);
7142 233440 : if (IS_ERR(bh))
7143 0 : return PTR_ERR(bh);
7144 233440 : if (!bh) /* A hole? */
7145 0 : memset(data, 0, tocopy);
7146 : else
7147 466880 : memcpy(data, bh->b_data+offset, tocopy);
7148 233440 : brelse(bh);
7149 233446 : offset = 0;
7150 233446 : toread -= tocopy;
7151 233446 : data += tocopy;
7152 233446 : blk++;
7153 : }
7154 233448 : return len;
7155 : }
7156 :
7157 : /* Write to quotafile (we know the transaction is already started and has
7158 : * enough credits) */
7159 201408 : static ssize_t ext4_quota_write(struct super_block *sb, int type,
7160 : const char *data, size_t len, loff_t off)
7161 : {
7162 201408 : struct inode *inode = sb_dqopt(sb)->files[type];
7163 201404 : ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7164 201404 : int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
7165 201404 : int retries = 0;
7166 201404 : struct buffer_head *bh;
7167 201404 : handle_t *handle = journal_current_handle();
7168 :
7169 201404 : if (!handle) {
7170 0 : ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7171 : " cancelled because transaction is not started",
7172 : (unsigned long long)off, (unsigned long long)len);
7173 0 : return -EIO;
7174 : }
7175 : /*
7176 : * Since we account only one data block in transaction credits,
7177 : * then it is impossible to cross a block boundary.
7178 : */
7179 201404 : if (sb->s_blocksize - offset < len) {
7180 0 : ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7181 : " cancelled because not block aligned",
7182 : (unsigned long long)off, (unsigned long long)len);
7183 0 : return -EIO;
7184 : }
7185 :
7186 201404 : do {
7187 201404 : bh = ext4_bread(handle, inode, blk,
7188 : EXT4_GET_BLOCKS_CREATE |
7189 : EXT4_GET_BLOCKS_METADATA_NOFAIL);
7190 201410 : } while (PTR_ERR(bh) == -ENOSPC &&
7191 0 : ext4_should_retry_alloc(inode->i_sb, &retries));
7192 201410 : if (IS_ERR(bh))
7193 : return PTR_ERR(bh);
7194 201410 : if (!bh)
7195 0 : goto out;
7196 201410 : BUFFER_TRACE(bh, "get write access");
7197 201410 : err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
7198 201412 : if (err) {
7199 0 : brelse(bh);
7200 0 : return err;
7201 : }
7202 201412 : lock_buffer(bh);
7203 402822 : memcpy(bh->b_data+offset, data, len);
7204 201411 : flush_dcache_page(bh->b_page);
7205 201411 : unlock_buffer(bh);
7206 201409 : err = ext4_handle_dirty_metadata(handle, NULL, bh);
7207 201406 : brelse(bh);
7208 201412 : out:
7209 201412 : if (inode->i_size < off + len) {
7210 7804 : i_size_write(inode, off + len);
7211 7804 : EXT4_I(inode)->i_disksize = inode->i_size;
7212 7804 : err2 = ext4_mark_inode_dirty(handle, inode);
7213 7804 : if (unlikely(err2 && !err))
7214 0 : err = err2;
7215 : }
7216 201412 : return err ? err : len;
7217 : }
7218 : #endif
7219 :
7220 : #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
7221 : static inline void register_as_ext2(void)
7222 : {
7223 : int err = register_filesystem(&ext2_fs_type);
7224 : if (err)
7225 : printk(KERN_WARNING
7226 : "EXT4-fs: Unable to register as ext2 (%d)\n", err);
7227 : }
7228 :
7229 : static inline void unregister_as_ext2(void)
7230 : {
7231 : unregister_filesystem(&ext2_fs_type);
7232 : }
7233 :
/*
 * Check whether the feature set of @sb is acceptable for an ext2 mount.
 *
 * Returns 0 when ext4_has_unknown_ext2_incompat_features() reports a hit.
 * Otherwise returns 1 for a read-only super block, and for a writable one
 * returns 1 only if ext4_has_unknown_ext2_ro_compat_features() reports none.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;

	/* The ro_compat check is skipped for a read-only super block. */
	return sb_rdonly(sb) || !ext4_has_unknown_ext2_ro_compat_features(sb);
}
7244 : #else
/* No-op variant: nothing is registered in this configuration. */
static inline void register_as_ext2(void)
{
}

/* No-op variant: nothing to unregister in this configuration. */
static inline void unregister_as_ext2(void)
{
}

/* In this configuration the check always returns 0. */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	return 0;
}
7248 : #endif
7249 :
7250 12 : static inline void register_as_ext3(void)
7251 : {
7252 12 : int err = register_filesystem(&ext3_fs_type);
7253 12 : if (err)
7254 0 : printk(KERN_WARNING
7255 : "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7256 12 : }
7257 :
7258 : static inline void unregister_as_ext3(void)
7259 : {
7260 0 : unregister_filesystem(&ext3_fs_type);
7261 : }
7262 :
/*
 * Check whether the feature set of @sb is acceptable for an ext3 mount.
 *
 * Returns 0 when ext4_has_unknown_ext3_incompat_features() reports a hit or
 * when the journal feature is absent. Otherwise returns 1 for a read-only
 * super block, and for a writable one returns 1 only if
 * ext4_has_unknown_ext3_ro_compat_features() reports none.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb) ||
	    !ext4_has_feature_journal(sb))
		return 0;

	/* The ro_compat check is skipped for a read-only super block. */
	return sb_rdonly(sb) || !ext4_has_unknown_ext3_ro_compat_features(sb);
}
7275 :
7276 : static struct file_system_type ext4_fs_type = {
7277 : .owner = THIS_MODULE,
7278 : .name = "ext4",
7279 : .init_fs_context = ext4_init_fs_context,
7280 : .parameters = ext4_param_specs,
7281 : .kill_sb = kill_block_super,
7282 : .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
7283 : };
7284 : MODULE_ALIAS_FS("ext4");
7285 :
7286 : /* Shared across all ext4 file systems */
7287 : wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
7288 :
7289 12 : static int __init ext4_init_fs(void)
7290 : {
7291 12 : int i, err;
7292 :
7293 12 : ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
7294 12 : ext4_li_info = NULL;
7295 :
7296 : /* Build-time check for flags consistency */
7297 12 : ext4_check_flag_values();
7298 :
7299 456 : for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
7300 444 : init_waitqueue_head(&ext4__ioend_wq[i]);
7301 :
7302 12 : err = ext4_init_es();
7303 12 : if (err)
7304 : return err;
7305 :
7306 12 : err = ext4_init_pending();
7307 12 : if (err)
7308 0 : goto out7;
7309 :
7310 12 : err = ext4_init_post_read_processing();
7311 12 : if (err)
7312 0 : goto out6;
7313 :
7314 12 : err = ext4_init_pageio();
7315 12 : if (err)
7316 0 : goto out5;
7317 :
7318 12 : err = ext4_init_system_zone();
7319 12 : if (err)
7320 0 : goto out4;
7321 :
7322 12 : err = ext4_init_sysfs();
7323 12 : if (err)
7324 0 : goto out3;
7325 :
7326 12 : err = ext4_init_mballoc();
7327 12 : if (err)
7328 0 : goto out2;
7329 12 : err = init_inodecache();
7330 12 : if (err)
7331 0 : goto out1;
7332 :
7333 12 : err = ext4_fc_init_dentry_cache();
7334 12 : if (err)
7335 0 : goto out05;
7336 :
7337 12 : register_as_ext3();
7338 12 : register_as_ext2();
7339 12 : err = register_filesystem(&ext4_fs_type);
7340 12 : if (err)
7341 0 : goto out;
7342 :
7343 : return 0;
7344 : out:
7345 0 : unregister_as_ext2();
7346 0 : unregister_as_ext3();
7347 0 : ext4_fc_destroy_dentry_cache();
7348 0 : out05:
7349 0 : destroy_inodecache();
7350 0 : out1:
7351 0 : ext4_exit_mballoc();
7352 0 : out2:
7353 0 : ext4_exit_sysfs();
7354 0 : out3:
7355 0 : ext4_exit_system_zone();
7356 0 : out4:
7357 0 : ext4_exit_pageio();
7358 0 : out5:
7359 0 : ext4_exit_post_read_processing();
7360 0 : out6:
7361 0 : ext4_exit_pending();
7362 0 : out7:
7363 0 : ext4_exit_es();
7364 :
7365 0 : return err;
7366 : }
7367 :
7368 0 : static void __exit ext4_exit_fs(void)
7369 : {
7370 0 : ext4_destroy_lazyinit_thread();
7371 0 : unregister_as_ext2();
7372 0 : unregister_as_ext3();
7373 0 : unregister_filesystem(&ext4_fs_type);
7374 0 : ext4_fc_destroy_dentry_cache();
7375 0 : destroy_inodecache();
7376 0 : ext4_exit_mballoc();
7377 0 : ext4_exit_sysfs();
7378 0 : ext4_exit_system_zone();
7379 0 : ext4_exit_pageio();
7380 0 : ext4_exit_post_read_processing();
7381 0 : ext4_exit_es();
7382 0 : ext4_exit_pending();
7383 0 : }
7384 :
7385 : MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
7386 : MODULE_DESCRIPTION("Fourth Extended Filesystem");
7387 : MODULE_LICENSE("GPL");
7388 : MODULE_SOFTDEP("pre: crc32c");
7389 : module_init(ext4_init_fs)
7390 : module_exit(ext4_exit_fs)
|