Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * linux/fs/ext4/balloc.c
4 : *
5 : * Copyright (C) 1992, 1993, 1994, 1995
6 : * Remy Card (card@masi.ibp.fr)
7 : * Laboratoire MASI - Institut Blaise Pascal
8 : * Universite Pierre et Marie Curie (Paris VI)
9 : *
10 : * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
11 : * Big-endian to little-endian byte-swapping/bitmaps by
12 : * David S. Miller (davem@caip.rutgers.edu), 1995
13 : */
14 :
15 : #include <linux/time.h>
16 : #include <linux/capability.h>
17 : #include <linux/fs.h>
18 : #include <linux/quotaops.h>
19 : #include <linux/buffer_head.h>
20 : #include "ext4.h"
21 : #include "ext4_jbd2.h"
22 : #include "mballoc.h"
23 :
24 : #include <trace/events/ext4.h>
25 :
26 : static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
27 : ext4_group_t block_group);
28 : /*
29 : * balloc.c contains the blocks allocation and deallocation routines
30 : */
31 :
32 : /*
33 : * Calculate block group number for a given block number
34 : */
35 36481108 : ext4_group_t ext4_get_group_number(struct super_block *sb,
36 : ext4_fsblk_t block)
37 : {
38 36481108 : ext4_group_t group;
39 :
40 36481108 : if (test_opt2(sb, STD_GROUP_SIZE))
41 72677728 : group = (block -
42 36338864 : le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
43 36338864 : (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
44 : else
45 142244 : ext4_get_group_no_and_offset(sb, block, &group, NULL);
46 36481108 : return group;
47 : }
48 :
49 : /*
50 : * Calculate the block group number and offset into the block/cluster
51 : * allocation bitmap, given a block number
52 : */
53 103776218 : void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
54 : ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
55 : {
56 103776218 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
57 103776218 : ext4_grpblk_t offset;
58 :
59 103776218 : blocknr = blocknr - le32_to_cpu(es->s_first_data_block);
60 103776218 : offset = do_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)) >>
61 103776218 : EXT4_SB(sb)->s_cluster_bits;
62 103776218 : if (offsetp)
63 103633974 : *offsetp = offset;
64 103776218 : if (blockgrpp)
65 9710369 : *blockgrpp = blocknr;
66 :
67 103776218 : }
68 :
69 : /*
70 : * Check whether the 'block' lives within the 'block_group'. Returns 1 if so
71 : * and 0 otherwise.
72 : */
73 : static inline int ext4_block_in_group(struct super_block *sb,
74 : ext4_fsblk_t block,
75 : ext4_group_t block_group)
76 : {
77 36016335 : ext4_group_t actual_group;
78 :
79 1353866 : actual_group = ext4_get_group_number(sb, block);
80 36016344 : return (actual_group == block_group) ? 1 : 0;
81 : }
82 :
83 : /*
84 : * Return the number of clusters used for file system metadata; this
85 : * represents the overhead needed by the file system.
86 : */
87 676933 : static unsigned ext4_num_overhead_clusters(struct super_block *sb,
88 : ext4_group_t block_group,
89 : struct ext4_group_desc *gdp)
90 : {
91 676933 : unsigned base_clusters, num_clusters;
92 676933 : int block_cluster = -1, inode_cluster;
93 676933 : int itbl_cluster_start = -1, itbl_cluster_end = -1;
94 676933 : ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
95 676933 : ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1;
96 676933 : ext4_fsblk_t itbl_blk_start, itbl_blk_end;
97 676933 : struct ext4_sb_info *sbi = EXT4_SB(sb);
98 :
99 : /* This is the number of clusters used by the superblock,
100 : * block group descriptors, and reserved block group
101 : * descriptor blocks */
102 676933 : base_clusters = ext4_num_base_meta_clusters(sb, block_group);
103 676933 : num_clusters = base_clusters;
104 :
105 : /*
106 : * Account and record inode table clusters if any cluster
107 : * is in the block group, or inode table cluster range is
108 : * [-1, -1] and won't overlap with block/inode bitmap cluster
109 : * accounted below.
110 : */
111 676933 : itbl_blk_start = ext4_inode_table(sb, gdp);
112 676933 : itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
113 676933 : if (itbl_blk_start <= end && itbl_blk_end >= start) {
114 0 : itbl_blk_start = itbl_blk_start >= start ?
115 : itbl_blk_start : start;
116 0 : itbl_blk_end = itbl_blk_end <= end ?
117 : itbl_blk_end : end;
118 :
119 0 : itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
120 0 : itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);
121 :
122 0 : num_clusters += itbl_cluster_end - itbl_cluster_start + 1;
123 : /* check if border cluster is overlapped */
124 0 : if (itbl_cluster_start == base_clusters - 1)
125 0 : num_clusters--;
126 : }
127 :
128 : /*
129 : * For the allocation bitmaps, we first need to check to see
130 : * if the block is in the block group. If it is, then check
131 : * to see if the cluster is already accounted for in the clusters
132 : * used for the base metadata cluster and inode tables cluster.
133 : * Normally all of these blocks are contiguous, so the special
134 : * case handling shouldn't be necessary except for *very*
135 : * unusual file system layouts.
136 : */
137 1353866 : if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
138 0 : block_cluster = EXT4_B2C(sbi,
139 : ext4_block_bitmap(sb, gdp) - start);
140 0 : if (block_cluster >= base_clusters &&
141 0 : (block_cluster < itbl_cluster_start ||
142 0 : block_cluster > itbl_cluster_end))
143 0 : num_clusters++;
144 : }
145 :
146 1353866 : if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
147 0 : inode_cluster = EXT4_B2C(sbi,
148 : ext4_inode_bitmap(sb, gdp) - start);
149 : /*
150 : * Additional check if inode bitmap is in just accounted
151 : * block_cluster
152 : */
153 0 : if (inode_cluster != block_cluster &&
154 0 : inode_cluster >= base_clusters &&
155 0 : (inode_cluster < itbl_cluster_start ||
156 0 : inode_cluster > itbl_cluster_end))
157 0 : num_clusters++;
158 : }
159 :
160 676933 : return num_clusters;
161 : }
162 :
163 847672 : static unsigned int num_clusters_in_group(struct super_block *sb,
164 : ext4_group_t block_group)
165 : {
166 847672 : unsigned int blocks;
167 :
168 847672 : if (block_group == ext4_get_groups_count(sb) - 1) {
169 : /*
170 : * Even though mke2fs always initializes the first and
171 : * last group, just in case some other tool was used,
172 : * we need to make sure we calculate the right free
173 : * blocks.
174 : */
175 1418 : blocks = ext4_blocks_count(EXT4_SB(sb)->s_es) -
176 : ext4_group_first_block_no(sb, block_group);
177 : } else
178 846961 : blocks = EXT4_BLOCKS_PER_GROUP(sb);
179 847672 : return EXT4_NUM_B2C(EXT4_SB(sb), blocks);
180 : }
181 :
182 : /* Initializes an uninitialized block bitmap */
183 70712 : static int ext4_init_block_bitmap(struct super_block *sb,
184 : struct buffer_head *bh,
185 : ext4_group_t block_group,
186 : struct ext4_group_desc *gdp)
187 : {
188 70712 : unsigned int bit, bit_max;
189 70712 : struct ext4_sb_info *sbi = EXT4_SB(sb);
190 70712 : ext4_fsblk_t start, tmp;
191 :
192 141424 : ASSERT(buffer_locked(bh));
193 :
194 70712 : if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
195 0 : ext4_mark_group_bitmap_corrupted(sb, block_group,
196 : EXT4_GROUP_INFO_BBITMAP_CORRUPT |
197 : EXT4_GROUP_INFO_IBITMAP_CORRUPT);
198 0 : return -EFSBADCRC;
199 : }
200 70712 : memset(bh->b_data, 0, sb->s_blocksize);
201 :
202 70712 : bit_max = ext4_num_base_meta_clusters(sb, block_group);
203 70712 : if ((bit_max >> 3) >= bh->b_size)
204 : return -EFSCORRUPTED;
205 :
206 4180249 : for (bit = 0; bit < bit_max; bit++)
207 4109537 : ext4_set_bit(bit, bh->b_data);
208 :
209 70712 : start = ext4_group_first_block_no(sb, block_group);
210 :
211 : /* Set bits for block and inode bitmaps, and inode table */
212 70712 : tmp = ext4_block_bitmap(sb, gdp);
213 70712 : if (ext4_block_in_group(sb, tmp, block_group))
214 0 : ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
215 :
216 70712 : tmp = ext4_inode_bitmap(sb, gdp);
217 70712 : if (ext4_block_in_group(sb, tmp, block_group))
218 0 : ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
219 :
220 70712 : tmp = ext4_inode_table(sb, gdp);
221 34662478 : for (; tmp < ext4_inode_table(sb, gdp) +
222 69112811 : sbi->s_itb_per_group; tmp++) {
223 34521045 : if (ext4_block_in_group(sb, tmp, block_group))
224 0 : ext4_set_bit(EXT4_B2C(sbi, tmp - start), bh->b_data);
225 : }
226 :
227 : /*
228 : * Also if the number of blocks within the group is less than
229 : * the blocksize * 8 ( which is the size of bitmap ), set rest
230 : * of the block bitmap to 1
231 : */
232 141424 : ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
233 70712 : sb->s_blocksize * 8, bh->b_data);
234 70712 : return 0;
235 : }
236 :
237 : /* Return the number of free blocks in a block group. It is used when
238 : * the block bitmap is uninitialized, so we can't just count the bits
239 : * in the bitmap. */
240 676933 : unsigned ext4_free_clusters_after_init(struct super_block *sb,
241 : ext4_group_t block_group,
242 : struct ext4_group_desc *gdp)
243 : {
244 676933 : return num_clusters_in_group(sb, block_group) -
245 676933 : ext4_num_overhead_clusters(sb, block_group, gdp);
246 : }
247 :
248 : /*
249 : * The free blocks are managed by bitmaps. A file system contains several
250 : * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap
251 : * block for inodes, N blocks for the inode table and data blocks.
252 : *
253 : * The file system contains group descriptors which are located after the
254 : * super block. Each descriptor contains the number of the bitmap block and
255 : * the free blocks count in the block. The descriptors are loaded in memory
256 : * when a file system is mounted (see ext4_fill_super).
257 : */
258 :
259 : /**
260 : * ext4_get_group_desc() -- load group descriptor from disk
261 : * @sb: super block
262 : * @block_group: given block group
263 : * @bh: pointer to the buffer head to store the block
264 : * group descriptor
265 : */
266 307613947 : struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
267 : ext4_group_t block_group,
268 : struct buffer_head **bh)
269 : {
270 307613947 : unsigned int group_desc;
271 307613947 : unsigned int offset;
272 307613947 : ext4_group_t ngroups = ext4_get_groups_count(sb);
273 307544205 : struct ext4_group_desc *desc;
274 307544205 : struct ext4_sb_info *sbi = EXT4_SB(sb);
275 307544205 : struct buffer_head *bh_p;
276 :
277 307544205 : if (block_group >= ngroups) {
278 0 : ext4_error(sb, "block_group >= groups_count - block_group = %u,"
279 : " groups_count = %u", block_group, ngroups);
280 :
281 0 : return NULL;
282 : }
283 :
284 307544205 : group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
285 307544205 : offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
286 307544205 : bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc);
287 : /*
288 : * sbi_array_rcu_deref returns with rcu unlocked, this is ok since
289 : * the pointer being dereferenced won't be dereferenced again. By
290 : * looking at the usage in add_new_gdb() the value isn't modified,
291 : * just the pointer, and so it remains valid.
292 : */
293 307595908 : if (!bh_p) {
294 0 : ext4_error(sb, "Group descriptor not loaded - "
295 : "block_group = %u, group_desc = %u, desc = %u",
296 : block_group, group_desc, offset);
297 0 : return NULL;
298 : }
299 :
300 307595908 : desc = (struct ext4_group_desc *)(
301 307595908 : (__u8 *)bh_p->b_data +
302 307595908 : offset * EXT4_DESC_SIZE(sb));
303 307595908 : if (bh)
304 10533166 : *bh = bh_p;
305 : return desc;
306 : }
307 :
308 100027 : static ext4_fsblk_t ext4_valid_block_bitmap_padding(struct super_block *sb,
309 : ext4_group_t block_group,
310 : struct buffer_head *bh)
311 : {
312 100027 : ext4_grpblk_t next_zero_bit;
313 100027 : unsigned long bitmap_size = sb->s_blocksize * 8;
314 100027 : unsigned int offset = num_clusters_in_group(sb, block_group);
315 :
316 100027 : if (bitmap_size <= offset)
317 : return 0;
318 :
319 689 : next_zero_bit = ext4_find_next_zero_bit(bh->b_data, bitmap_size, offset);
320 :
321 689 : return (next_zero_bit < bitmap_size ? next_zero_bit : 0);
322 : }
323 :
324 540101445 : struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
325 : ext4_group_t group)
326 : {
327 540101445 : struct ext4_group_info **grp_info;
328 540101445 : long indexv, indexh;
329 :
330 540101445 : if (unlikely(group >= EXT4_SB(sb)->s_groups_count))
331 : return NULL;
332 540101445 : indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
333 540101445 : indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
334 540101445 : grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv);
335 540643876 : return grp_info[indexh];
336 : }
337 :
338 : /*
339 : * Return the block number which was discovered to be invalid, or 0 if
340 : * the block bitmap is valid.
341 : */
342 100027 : static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
343 : struct ext4_group_desc *desc,
344 : ext4_group_t block_group,
345 : struct buffer_head *bh)
346 : {
347 100027 : struct ext4_sb_info *sbi = EXT4_SB(sb);
348 100027 : ext4_grpblk_t offset;
349 100027 : ext4_grpblk_t next_zero_bit;
350 100027 : ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb);
351 100027 : ext4_fsblk_t blk;
352 100027 : ext4_fsblk_t group_first_block;
353 :
354 100027 : if (ext4_has_feature_flex_bg(sb)) {
355 : /* with FLEX_BG, the inode/block bitmaps and itable
356 : * blocks may not be in the group at all
357 : * so the bitmap validation will be skipped for those groups
358 : * or it has to also read the block group where the bitmaps
359 : * are located to verify they are set.
360 : */
361 : return 0;
362 : }
363 128 : group_first_block = ext4_group_first_block_no(sb, block_group);
364 :
365 : /* check whether block bitmap block number is set */
366 128 : blk = ext4_block_bitmap(sb, desc);
367 128 : offset = blk - group_first_block;
368 256 : if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
369 128 : !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
370 : /* bad block bitmap */
371 0 : return blk;
372 :
373 : /* check whether the inode bitmap block number is set */
374 128 : blk = ext4_inode_bitmap(sb, desc);
375 128 : offset = blk - group_first_block;
376 256 : if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
377 128 : !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
378 : /* bad block bitmap */
379 0 : return blk;
380 :
381 : /* check whether the inode table block number is set */
382 128 : blk = ext4_inode_table(sb, desc);
383 128 : offset = blk - group_first_block;
384 128 : if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
385 128 : EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit)
386 0 : return blk;
387 512 : next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
388 128 : EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1,
389 128 : EXT4_B2C(sbi, offset));
390 256 : if (next_zero_bit <
391 128 : EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1)
392 : /* bad bitmap for inode tables */
393 0 : return blk;
394 : return 0;
395 : }
396 :
397 6683132 : static int ext4_validate_block_bitmap(struct super_block *sb,
398 : struct ext4_group_desc *desc,
399 : ext4_group_t block_group,
400 : struct buffer_head *bh)
401 : {
402 6683132 : ext4_fsblk_t blk;
403 6683132 : struct ext4_group_info *grp;
404 :
405 6683132 : if (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)
406 : return 0;
407 :
408 6683137 : grp = ext4_get_group_info(sb, block_group);
409 :
410 13365990 : if (buffer_verified(bh))
411 : return 0;
412 100293 : if (!grp || EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
413 : return -EFSCORRUPTED;
414 :
415 100292 : ext4_lock_group(sb, block_group);
416 200584 : if (buffer_verified(bh))
417 0 : goto verified;
418 200319 : if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) ||
419 : ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
420 265 : ext4_unlock_group(sb, block_group);
421 265 : ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
422 265 : ext4_mark_group_bitmap_corrupted(sb, block_group,
423 : EXT4_GROUP_INFO_BBITMAP_CORRUPT);
424 265 : return -EFSBADCRC;
425 : }
426 100027 : blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
427 100027 : if (unlikely(blk != 0)) {
428 0 : ext4_unlock_group(sb, block_group);
429 0 : ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
430 : block_group, blk);
431 0 : ext4_mark_group_bitmap_corrupted(sb, block_group,
432 : EXT4_GROUP_INFO_BBITMAP_CORRUPT);
433 0 : return -EFSCORRUPTED;
434 : }
435 100027 : blk = ext4_valid_block_bitmap_padding(sb, block_group, bh);
436 100027 : if (unlikely(blk != 0)) {
437 0 : ext4_unlock_group(sb, block_group);
438 0 : ext4_error(sb, "bg %u: block %llu: padding at end of block bitmap is not set",
439 : block_group, blk);
440 0 : ext4_mark_group_bitmap_corrupted(sb, block_group,
441 : EXT4_GROUP_INFO_BBITMAP_CORRUPT);
442 0 : return -EFSCORRUPTED;
443 : }
444 100027 : set_buffer_verified(bh);
445 100027 : verified:
446 100027 : ext4_unlock_group(sb, block_group);
447 100027 : return 0;
448 : }
449 :
450 : /**
451 : * ext4_read_block_bitmap_nowait()
452 : * @sb: super block
453 : * @block_group: given block group
454 : * @ignore_locked: ignore locked buffers
455 : *
456 : * Read the bitmap for a given block_group,and validate the
457 : * bits for block/inode/inode tables are set in the bitmaps
458 : *
459 : * Return buffer_head on success or an ERR_PTR in case of failure.
460 : */
461 : struct buffer_head *
462 6753446 : ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
463 : bool ignore_locked)
464 : {
465 6753446 : struct ext4_group_desc *desc;
466 6753446 : struct ext4_sb_info *sbi = EXT4_SB(sb);
467 6753446 : struct buffer_head *bh;
468 6753446 : ext4_fsblk_t bitmap_blk;
469 6753446 : int err;
470 :
471 6753446 : desc = ext4_get_group_desc(sb, block_group, NULL);
472 6753818 : if (!desc)
473 : return ERR_PTR(-EFSCORRUPTED);
474 6753818 : bitmap_blk = ext4_block_bitmap(sb, desc);
475 13507858 : if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
476 : (bitmap_blk >= ext4_blocks_count(sbi->s_es))) {
477 0 : ext4_error(sb, "Invalid block bitmap block %llu in "
478 : "block_group %u", bitmap_blk, block_group);
479 0 : ext4_mark_group_bitmap_corrupted(sb, block_group,
480 : EXT4_GROUP_INFO_BBITMAP_CORRUPT);
481 0 : return ERR_PTR(-EFSCORRUPTED);
482 : }
483 6753976 : bh = sb_getblk(sb, bitmap_blk);
484 6754718 : if (unlikely(!bh)) {
485 0 : ext4_warning(sb, "Cannot get buffer for block bitmap - "
486 : "block_group = %u, block_bitmap = %llu",
487 : block_group, bitmap_blk);
488 0 : return ERR_PTR(-ENOMEM);
489 : }
490 :
491 6919865 : if (ignore_locked && buffer_locked(bh)) {
492 : /* buffer under IO already, return if called for prefetching */
493 10 : put_bh(bh);
494 10 : return NULL;
495 : }
496 :
497 6754708 : if (bitmap_uptodate(bh))
498 6579764 : goto verify;
499 :
500 174592 : lock_buffer(bh);
501 174592 : if (bitmap_uptodate(bh)) {
502 3465 : unlock_buffer(bh);
503 3465 : goto verify;
504 : }
505 171127 : ext4_lock_group(sb, block_group);
506 171128 : if (ext4_has_group_desc_csum(sb) &&
507 170942 : (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
508 70712 : if (block_group == 0) {
509 0 : ext4_unlock_group(sb, block_group);
510 0 : unlock_buffer(bh);
511 0 : ext4_error(sb, "Block bitmap for bg 0 marked "
512 : "uninitialized");
513 0 : err = -EFSCORRUPTED;
514 0 : goto out;
515 : }
516 70712 : err = ext4_init_block_bitmap(sb, bh, block_group, desc);
517 70712 : if (err) {
518 0 : ext4_unlock_group(sb, block_group);
519 0 : unlock_buffer(bh);
520 0 : ext4_error(sb, "Failed to init block bitmap for group "
521 : "%u: %d", block_group, err);
522 0 : goto out;
523 : }
524 70712 : set_bitmap_uptodate(bh);
525 70712 : set_buffer_uptodate(bh);
526 70712 : set_buffer_verified(bh);
527 70712 : ext4_unlock_group(sb, block_group);
528 70712 : unlock_buffer(bh);
529 70712 : return bh;
530 : }
531 100416 : ext4_unlock_group(sb, block_group);
532 200830 : if (buffer_uptodate(bh)) {
533 : /*
534 : * if not uninit if bh is uptodate,
535 : * bitmap is also uptodate
536 : */
537 87 : set_bitmap_uptodate(bh);
538 87 : unlock_buffer(bh);
539 87 : goto verify;
540 : }
541 : /*
542 : * submit the buffer_head for reading
543 : */
544 100327 : set_buffer_new(bh);
545 100329 : trace_ext4_read_block_bitmap_load(sb, block_group, ignore_locked);
546 100329 : ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO |
547 : (ignore_locked ? REQ_RAHEAD : 0),
548 : ext4_end_bitmap_read);
549 100329 : return bh;
550 6583316 : verify:
551 6583316 : err = ext4_validate_block_bitmap(sb, desc, block_group, bh);
552 6583067 : if (err)
553 266 : goto out;
554 : return bh;
555 266 : out:
556 266 : put_bh(bh);
557 266 : return ERR_PTR(err);
558 : }
559 :
560 : /* Returns 0 on success, -errno on error */
561 6588632 : int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
562 : struct buffer_head *bh)
563 : {
564 6588632 : struct ext4_group_desc *desc;
565 :
566 13177264 : if (!buffer_new(bh))
567 : return 0;
568 99947 : desc = ext4_get_group_desc(sb, block_group, NULL);
569 99947 : if (!desc)
570 : return -EFSCORRUPTED;
571 99947 : wait_on_buffer(bh);
572 99947 : ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO);
573 199894 : if (!buffer_uptodate(bh)) {
574 0 : ext4_error_err(sb, EIO, "Cannot read block bitmap - "
575 : "block_group = %u, block_bitmap = %llu",
576 : block_group, (unsigned long long) bh->b_blocknr);
577 0 : ext4_mark_group_bitmap_corrupted(sb, block_group,
578 : EXT4_GROUP_INFO_BBITMAP_CORRUPT);
579 0 : return -EIO;
580 : }
581 99947 : clear_buffer_new(bh);
582 : /* Panic or remount fs read-only if block bitmap is invalid */
583 99947 : return ext4_validate_block_bitmap(sb, desc, block_group, bh);
584 : }
585 :
586 : struct buffer_head *
587 6249267 : ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
588 : {
589 6249267 : struct buffer_head *bh;
590 6249267 : int err;
591 :
592 6249267 : bh = ext4_read_block_bitmap_nowait(sb, block_group, false);
593 6249843 : if (IS_ERR(bh))
594 : return bh;
595 6249843 : err = ext4_wait_block_bitmap(sb, block_group, bh);
596 6249770 : if (err) {
597 0 : put_bh(bh);
598 0 : return ERR_PTR(err);
599 : }
600 : return bh;
601 : }
602 :
603 : /**
604 : * ext4_has_free_clusters()
605 : * @sbi: in-core super block structure.
606 : * @nclusters: number of needed blocks
607 : * @flags: flags from ext4_mb_new_blocks()
608 : *
609 : * Check if filesystem has nclusters free & available for allocation.
610 : * On success return 1, return 0 on failure.
611 : */
612 30899279 : static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
613 : s64 nclusters, unsigned int flags)
614 : {
615 30899279 : s64 free_clusters, dirty_clusters, rsv, resv_clusters;
616 30899279 : struct percpu_counter *fcc = &sbi->s_freeclusters_counter;
617 30899279 : struct percpu_counter *dcc = &sbi->s_dirtyclusters_counter;
618 :
619 30899279 : free_clusters = percpu_counter_read_positive(fcc);
620 30899279 : dirty_clusters = percpu_counter_read_positive(dcc);
621 30899279 : resv_clusters = atomic64_read(&sbi->s_resv_clusters);
622 :
623 : /*
624 : * r_blocks_count should always be multiple of the cluster ratio so
625 : * we are safe to do a plane bit shift only.
626 : */
627 61657670 : rsv = (ext4_r_blocks_count(sbi->s_es) >> sbi->s_cluster_bits) +
628 : resv_clusters;
629 :
630 30899279 : if (free_clusters - (nclusters + rsv + dirty_clusters) <
631 30899279 : EXT4_FREECLUSTERS_WATERMARK) {
632 4536811 : free_clusters = percpu_counter_sum_positive(fcc);
633 4541825 : dirty_clusters = percpu_counter_sum_positive(dcc);
634 : }
635 : /* Check whether we have space after accounting for current
636 : * dirty clusters & root reserved clusters.
637 : */
638 30904488 : if (free_clusters >= (rsv + nclusters + dirty_clusters))
639 : return 1;
640 :
641 : /* Hm, nope. Are (enough) root reserved clusters available? */
642 4474610 : if (uid_eq(sbi->s_resuid, current_fsuid()) ||
643 223199 : (!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
644 223234 : capable(CAP_SYS_RESOURCE) ||
645 223175 : (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
646 :
647 4252261 : if (free_clusters >= (nclusters + dirty_clusters +
648 : resv_clusters))
649 : return 1;
650 : }
651 : /* No free blocks. Let's see if we can dip into reserved pool */
652 3973411 : if (flags & EXT4_MB_USE_RESERVED) {
653 822 : if (free_clusters >= (nclusters + dirty_clusters))
654 822 : return 1;
655 : }
656 :
657 : return 0;
658 : }
659 :
660 30114371 : int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
661 : s64 nclusters, unsigned int flags)
662 : {
663 30114371 : if (ext4_has_free_clusters(sbi, nclusters, flags)) {
664 26813135 : percpu_counter_add(&sbi->s_dirtyclusters_counter, nclusters);
665 26813135 : return 0;
666 : } else
667 : return -ENOSPC;
668 : }
669 :
670 : /**
671 : * ext4_should_retry_alloc() - check if a block allocation should be retried
672 : * @sb: superblock
673 : * @retries: number of retry attempts made so far
674 : *
675 : * ext4_should_retry_alloc() is called when ENOSPC is returned while
676 : * attempting to allocate blocks. If there's an indication that a pending
677 : * journal transaction might free some space and allow another attempt to
678 : * succeed, this function will wait for the current or committing transaction
679 : * to complete and then return TRUE.
680 : */
681 982317 : int ext4_should_retry_alloc(struct super_block *sb, int *retries)
682 : {
683 982317 : struct ext4_sb_info *sbi = EXT4_SB(sb);
684 :
685 982317 : if (!sbi->s_journal)
686 : return 0;
687 :
688 982317 : if (++(*retries) > 3) {
689 66328 : percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
690 66328 : return 0;
691 : }
692 :
693 : /*
694 : * if there's no indication that blocks are about to be freed it's
695 : * possible we just missed a transaction commit that did so
696 : */
697 915989 : smp_mb();
698 916049 : if (sbi->s_mb_free_pending == 0) {
699 792495 : if (test_opt(sb, DISCARD)) {
700 0 : atomic_inc(&sbi->s_retry_alloc_pending);
701 0 : flush_work(&sbi->s_discard_work);
702 0 : atomic_dec(&sbi->s_retry_alloc_pending);
703 : }
704 792495 : return ext4_has_free_clusters(sbi, 1, 0);
705 : }
706 :
707 : /*
708 : * it's possible we've just missed a transaction commit here,
709 : * so ignore the returned status
710 : */
711 123554 : ext4_debug("%s: retrying operation after ENOSPC\n", sb->s_id);
712 123554 : (void) jbd2_journal_force_commit_nested(sbi->s_journal);
713 123554 : return 1;
714 : }
715 :
716 : /*
717 : * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
718 : *
719 : * @handle: handle to this transaction
720 : * @inode: file inode
721 : * @goal: given target block(filesystem wide)
722 : * @count: pointer to total number of clusters needed
723 : * @errp: error code
724 : *
725 : * Return 1st allocated block number on success, *count stores total account
726 : * error stores in errp pointer
727 : */
728 121071 : ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
729 : ext4_fsblk_t goal, unsigned int flags,
730 : unsigned long *count, int *errp)
731 : {
732 121071 : struct ext4_allocation_request ar;
733 121071 : ext4_fsblk_t ret;
734 :
735 121071 : memset(&ar, 0, sizeof(ar));
736 : /* Fill with neighbour allocated blocks */
737 121071 : ar.inode = inode;
738 121071 : ar.goal = goal;
739 121071 : ar.len = count ? *count : 1;
740 121071 : ar.flags = flags;
741 :
742 121071 : ret = ext4_mb_new_blocks(handle, &ar, errp);
743 121039 : if (count)
744 0 : *count = ar.len;
745 : /*
746 : * Account for the allocated meta blocks. We will never
747 : * fail EDQUOT for metdata, but we do account for it.
748 : */
749 121039 : if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
750 43542 : dquot_alloc_block_nofail(inode,
751 21771 : EXT4_C2B(EXT4_SB(inode->i_sb), ar.len));
752 : }
753 121039 : return ret;
754 : }
755 :
756 : /**
757 : * ext4_count_free_clusters() -- count filesystem free clusters
758 : * @sb: superblock
759 : *
760 : * Adds up the number of free clusters from each block group.
761 : */
762 2596 : ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
763 : {
764 2596 : ext4_fsblk_t desc_count;
765 2596 : struct ext4_group_desc *gdp;
766 2596 : ext4_group_t i;
767 2596 : ext4_group_t ngroups = ext4_get_groups_count(sb);
768 2596 : struct ext4_group_info *grp;
769 : #ifdef EXT4FS_DEBUG
770 : struct ext4_super_block *es;
771 : ext4_fsblk_t bitmap_count;
772 : unsigned int x;
773 : struct buffer_head *bitmap_bh = NULL;
774 :
775 : es = EXT4_SB(sb)->s_es;
776 : desc_count = 0;
777 : bitmap_count = 0;
778 : gdp = NULL;
779 :
780 : for (i = 0; i < ngroups; i++) {
781 : gdp = ext4_get_group_desc(sb, i, NULL);
782 : if (!gdp)
783 : continue;
784 : grp = NULL;
785 : if (EXT4_SB(sb)->s_group_info)
786 : grp = ext4_get_group_info(sb, i);
787 : if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
788 : desc_count += ext4_free_group_clusters(sb, gdp);
789 : brelse(bitmap_bh);
790 : bitmap_bh = ext4_read_block_bitmap(sb, i);
791 : if (IS_ERR(bitmap_bh)) {
792 : bitmap_bh = NULL;
793 : continue;
794 : }
795 :
796 : x = ext4_count_free(bitmap_bh->b_data,
797 : EXT4_CLUSTERS_PER_GROUP(sb) / 8);
798 : printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
799 : i, ext4_free_group_clusters(sb, gdp), x);
800 : bitmap_count += x;
801 : }
802 : brelse(bitmap_bh);
803 : printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
804 : ", computed = %llu, %llu\n",
805 : EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
806 : desc_count, bitmap_count);
807 : return bitmap_count;
808 : #else
809 2596 : desc_count = 0;
810 862952 : for (i = 0; i < ngroups; i++) {
811 857760 : gdp = ext4_get_group_desc(sb, i, NULL);
812 857760 : if (!gdp)
813 0 : continue;
814 857760 : grp = NULL;
815 857760 : if (EXT4_SB(sb)->s_group_info)
816 857760 : grp = ext4_get_group_info(sb, i);
817 857760 : if (!grp || !EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
818 857760 : desc_count += ext4_free_group_clusters(sb, gdp);
819 : }
820 :
821 2596 : return desc_count;
822 : #endif
823 : }
824 :
825 5908379 : static inline int test_root(ext4_group_t a, int b)
826 : {
827 9485769 : while (1) {
828 7697074 : if (a < b)
829 : return 0;
830 7643759 : if (a == b)
831 : return 1;
832 7508470 : if ((a % b) != 0)
833 : return 0;
834 1788695 : a = a / b;
835 : }
836 : }
837 :
838 : /**
839 : * ext4_bg_has_super - number of blocks used by the superblock in group
840 : * @sb: superblock for filesystem
841 : * @group: group number to check
842 : *
843 : * Return the number of blocks used by the superblock (primary or backup)
844 : * in this group. Currently this will be only 0 or 1.
845 : */
846 4012993 : int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
847 : {
848 4012993 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
849 :
850 4012993 : if (group == 0)
851 : return 1;
852 3998815 : if (ext4_has_feature_sparse_super2(sb)) {
853 93 : if (group == le32_to_cpu(es->s_backup_bgs[0]) ||
854 73 : group == le32_to_cpu(es->s_backup_bgs[1]))
855 : return 1;
856 61 : return 0;
857 : }
858 3998722 : if ((group <= 1) || !ext4_has_feature_sparse_super(sb))
859 : return 1;
860 3982798 : if (!(group & 1))
861 : return 0;
862 3947555 : if (test_root(group, 3) || (test_root(group, 5)) ||
863 1920141 : test_root(group, 7))
864 135289 : return 1;
865 :
866 : return 0;
867 : }
868 :
869 137728 : static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
870 : ext4_group_t group)
871 : {
872 137728 : unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
873 137728 : ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
874 137728 : ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
875 :
876 137728 : if (group == first || group == first + 1 || group == last)
877 5637 : return 1;
878 : return 0;
879 : }
880 :
881 1187595 : static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
882 : ext4_group_t group)
883 : {
884 1187595 : if (!ext4_bg_has_super(sb, group))
885 : return 0;
886 :
887 81017 : if (ext4_has_feature_meta_bg(sb))
888 32 : return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
889 : else
890 80985 : return EXT4_SB(sb)->s_gdb_count;
891 : }
892 :
893 : /**
894 : * ext4_bg_num_gdb - number of blocks used by the group table in group
895 : * @sb: superblock for filesystem
896 : * @group: group number to check
897 : *
898 : * Return the number of blocks used by the group descriptor table
899 : * (primary or backup) in this group. In the future there may be a
900 : * different number of descriptor blocks in each group.
901 : */
902 1241247 : unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
903 : {
904 1241247 : unsigned long first_meta_bg =
905 1241247 : le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
906 1241247 : unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
907 :
908 1241247 : if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg)
909 1168560 : return ext4_bg_num_gdb_nometa(sb, group);
910 :
911 72687 : return ext4_bg_num_gdb_meta(sb,group);
912 :
913 : }
914 :
915 : /*
916 : * This function returns the number of file system metadata clusters at
917 : * the beginning of a block group, including the reserved gdt blocks.
918 : */
919 747645 : static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
920 : ext4_group_t block_group)
921 : {
922 747645 : struct ext4_sb_info *sbi = EXT4_SB(sb);
923 747645 : unsigned num;
924 :
925 : /* Check for superblock and gdt backups in this group */
926 747645 : num = ext4_bg_has_super(sb, block_group);
927 :
928 747645 : if (!ext4_has_feature_meta_bg(sb) ||
929 65068 : block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
930 65068 : sbi->s_desc_per_block) {
931 682604 : if (num) {
932 19035 : num += ext4_bg_num_gdb_nometa(sb, block_group);
933 19035 : num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
934 : }
935 : } else { /* For META_BG_BLOCK_GROUPS */
936 65041 : num += ext4_bg_num_gdb_meta(sb, block_group);
937 : }
938 747645 : return EXT4_NUM_B2C(sbi, num);
939 : }
940 : /**
941 : * ext4_inode_to_goal_block - return a hint for block allocation
942 : * @inode: inode for block allocation
943 : *
944 : * Return the ideal location to start allocating blocks for a
945 : * newly created inode.
946 : */
947 925530 : ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
948 : {
949 925530 : struct ext4_inode_info *ei = EXT4_I(inode);
950 925530 : ext4_group_t block_group;
951 925530 : ext4_grpblk_t colour;
952 925530 : int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
953 925569 : ext4_fsblk_t bg_start;
954 925569 : ext4_fsblk_t last_block;
955 :
956 925569 : block_group = ei->i_block_group;
957 925569 : if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
958 : /*
959 : * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
960 : * block groups per flexgroup, reserve the first block
961 : * group for directories and special files. Regular
962 : * files will start at the second block group. This
963 : * tends to speed up directory access and improves
964 : * fsck times.
965 : */
966 925045 : block_group &= ~(flex_size-1);
967 925045 : if (S_ISREG(inode->i_mode))
968 532487 : block_group++;
969 : }
970 925569 : bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
971 925569 : last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
972 :
973 : /*
974 : * If we are doing delayed allocation, we don't need take
975 : * colour into account.
976 : */
977 925569 : if (test_opt(inode->i_sb, DELALLOC))
978 : return bg_start;
979 :
980 5432 : if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
981 5432 : colour = (task_pid_nr(current) % 16) *
982 5432 : (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
983 : else
984 0 : colour = (task_pid_nr(current) % 16) *
985 0 : ((last_block - bg_start) / 16);
986 5432 : return bg_start + colour;
987 : }
988 :
|